LCOV - coverage.info

LCOV - code coverage report

Current view:	top level - fs - exec.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	16	705	2.3 %
Date:	2022-12-09 01:23:36	Functions:	3	48	6.2 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  linux/fs/exec.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : /*
       9             :  * #!-checking implemented by tytso.
      10             :  */
      11             : /*
      12             :  * Demand-loading implemented 01.12.91 - no need to read anything but
      13             :  * the header into memory. The inode of the executable is put into
      14             :  * "current->executable", and page faults do the actual loading. Clean.
      15             :  *
      16             :  * Once more I can proudly say that linux stood up to being changed: it
      17             :  * was less than 2 hours work to get demand-loading completely implemented.
      18             :  *
      19             :  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
      20             :  * current->executable is only used by the procfs.  This allows a dispatch
      21             :  * table to check for several different types  of binary formats.  We keep
      22             :  * trying until we recognize the file or we run out of supported binary
      23             :  * formats.
      24             :  */
      25             : 
      26             : #include <linux/kernel_read_file.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/file.h>
      29             : #include <linux/fdtable.h>
      30             : #include <linux/mm.h>
      31             : #include <linux/vmacache.h>
      32             : #include <linux/stat.h>
      33             : #include <linux/fcntl.h>
      34             : #include <linux/swap.h>
      35             : #include <linux/string.h>
      36             : #include <linux/init.h>
      37             : #include <linux/sched/mm.h>
      38             : #include <linux/sched/coredump.h>
      39             : #include <linux/sched/signal.h>
      40             : #include <linux/sched/numa_balancing.h>
      41             : #include <linux/sched/task.h>
      42             : #include <linux/pagemap.h>
      43             : #include <linux/perf_event.h>
      44             : #include <linux/highmem.h>
      45             : #include <linux/spinlock.h>
      46             : #include <linux/key.h>
      47             : #include <linux/personality.h>
      48             : #include <linux/binfmts.h>
      49             : #include <linux/utsname.h>
      50             : #include <linux/pid_namespace.h>
      51             : #include <linux/module.h>
      52             : #include <linux/namei.h>
      53             : #include <linux/mount.h>
      54             : #include <linux/security.h>
      55             : #include <linux/syscalls.h>
      56             : #include <linux/tsacct_kern.h>
      57             : #include <linux/cn_proc.h>
      58             : #include <linux/audit.h>
      59             : #include <linux/kmod.h>
      60             : #include <linux/fsnotify.h>
      61             : #include <linux/fs_struct.h>
      62             : #include <linux/oom.h>
      63             : #include <linux/compat.h>
      64             : #include <linux/vmalloc.h>
      65             : #include <linux/io_uring.h>
      66             : #include <linux/syscall_user_dispatch.h>
      67             : #include <linux/coredump.h>
      68             : 
      69             : #include <linux/uaccess.h>
      70             : #include <asm/mmu_context.h>
      71             : #include <asm/tlb.h>
      72             : 
      73             : #include <trace/events/task.h>
      74             : #include "internal.h"
      75             : 
      76             : #include <trace/events/sched.h>
      77             : 
      78             : static int bprm_creds_from_file(struct linux_binprm *bprm);
      79             : 
                       /* NOTE(review): presumably the value behind the fs.suid_dumpable sysctl -- confirm. */
      80             : int suid_dumpable = 0;
      81             : 
                       /* List of registered binary format handlers, linked through their ->lh member. */
      82             : static LIST_HEAD(formats);
                       /* Guards "formats": taken for writing to add/remove entries, for reading to walk it. */
      83             : static DEFINE_RWLOCK(binfmt_lock);
      84             : 
                       /*
                        * __register_binfmt() - link a binary format handler into "formats".
                        * @fmt:    handler to add, through its ->lh list node
                        * @insert: non-zero uses list_add(), zero uses list_add_tail()
                        *
                        * The list is modified with binfmt_lock held for writing.
                        */
      85           2 : void __register_binfmt(struct linux_binfmt * fmt, int insert)
      86             : {
      87           2 :         write_lock(&binfmt_lock);
      88           2 :         insert ? list_add(&fmt->lh, &formats) :
      89           2 :                  list_add_tail(&fmt->lh, &formats);
      90           2 :         write_unlock(&binfmt_lock);
      91           2 : }
      92             : 
      93             : EXPORT_SYMBOL(__register_binfmt);
      94             : 
                       /*
                        * unregister_binfmt() - unlink a binary format handler from its list.
                        * @fmt: handler whose ->lh node is removed with list_del()
                        *
                        * The removal runs with binfmt_lock held for writing.
                        */
      95           0 : void unregister_binfmt(struct linux_binfmt * fmt)
      96             : {
      97           0 :         write_lock(&binfmt_lock);
      98           0 :         list_del(&fmt->lh);
      99           0 :         write_unlock(&binfmt_lock);
     100           0 : }
     101             : 
     102             : EXPORT_SYMBOL(unregister_binfmt);
     103             : 
                       /*
                        * Drop one reference on fmt->module.  The uselib() format loop uses this
                        * to balance its try_module_get() on the same module.
                        */
     104             : static inline void put_binfmt(struct linux_binfmt * fmt)
     105             : {
     106           0 :         module_put(fmt->module);
     107             : }
     108             : 
                       /*
                        * path_noexec() - report whether execution is disabled for @path.
                        * @path: path whose mount and superblock flags are inspected
                        *
                        * Returns true when the mount has MNT_NOEXEC set or the mount's
                        * superblock has SB_I_NOEXEC set in s_iflags, false otherwise.
                        */
     109           0 : bool path_noexec(const struct path *path)
     110             : {
     111           0 :         return (path->mnt->mnt_flags & MNT_NOEXEC) ||
     112           0 :                (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
     113             : }
     114             : 
     115             : #ifdef CONFIG_USELIB
     116             : /*
     117             :  * Note that a shared library must be both readable and executable due to
     118             :  * security reasons.
     119             :  *
     120             :  * Also note that we take the address to load from the file itself.
                        *
                        * uselib() opens @library read-only with __FMODE_EXEC, following
                        * symlinks, and offers the file to every registered format that has a
                        * ->load_shlib() method.  The walk stops at the first handler whose
                        * result is not -ENOEXEC.
                        *
                        * Returns the result of the last ->load_shlib() call (-ENOEXEC if no
                        * handler was tried), -EACCES if the opened file is not a regular file
                        * or path_noexec() is true for it, or the error encoded in the pointer
                        * returned by getname() / do_filp_open().
     121             :  */
     122             : SYSCALL_DEFINE1(uselib, const char __user *, library)
     123             : {
     124             :         struct linux_binfmt *fmt;
     125             :         struct file *file;
     126             :         struct filename *tmp = getname(library);
     127             :         int error = PTR_ERR(tmp);
     128             :         static const struct open_flags uselib_flags = {
     129             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     130             :                 .acc_mode = MAY_READ | MAY_EXEC,
     131             :                 .intent = LOOKUP_OPEN,
     132             :                 .lookup_flags = LOOKUP_FOLLOW,
     133             :         };
     134             : 
                               /* error already holds PTR_ERR(tmp); only meaningful if IS_ERR(). */
     135             :         if (IS_ERR(tmp))
     136             :                 goto out;
     137             : 
     138             :         file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
                               /* The name is released whether or not the open succeeded. */
     139             :         putname(tmp);
     140             :         error = PTR_ERR(file);
     141             :         if (IS_ERR(file))
     142             :                 goto out;
     143             : 
     144             :         /*
     145             :          * may_open() has already checked for this, so it should be
     146             :          * impossible to trip now. But we need to be extra cautious
     147             :          * and check again at the very end too.
     148             :          */
     149             :         error = -EACCES;
     150             :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     151             :                          path_noexec(&file->f_path)))
     152             :                 goto exit;
     153             : 
     154             :         fsnotify_open(file);
     155             : 
                               /* Result if the loop below never calls a handler. */
     156             :         error = -ENOEXEC;
     157             : 
     158             :         read_lock(&binfmt_lock);
     159             :         list_for_each_entry(fmt, &formats, lh) {
     160             :                 if (!fmt->load_shlib)
     161             :                         continue;
                                       /*
                                        * Take a module reference, then call ->load_shlib()
                                        * with binfmt_lock released; the lock is re-taken
                                        * before the reference is dropped and the walk resumes.
                                        */
     162             :                 if (!try_module_get(fmt->module))
     163             :                         continue;
     164             :                 read_unlock(&binfmt_lock);
     165             :                 error = fmt->load_shlib(file);
     166             :                 read_lock(&binfmt_lock);
     167             :                 put_binfmt(fmt);
                                       /* -ENOEXEC means "try the next format". */
     168             :                 if (error != -ENOEXEC)
     169             :                         break;
     170             :         }
     171             :         read_unlock(&binfmt_lock);
     172             : exit:
     173             :         fput(file);
     174             : out:
     175             :         return error;
     176             : }
     177             : #endif /* #ifdef CONFIG_USELIB */
     178             : 
     179             : #ifdef CONFIG_MMU
     180             : /*
     181             :  * The nascent bprm->mm is not visible until exec_mmap() but it can
     182             :  * use a lot of memory, account these pages in current->mm temporarily
     183             :  * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
     184             :  * change the counter back via acct_arg_size(0).
                        *
                        * @bprm:  exec state; bprm->vma_pages holds the last accounted page count
                        * @pages: new page count to account
                        *
                        * Only the difference from bprm->vma_pages is added to the MM_ANONPAGES
                        * counter of current->mm.  Nothing is done when current->mm is NULL or
                        * the count is unchanged.
     185             :  */
     186           0 : static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     187             : {
     188           0 :         struct mm_struct *mm = current->mm;
     189           0 :         long diff = (long)(pages - bprm->vma_pages);
     190             : 
     191           0 :         if (!mm || !diff)
     192             :                 return;
     193             : 
     194           0 :         bprm->vma_pages = pages;
     195             :         add_mm_counter(mm, MM_ANONPAGES, diff);
     196             : }
     197             : 
                       /*
                        * get_arg_page() - look up one page of the new mm's argument area (MMU).
                        * @bprm:  exec state; the lookup is done in bprm->mm
                        * @pos:   address inside bprm->mm
                        * @write: non-zero adds FOLL_WRITE to the lookup and updates the
                        *         argument accounting afterwards
                        *
                        * Returns the page obtained from get_user_pages_remote(), or NULL when
                        * that call returns <= 0 (or, with CONFIG_STACK_GROWSUP, when
                        * expand_downwards() fails).  The caller releases the page with
                        * put_arg_page().
                        */
     198           0 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     199             :                 int write)
     200             : {
     201             :         struct page *page;
     202             :         int ret;
     203           0 :         unsigned int gup_flags = FOLL_FORCE;
     204             : 
     205             : #ifdef CONFIG_STACK_GROWSUP
     206             :         if (write) {
     207             :                 ret = expand_downwards(bprm->vma, pos);
     208             :                 if (ret < 0)
     209             :                         return NULL;
     210             :         }
     211             : #endif
     212             : 
     213           0 :         if (write)
     214           0 :                 gup_flags |= FOLL_WRITE;
     215             : 
     216             :         /*
     217             :          * We are doing an exec().  'current' is the process
     218             :          * doing the exec and bprm->mm is the new process's mm.
     219             :          */
     220           0 :         mmap_read_lock(bprm->mm);
     221           0 :         ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
     222             :                         &page, NULL, NULL);
     223           0 :         mmap_read_unlock(bprm->mm);
     224           0 :         if (ret <= 0)
     225             :                 return NULL;
     226             : 
                               /* Re-account using the current size of the stack vma. */
     227           0 :         if (write)
     228           0 :                 acct_arg_size(bprm, vma_pages(bprm->vma));
     229             : 
     230           0 :         return page;
     231             : }
     232             : 
                       /* Release a page returned by get_arg_page() (MMU build: put_page()). */
     233             : static void put_arg_page(struct page *page)
     234             : {
     235           0 :         put_page(page);
     236             : }
     237             : 
                       /* Intentionally empty in the MMU build; the !CONFIG_MMU variant frees bprm->page[]. */
     238             : static void free_arg_pages(struct linux_binprm *bprm)
     239             : {
     240             : }
     241             : 
                       /*
                        * Call flush_cache_page() for @page at address @pos within bprm->vma
                        * (MMU build).
                        */
     242             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     243             :                 struct page *page)
     244             : {
     245           0 :         flush_cache_page(bprm->vma, pos, page_to_pfn(page));
     246             : }
     247             : 
                       /*
                        * __bprm_mm_init() - give bprm->mm a one-page temporary stack vma (MMU).
                        * @bprm: exec state; bprm->mm must already be set
                        *
                        * Allocates an anonymous vma covering the last page below STACK_TOP_MAX,
                        * inserts it into the mm under the mmap write lock, and points bprm->p
                        * one pointer size below the top of that vma.
                        *
                        * Returns 0 on success, -ENOMEM if the vma cannot be allocated, -EINTR if
                        * mmap_write_lock_killable() fails, or the insert_vm_struct() error.  On
                        * every failure after allocation the vma is freed and bprm->vma is reset
                        * to NULL.
                        */
     248           0 : static int __bprm_mm_init(struct linux_binprm *bprm)
     249             : {
     250             :         int err;
     251           0 :         struct vm_area_struct *vma = NULL;
     252           0 :         struct mm_struct *mm = bprm->mm;
     253             : 
     254           0 :         bprm->vma = vma = vm_area_alloc(mm);
     255           0 :         if (!vma)
     256             :                 return -ENOMEM;
     257           0 :         vma_set_anonymous(vma);
     258             : 
     259           0 :         if (mmap_write_lock_killable(mm)) {
     260             :                 err = -EINTR;
     261             :                 goto err_free;
     262             :         }
     263             : 
     264             :         /*
     265             :          * Place the stack at the largest stack address the architecture
     266             :          * supports. Later, we'll move this to an appropriate place. We don't
     267             :          * use STACK_TOP because that can depend on attributes which aren't
     268             :          * configured yet.
     269             :          */
     270             :         BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
     271           0 :         vma->vm_end = STACK_TOP_MAX;
     272           0 :         vma->vm_start = vma->vm_end - PAGE_SIZE;
     273           0 :         vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
     274           0 :         vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
     275             : 
     276           0 :         err = insert_vm_struct(mm, vma);
     277           0 :         if (err)
     278             :                 goto err;
     279             : 
                               /* The mm now holds exactly one (stack) page of address space. */
     280           0 :         mm->stack_vm = mm->total_vm = 1;
     281           0 :         mmap_write_unlock(mm);
     282           0 :         bprm->p = vma->vm_end - sizeof(void *);
     283           0 :         return 0;
                       /* err: lock is held and must be dropped; err_free: lock was never taken. */
     284             : err:
     285             :         mmap_write_unlock(mm);
     286             : err_free:
     287           0 :         bprm->vma = NULL;
     288           0 :         vm_area_free(vma);
     289           0 :         return err;
     290             : }
     291             : 
                       /*
                        * MMU build: a string length is acceptable when it does not exceed
                        * MAX_ARG_STRLEN.  @bprm is not consulted here.
                        */
     292             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     293             : {
     294           0 :         return len <= MAX_ARG_STRLEN;
     295             : }
     296             : 
     297             : #else
     298             : 
                       /* !CONFIG_MMU build: argument pages are not accounted, so this is a no-op. */
     299             : static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     300             : {
     301             : }
     302             : 
                       /*
                        * get_arg_page() - fetch the argument page covering @pos (!CONFIG_MMU).
                        * @bprm:  exec state holding the bprm->page[] array
                        * @pos:   byte offset; pos / PAGE_SIZE selects the array slot
                        * @write: non-zero allows an empty slot to be filled with a new page
                        *
                        * Returns the page stored in the slot.  If the slot is empty and @write
                        * is set, a zeroed page is allocated (GFP_HIGHUSER|__GFP_ZERO), stored
                        * in the slot and returned.  Returns NULL if that allocation fails, or
                        * if the slot is empty and @write is zero.
                        */
     303             : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     304             :                 int write)
     305             : {
     306             :         struct page *page;
     307             : 
     308             :         page = bprm->page[pos / PAGE_SIZE];
     309             :         if (!page && write) {
     310             :                 page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
     311             :                 if (!page)
     312             :                         return NULL;
     313             :                 bprm->page[pos / PAGE_SIZE] = page;
     314             :         }
     315             : 
     316             :         return page;
     317             : }
     318             : 
                       /*
                        * !CONFIG_MMU build: nothing to release per use; the pages stay in
                        * bprm->page[] until free_arg_page() frees them.
                        */
     319             : static void put_arg_page(struct page *page)
     320             : {
     321             : }
     322             : 
                       /* Free the page in slot @i of bprm->page[], if any, and clear the slot. */
     323             : static void free_arg_page(struct linux_binprm *bprm, int i)
     324             : {
     325             :         if (bprm->page[i]) {
     326             :                 __free_page(bprm->page[i]);
     327             :                 bprm->page[i] = NULL;
     328             :         }
     329             : }
     330             : 
                       /* !CONFIG_MMU build: free every one of the MAX_ARG_PAGES slots of bprm->page[]. */
     331             : static void free_arg_pages(struct linux_binprm *bprm)
     332             : {
     333             :         int i;
     334             : 
     335             :         for (i = 0; i < MAX_ARG_PAGES; i++)
     336             :                 free_arg_page(bprm, i);
     337             : }
     338             : 
                       /* !CONFIG_MMU build: no cache flush is performed; all parameters are unused. */
     339             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     340             :                 struct page *page)
     341             : {
     342             : }
     343             : 
                       /*
                        * !CONFIG_MMU build: no vma is set up.  bprm->p starts one pointer size
                        * below the end of a PAGE_SIZE * MAX_ARG_PAGES byte range.  Always
                        * returns 0.
                        */
     344             : static int __bprm_mm_init(struct linux_binprm *bprm)
     345             : {
     346             :         bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
     347             :         return 0;
     348             : }
     349             : 
                       /*
                        * !CONFIG_MMU build: a string length is acceptable when it does not
                        * exceed bprm->p, i.e. subtracting it from bprm->p would not go below 0.
                        */
     350             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     351             : {
     352             :         return len <= bprm->p;
     353             : }
     354             : 
     355             : #endif /* CONFIG_MMU */
     356             : 
     357             : /*
     358             :  * Create a new mm_struct and populate it with a temporary stack
     359             :  * vm_area_struct.  We don't have enough context at this point to set the stack
     360             :  * flags, permissions, and offset, so we use temporary values.  We'll update
     361             :  * them later in setup_arg_pages().
                        *
                        * Also snapshots the caller's RLIMIT_STACK into bprm->rlim_stack.
                        *
                        * Returns 0 on success, -ENOMEM if mm_alloc() fails, or the error from
                        * __bprm_mm_init().  On failure after the mm was allocated, bprm->mm is
                        * reset to NULL and the mm is released with mmdrop().
     362             :  */
     363           0 : static int bprm_mm_init(struct linux_binprm *bprm)
     364             : {
     365             :         int err;
     366           0 :         struct mm_struct *mm = NULL;
     367             : 
     368           0 :         bprm->mm = mm = mm_alloc();
     369           0 :         err = -ENOMEM;
     370           0 :         if (!mm)
     371             :                 goto err;
     372             : 
     373             :         /* Save current stack limit for all calculations made during exec. */
     374           0 :         task_lock(current->group_leader);
     375           0 :         bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
     376           0 :         task_unlock(current->group_leader);
     377             : 
     378           0 :         err = __bprm_mm_init(bprm);
     379           0 :         if (err)
     380             :                 goto err;
     381             : 
     382             :         return 0;
     383             : 
     384             : err:
                               /* mm is still NULL when mm_alloc() itself failed. */
     385           0 :         if (mm) {
     386           0 :                 bprm->mm = NULL;
     387             :                 mmdrop(mm);
     388             :         }
     389             : 
     390             :         return err;
     391             : }
     392             : 
                       /*
                        * A user-space array of string pointers (argv or envp) in either native
                        * or compat layout.  With CONFIG_COMPAT, is_compat tells
                        * get_user_arg_ptr() which union member to read through.
                        */
     393             : struct user_arg_ptr {
     394             : #ifdef CONFIG_COMPAT
     395             :         bool is_compat;
     396             : #endif
     397             :         union {
     398             :                 const char __user *const __user *native;
     399             : #ifdef CONFIG_COMPAT
     400             :                 const compat_uptr_t __user *compat;
     401             : #endif
     402             :         } ptr;
     403             : };
     404             : 
                       /*
                        * get_user_arg_ptr() - read entry @nr of a user-space pointer array.
                        * @argv: the array descriptor
                        * @nr:   index of the entry to fetch
                        *
                        * For a compat array the entry is read as a compat_uptr_t and converted
                        * with compat_ptr(); otherwise it is read as a native pointer.
                        *
                        * Returns the user pointer stored in the entry (which may be NULL), or
                        * ERR_PTR(-EFAULT) if get_user() fails.
                        */
     405           0 : static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
     406             : {
     407             :         const char __user *native;
     408             : 
     409             : #ifdef CONFIG_COMPAT
     410             :         if (unlikely(argv.is_compat)) {
     411             :                 compat_uptr_t compat;
     412             : 
     413             :                 if (get_user(compat, argv.ptr.compat + nr))
     414             :                         return ERR_PTR(-EFAULT);
     415             : 
     416             :                 return compat_ptr(compat);
     417             :         }
     418             : #endif
     419             : 
     420           0 :         if (get_user(native, argv.ptr.native + nr))
     421             :                 return ERR_PTR(-EFAULT);
     422             : 
     423           0 :         return native;
     424             : }
     425             : 
     426             : /*
     427             :  * count() counts the number of strings in array ARGV.
                        *
                        * @argv: user-space pointer array; a NULL array counts as zero strings
                        * @max:  largest number of strings accepted
                        *
                        * Entries are read with get_user_arg_ptr() until a NULL entry is found.
                        *
                        * Returns the number of non-NULL entries, -EFAULT if an entry cannot be
                        * read, -E2BIG if a non-NULL entry is found once @max entries have
                        * already been counted, or -ERESTARTNOHAND if a fatal signal is pending.
     428             :  */
     429           0 : static int count(struct user_arg_ptr argv, int max)
     430             : {
     431           0 :         int i = 0;
     432             : 
     433           0 :         if (argv.ptr.native != NULL) {
     434           0 :                 for (;;) {
     435           0 :                         const char __user *p = get_user_arg_ptr(argv, i);
     436             : 
                                               /* A NULL entry terminates the array. */
     437           0 :                         if (!p)
     438             :                                 break;
     439             : 
     440           0 :                         if (IS_ERR(p))
     441             :                                 return -EFAULT;
     442             : 
     443           0 :                         if (i >= max)
     444             :                                 return -E2BIG;
     445           0 :                         ++i;
     446             : 
     447           0 :                         if (fatal_signal_pending(current))
     448             :                                 return -ERESTARTNOHAND;
     449           0 :                         cond_resched();
     450             :                 }
     451             :         }
     452             :         return i;
     453             : }
     454             : 
                       /*
                        * count_strings_kernel() - count the entries of a NULL-terminated
                        * kernel-space string array.
                        * @argv: array to count; NULL counts as zero strings
                        *
                        * Returns the number of non-NULL entries, -E2BIG if a non-NULL entry is
                        * found at index MAX_ARG_STRINGS or beyond, or -ERESTARTNOHAND if a
                        * fatal signal is pending.
                        */
     455           0 : static int count_strings_kernel(const char *const *argv)
     456             : {
     457             :         int i;
     458             : 
     459           0 :         if (!argv)
     460             :                 return 0;
     461             : 
     462           0 :         for (i = 0; argv[i]; ++i) {
     463           0 :                 if (i >= MAX_ARG_STRINGS)
     464             :                         return -E2BIG;
     465           0 :                 if (fatal_signal_pending(current))
     466             :                         return -ERESTARTNOHAND;
     467           0 :                 cond_resched();
     468             :         }
     469             :         return i;
     470             : }
     471             : 
                       /*
                        * bprm_stack_limits() - compute bprm->argmin, the lowest value bprm->p
                        * may reach while argument and environment strings are copied.
                        * @bprm: exec state; reads rlim_stack, argc, envc and p
                        *
                        * Returns 0 on success, or -E2BIG when the argv/envp pointers alone
                        * would use up the whole string budget.
                        */
     472             : static int bprm_stack_limits(struct linux_binprm *bprm)
     473             : {
     474             :         unsigned long limit, ptr_size;
     475             : 
     476             :         /*
     477             :          * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
     478             :          * (whichever is smaller) for the argv+env strings.
     479             :          * This ensures that:
     480             :          *  - the remaining binfmt code will not run out of stack space,
     481             :          *  - the program will have a reasonable amount of stack left
     482             :          *    to work from.
     483             :          */
     484           0 :         limit = _STK_LIM / 4 * 3;
     485           0 :         limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
     486             :         /*
     487             :          * We've historically supported up to 32 pages (ARG_MAX)
     488             :          * of argument strings even with small stacks
     489             :          */
     490           0 :         limit = max_t(unsigned long, limit, ARG_MAX);
     491             :         /*
     492             :          * We must account for the size of all the argv and envp pointers to
     493             :          * the argv and envp strings, since they will also take up space in
     494             :          * the stack. They aren't stored until much later when we can't
     495             :          * signal to the parent that the child has run out of stack space.
     496             :          * Instead, calculate it here so it's possible to fail gracefully.
     497             :          *
     498             :          * In the case of argc = 0, make sure there is space for adding an
     499             :          * empty string (which will bump argc to 1), to ensure confused
     500             :          * userspace programs don't start processing from argv[1], thinking
     501             :          * argc can never be 0, to keep them from walking envp by accident.
     502             :          * See do_execveat_common().
     503             :          */
     504           0 :         ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
     505           0 :         if (limit <= ptr_size)
     506             :                 return -E2BIG;
     507           0 :         limit -= ptr_size;
     508             : 
                               /* Strings are copied downwards from bprm->p; this is the floor. */
     509           0 :         bprm->argmin = bprm->p - limit;
     510             :         return 0;
     511             : }
     512             : 
     513             : /*
     514             :  * 'copy_strings()' copies argument/environment strings from the old
     515             :  * process's memory to the new process's stack.  The call to get_user_pages()
     516             :  * ensures the destination page is created and not swapped out.
                        *
                        * @argc: number of entries of @argv to copy
                        * @argv: user-space array of string pointers
                        * @bprm: exec state; bprm->p is moved down by the length of each string
                        *
                        * The array is walked from its last entry to its first, and every string
                        * is copied from its end towards its start in chunks that never cross a
                        * destination page boundary.
                        *
                        * Returns 0 on success, -EFAULT if a pointer or string cannot be read,
                        * -E2BIG if a length fails valid_arg_len(), bprm->p would drop below
                        * bprm->argmin (CONFIG_MMU) or get_arg_page() fails, or -ERESTARTNOHAND
                        * if a fatal signal is pending.
     517             :  */
     518           0 : static int copy_strings(int argc, struct user_arg_ptr argv,
     519             :                         struct linux_binprm *bprm)
     520             : {
     521           0 :         struct page *kmapped_page = NULL;
     522           0 :         char *kaddr = NULL;
     523           0 :         unsigned long kpos = 0;
     524             :         int ret;
     525             : 
     526           0 :         while (argc-- > 0) {
     527             :                 const char __user *str;
     528             :                 int len;
     529             :                 unsigned long pos;
     530             : 
     531           0 :                 ret = -EFAULT;
     532           0 :                 str = get_user_arg_ptr(argv, argc);
     533           0 :                 if (IS_ERR(str))
     534             :                         goto out;
     535             : 
                                       /* A zero result is reported as -EFAULT (ret preset above). */
     536           0 :                 len = strnlen_user(str, MAX_ARG_STRLEN);
     537           0 :                 if (!len)
     538             :                         goto out;
     539             : 
     540           0 :                 ret = -E2BIG;
     541           0 :                 if (!valid_arg_len(bprm, len))
     542             :                         goto out;
     543             : 
     544             :                 /* We're going to work our way backwards. */
     545           0 :                 pos = bprm->p;
     546           0 :                 str += len;
     547           0 :                 bprm->p -= len;
     548             : #ifdef CONFIG_MMU
     549           0 :                 if (bprm->p < bprm->argmin)
     550             :                         goto out;
     551             : #endif
     552             : 
     553           0 :                 while (len > 0) {
     554             :                         int offset, bytes_to_copy;
     555             : 
     556           0 :                         if (fatal_signal_pending(current)) {
     557             :                                 ret = -ERESTARTNOHAND;
     558             :                                 goto out;
     559             :                         }
     560           0 :                         cond_resched();
     561             : 
                                               /*
                                                * offset is the room left below pos within its
                                                * page; a page-aligned pos means a whole page.
                                                */
     562           0 :                         offset = pos % PAGE_SIZE;
     563           0 :                         if (offset == 0)
     564           0 :                                 offset = PAGE_SIZE;
     565             : 
     566           0 :                         bytes_to_copy = offset;
     567           0 :                         if (bytes_to_copy > len)
     568           0 :                                 bytes_to_copy = len;
     569             : 
     570           0 :                         offset -= bytes_to_copy;
     571           0 :                         pos -= bytes_to_copy;
     572           0 :                         str -= bytes_to_copy;
     573           0 :                         len -= bytes_to_copy;
     574             : 
                                               /* Remap only when pos has moved to another page. */
     575           0 :                         if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
     576             :                                 struct page *page;
     577             : 
     578           0 :                                 page = get_arg_page(bprm, pos, 1);
     579           0 :                                 if (!page) {
     580             :                                         ret = -E2BIG;
     581             :                                         goto out;
     582             :                                 }
     583             : 
     584           0 :                                 if (kmapped_page) {
     585           0 :                                         flush_dcache_page(kmapped_page);
     586           0 :                                         kunmap(kmapped_page);
     587             :                                         put_arg_page(kmapped_page);
     588             :                                 }
     589           0 :                                 kmapped_page = page;
     590           0 :                                 kaddr = kmap(kmapped_page);
     591           0 :                                 kpos = pos & PAGE_MASK;
     592           0 :                                 flush_arg_page(bprm, kpos, kmapped_page);
     593             :                         }
     594           0 :                         if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
     595             :                                 ret = -EFAULT;
     596             :                                 goto out;
     597             :                         }
     598             :                 }
     599             :         }
     600             :         ret = 0;
     601             : out:
                               /* Release whichever page is still mapped, on success and failure alike. */
     602           0 :         if (kmapped_page) {
     603           0 :                 flush_dcache_page(kmapped_page);
     604           0 :                 kunmap(kmapped_page);
     605             :                 put_arg_page(kmapped_page);
     606             :         }
     607           0 :         return ret;
     608             : }
     609             : 
     610             : /*
     611             :  * Copy an argument/environment string from the kernel to the process's stack.
                        *
                        * @arg:  NUL-terminated kernel string
                        * @bprm: exec state; bprm->p is moved down by the string length,
                        *        terminating NUL included
                        *
                        * The string is copied from its end towards its start, one destination
                        * page at a time.
                        *
                        * Returns 0 on success, or -E2BIG if the length fails valid_arg_len(),
                        * bprm->p would drop below bprm->argmin (CONFIG_MMU), or get_arg_page()
                        * fails.
     612             :  */
     613           0 : int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
     614             : {
     615           0 :         int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
     616           0 :         unsigned long pos = bprm->p;
     617             : 
                               /*
                                * NOTE(review): len already includes +1 for the NUL, so this
                                * test looks like it cannot fire -- confirm before relying on
                                * the -EFAULT return.
                                */
     618           0 :         if (len == 0)
     619             :                 return -EFAULT;
     620           0 :         if (!valid_arg_len(bprm, len))
     621             :                 return -E2BIG;
     622             : 
     623             :         /* We're going to work our way backwards. */
     624           0 :         arg += len;
     625           0 :         bprm->p -= len;
     626           0 :         if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
     627             :                 return -E2BIG;
     628             : 
     629           0 :         while (len > 0) {
                                       /* Copy at most up to the start of the page holding pos - 1. */
     630           0 :                 unsigned int bytes_to_copy = min_t(unsigned int, len,
     631             :                                 min_not_zero(offset_in_page(pos), PAGE_SIZE));
     632             :                 struct page *page;
     633             :                 char *kaddr;
     634             : 
     635           0 :                 pos -= bytes_to_copy;
     636           0 :                 arg -= bytes_to_copy;
     637           0 :                 len -= bytes_to_copy;
     638             : 
     639           0 :                 page = get_arg_page(bprm, pos, 1);
     640           0 :                 if (!page)
     641             :                         return -E2BIG;
     642           0 :                 kaddr = kmap_atomic(page);
     643           0 :                 flush_arg_page(bprm, pos & PAGE_MASK, page);
     644           0 :                 memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
     645           0 :                 flush_dcache_page(page);
     646           0 :                 kunmap_atomic(kaddr);
     647             :                 put_arg_page(page);
     648             :         }
     649             : 
     650             :         return 0;
     651             : }
     652             : EXPORT_SYMBOL(copy_string_kernel);
     653             : 
     654           0 : static int copy_strings_kernel(int argc, const char *const *argv,
     655             :                                struct linux_binprm *bprm)
     656             : {
     657           0 :         while (argc-- > 0) {
     658           0 :                 int ret = copy_string_kernel(argv[argc], bprm);
     659           0 :                 if (ret < 0)
     660             :                         return ret;
     661           0 :                 if (fatal_signal_pending(current))
     662             :                         return -ERESTARTNOHAND;
     663           0 :                 cond_resched();
     664             :         }
     665             :         return 0;
     666             : }
     667             : 
     668             : #ifdef CONFIG_MMU
     669             : 
     670             : /*
     671             :  * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
     672             :  * the binfmt code determines where the new stack should reside, we shift it to
     673             :  * its final location.  The process proceeds as follows:
     674             :  *
     675             :  * 1) Use shift to calculate the new vma endpoints.
     676             :  * 2) Extend vma to cover both the old and new ranges.  This ensures the
     677             :  *    arguments passed to subsequent functions are consistent.
     678             :  * 3) Move vma's page tables to the new range.
     679             :  * 4) Free up any cleared pgd range.
     680             :  * 5) Shrink the vma to cover only the new range.
     681             :  */
     682           0 : static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
     683             : {
     684           0 :         struct mm_struct *mm = vma->vm_mm;
     685           0 :         unsigned long old_start = vma->vm_start;
     686           0 :         unsigned long old_end = vma->vm_end;
     687           0 :         unsigned long length = old_end - old_start;
     688           0 :         unsigned long new_start = old_start - shift;
     689           0 :         unsigned long new_end = old_end - shift;
     690             :         struct mmu_gather tlb;
     691             : 
     692           0 :         BUG_ON(new_start > new_end);
     693             : 
     694             :         /*
     695             :          * ensure there are no vmas between where we want to go
     696             :          * and where we are
     697             :          */
     698           0 :         if (vma != find_vma(mm, new_start))
     699             :                 return -EFAULT;
     700             : 
     701             :         /*
     702             :          * cover the whole range: [new_start, old_end)
     703             :          */
     704           0 :         if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
     705             :                 return -ENOMEM;
     706             : 
     707             :         /*
     708             :          * move the page tables downwards, on failure we rely on
     709             :          * process cleanup to remove whatever mess we made.
     710             :          */
     711           0 :         if (length != move_page_tables(vma, old_start,
     712             :                                        vma, new_start, length, false))
     713             :                 return -ENOMEM;
     714             : 
     715           0 :         lru_add_drain();
     716           0 :         tlb_gather_mmu(&tlb, mm);
     717           0 :         if (new_end > old_start) {
     718             :                 /*
     719             :                  * when the old and new regions overlap clear from new_end.
     720             :                  */
     721           0 :                 free_pgd_range(&tlb, new_end, old_end, new_end,
     722           0 :                         vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
     723             :         } else {
     724             :                 /*
     725             :                  * otherwise, clean from old_start; this is done to not touch
     726             :                  * the address space in [new_end, old_start). Some architectures
     727             :                  * have constraints on va-space that make this illegal (IA64) -
     728             :                  * for the others it's just a little faster.
     729             :                  */
     730           0 :                 free_pgd_range(&tlb, old_start, old_end, new_end,
     731           0 :                         vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
     732             :         }
     733           0 :         tlb_finish_mmu(&tlb);
     734             : 
     735             :         /*
     736             :          * Shrink the vma to just the new range.  Always succeeds.
     737             :          */
     738           0 :         vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
     739             : 
     740           0 :         return 0;
     741             : }
     742             : 
     743             : /*
     744             :  * Finalizes the stack vm_area_struct. The flags and permissions are updated,
     745             :  * the stack is optionally relocated, and some extra space is added.
     746             :  */
     747           0 : int setup_arg_pages(struct linux_binprm *bprm,
     748             :                     unsigned long stack_top,
     749             :                     int executable_stack)
     750             : {
     751             :         unsigned long ret;
     752             :         unsigned long stack_shift;
     753           0 :         struct mm_struct *mm = current->mm;
     754           0 :         struct vm_area_struct *vma = bprm->vma;
     755           0 :         struct vm_area_struct *prev = NULL;
     756             :         unsigned long vm_flags;
     757             :         unsigned long stack_base;
     758             :         unsigned long stack_size;
     759             :         unsigned long stack_expand;
     760             :         unsigned long rlim_stack;
     761             : 
     762             : #ifdef CONFIG_STACK_GROWSUP
     763             :         /* Limit stack size */
     764             :         stack_base = bprm->rlim_stack.rlim_max;
     765             : 
     766             :         stack_base = calc_max_stack_size(stack_base);
     767             : 
     768             :         /* Add space for stack randomization. */
     769             :         stack_base += (STACK_RND_MASK << PAGE_SHIFT);
     770             : 
     771             :         /* Make sure we didn't let the argument array grow too large. */
     772             :         if (vma->vm_end - vma->vm_start > stack_base)
     773             :                 return -ENOMEM;
     774             : 
     775             :         stack_base = PAGE_ALIGN(stack_top - stack_base);
     776             : 
     777             :         stack_shift = vma->vm_start - stack_base;
     778             :         mm->arg_start = bprm->p - stack_shift;
     779             :         bprm->p = vma->vm_end - stack_shift;
     780             : #else
     781           0 :         stack_top = arch_align_stack(stack_top);
     782           0 :         stack_top = PAGE_ALIGN(stack_top);
     783             : 
     784           0 :         if (unlikely(stack_top < mmap_min_addr) ||
     785           0 :             unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
     786             :                 return -ENOMEM;
     787             : 
     788           0 :         stack_shift = vma->vm_end - stack_top;
     789             : 
     790           0 :         bprm->p -= stack_shift;
     791           0 :         mm->arg_start = bprm->p;
     792             : #endif
     793             : 
     794           0 :         if (bprm->loader)
     795           0 :                 bprm->loader -= stack_shift;
     796           0 :         bprm->exec -= stack_shift;
     797             : 
     798           0 :         if (mmap_write_lock_killable(mm))
     799             :                 return -EINTR;
     800             : 
     801           0 :         vm_flags = VM_STACK_FLAGS;
     802             : 
     803             :         /*
     804             :          * Adjust stack execute permissions; explicitly enable for
     805             :          * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
     806             :          * (arch default) otherwise.
     807             :          */
     808           0 :         if (unlikely(executable_stack == EXSTACK_ENABLE_X))
     809             :                 vm_flags |= VM_EXEC;
     810           0 :         else if (executable_stack == EXSTACK_DISABLE_X)
     811           0 :                 vm_flags &= ~VM_EXEC;
     812           0 :         vm_flags |= mm->def_flags;
     813           0 :         vm_flags |= VM_STACK_INCOMPLETE_SETUP;
     814             : 
     815           0 :         ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
     816             :                         vm_flags);
     817           0 :         if (ret)
     818             :                 goto out_unlock;
     819           0 :         BUG_ON(prev != vma);
     820             : 
     821           0 :         if (unlikely(vm_flags & VM_EXEC)) {
     822           0 :                 pr_warn_once("process '%pD4' started with executable stack\n",
     823             :                              bprm->file);
     824             :         }
     825             : 
     826             :         /* Move stack pages down in memory. */
     827           0 :         if (stack_shift) {
     828           0 :                 ret = shift_arg_pages(vma, stack_shift);
     829           0 :                 if (ret)
     830             :                         goto out_unlock;
     831             :         }
     832             : 
     833             :         /* mprotect_fixup is overkill to remove the temporary stack flags */
     834           0 :         vma->vm_flags &= ~VM_STACK_INCOMPLETE_SETUP;
     835             : 
     836           0 :         stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
     837           0 :         stack_size = vma->vm_end - vma->vm_start;
     838             :         /*
     839             :          * Align this down to a page boundary as expand_stack
     840             :          * will align it up.
     841             :          */
     842           0 :         rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
     843             : #ifdef CONFIG_STACK_GROWSUP
     844             :         if (stack_size + stack_expand > rlim_stack)
     845             :                 stack_base = vma->vm_start + rlim_stack;
     846             :         else
     847             :                 stack_base = vma->vm_end + stack_expand;
     848             : #else
     849           0 :         if (stack_size + stack_expand > rlim_stack)
     850           0 :                 stack_base = vma->vm_end - rlim_stack;
     851             :         else
     852           0 :                 stack_base = vma->vm_start - stack_expand;
     853             : #endif
     854           0 :         current->mm->start_stack = bprm->p;
     855           0 :         ret = expand_stack(vma, stack_base);
     856           0 :         if (ret)
     857           0 :                 ret = -EFAULT;
     858             : 
     859             : out_unlock:
     860           0 :         mmap_write_unlock(mm);
     861           0 :         return ret;
     862             : }
     863             : EXPORT_SYMBOL(setup_arg_pages);
     864             : 
     865             : #else
     866             : 
     867             : /*
     868             :  * Transfer the program arguments and environment from the holding pages
     869             :  * onto the stack. The provided stack pointer is adjusted accordingly.
     870             :  */
     871             : int transfer_args_to_stack(struct linux_binprm *bprm,
     872             :                            unsigned long *sp_location)
     873             : {
     874             :         unsigned long index, stop, sp;
     875             :         int ret = 0;
     876             : 
     877             :         stop = bprm->p >> PAGE_SHIFT;
     878             :         sp = *sp_location;
     879             : 
     880             :         for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
     881             :                 unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
     882             :                 char *src = kmap(bprm->page[index]) + offset;
     883             :                 sp -= PAGE_SIZE - offset;
     884             :                 if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
     885             :                         ret = -EFAULT;
     886             :                 kunmap(bprm->page[index]);
     887             :                 if (ret)
     888             :                         goto out;
     889             :         }
     890             : 
     891             :         *sp_location = sp;
     892             : 
     893             : out:
     894             :         return ret;
     895             : }
     896             : EXPORT_SYMBOL(transfer_args_to_stack);
     897             : 
     898             : #endif /* CONFIG_MMU */
     899             : 
     900           0 : static struct file *do_open_execat(int fd, struct filename *name, int flags)
     901             : {
     902             :         struct file *file;
     903             :         int err;
     904           0 :         struct open_flags open_exec_flags = {
     905             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     906             :                 .acc_mode = MAY_EXEC,
     907             :                 .intent = LOOKUP_OPEN,
     908             :                 .lookup_flags = LOOKUP_FOLLOW,
     909             :         };
     910             : 
     911           0 :         if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
     912             :                 return ERR_PTR(-EINVAL);
     913           0 :         if (flags & AT_SYMLINK_NOFOLLOW)
     914           0 :                 open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
     915           0 :         if (flags & AT_EMPTY_PATH)
     916           0 :                 open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
     917             : 
     918           0 :         file = do_filp_open(fd, name, &open_exec_flags);
     919           0 :         if (IS_ERR(file))
     920             :                 goto out;
     921             : 
     922             :         /*
     923             :          * may_open() has already checked for this, so it should be
     924             :          * impossible to trip now. But we need to be extra cautious
     925             :          * and check again at the very end too.
     926             :          */
     927           0 :         err = -EACCES;
     928           0 :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     929             :                          path_noexec(&file->f_path)))
     930             :                 goto exit;
     931             : 
     932           0 :         err = deny_write_access(file);
     933           0 :         if (err)
     934             :                 goto exit;
     935             : 
     936           0 :         if (name->name[0] != '\0')
     937           0 :                 fsnotify_open(file);
     938             : 
     939             : out:
     940             :         return file;
     941             : 
     942             : exit:
     943           0 :         fput(file);
     944           0 :         return ERR_PTR(err);
     945             : }
     946             : 
     947           0 : struct file *open_exec(const char *name)
     948             : {
     949           0 :         struct filename *filename = getname_kernel(name);
     950           0 :         struct file *f = ERR_CAST(filename);
     951             : 
     952           0 :         if (!IS_ERR(filename)) {
     953           0 :                 f = do_open_execat(AT_FDCWD, filename, 0);
     954           0 :                 putname(filename);
     955             :         }
     956           0 :         return f;
     957             : }
     958             : EXPORT_SYMBOL(open_exec);
     959             : 
     960             : #if defined(CONFIG_HAVE_AOUT) || defined(CONFIG_BINFMT_FLAT) || \
     961             :     defined(CONFIG_BINFMT_ELF_FDPIC)
     962             : ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
     963             : {
     964             :         ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
     965             :         if (res > 0)
     966             :                 flush_icache_user_range(addr, addr + len);
     967             :         return res;
     968             : }
     969             : EXPORT_SYMBOL(read_code);
     970             : #endif
     971             : 
     972             : /*
     973             :  * Maps the mm_struct mm into the current task struct.
     974             :  * On success, this function returns with exec_update_lock
     975             :  * held for writing.
     976             :  */
     977           0 : static int exec_mmap(struct mm_struct *mm)
     978             : {
     979             :         struct task_struct *tsk;
     980             :         struct mm_struct *old_mm, *active_mm;
     981             :         int ret;
     982             : 
     983             :         /* Notify parent that we're no longer interested in the old VM */
     984           0 :         tsk = current;
     985           0 :         old_mm = current->mm;
     986           0 :         exec_mm_release(tsk, old_mm);
     987             :         if (old_mm)
     988             :                 sync_mm_rss(old_mm);
     989             : 
     990           0 :         ret = down_write_killable(&tsk->signal->exec_update_lock);
     991           0 :         if (ret)
     992             :                 return ret;
     993             : 
     994           0 :         if (old_mm) {
     995             :                 /*
     996             :                  * If there is a pending fatal signal (perhaps a signal
     997             :                  * whose default action is to create a coredump), get
     998             :                  * out and die instead of going through with the exec.
     999             :                  */
    1000           0 :                 ret = mmap_read_lock_killable(old_mm);
    1001           0 :                 if (ret) {
    1002           0 :                         up_write(&tsk->signal->exec_update_lock);
    1003           0 :                         return ret;
    1004             :                 }
    1005             :         }
    1006             : 
    1007           0 :         task_lock(tsk);
    1008           0 :         membarrier_exec_mmap(mm);
    1009             : 
    1010             :         local_irq_disable();
    1011           0 :         active_mm = tsk->active_mm;
    1012           0 :         tsk->active_mm = mm;
    1013           0 :         tsk->mm = mm;
    1014             :         /*
    1015             :          * This prevents preemption while active_mm is being loaded and
    1016             :          * it and mm are being updated, which could cause problems for
    1017             :          * lazy tlb mm refcounting when these are updated by context
    1018             :          * switches. Not all architectures can handle irqs off over
    1019             :          * activate_mm yet.
    1020             :          */
    1021             :         if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1022             :                 local_irq_enable();
    1023           0 :         activate_mm(active_mm, mm);
    1024             :         if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1025             :                 local_irq_enable();
    1026           0 :         tsk->mm->vmacache_seqnum = 0;
    1027           0 :         vmacache_flush(tsk);
    1028           0 :         task_unlock(tsk);
    1029           0 :         if (old_mm) {
    1030           0 :                 mmap_read_unlock(old_mm);
    1031           0 :                 BUG_ON(active_mm != old_mm);
    1032           0 :                 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
    1033           0 :                 mm_update_next_owner(old_mm);
    1034           0 :                 mmput(old_mm);
    1035           0 :                 return 0;
    1036             :         }
    1037             :         mmdrop(active_mm);
    1038             :         return 0;
    1039             : }
    1040             : 
    1041           0 : static int de_thread(struct task_struct *tsk)
    1042             : {
    1043           0 :         struct signal_struct *sig = tsk->signal;
    1044           0 :         struct sighand_struct *oldsighand = tsk->sighand;
    1045           0 :         spinlock_t *lock = &oldsighand->siglock;
    1046             : 
    1047           0 :         if (thread_group_empty(tsk))
    1048             :                 goto no_thread_group;
    1049             : 
    1050             :         /*
    1051             :          * Kill all other threads in the thread group.
    1052             :          */
    1053           0 :         spin_lock_irq(lock);
    1054           0 :         if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
    1055             :                 /*
    1056             :                  * Another group action in progress, just
    1057             :                  * return so that the signal is processed.
    1058             :                  */
    1059           0 :                 spin_unlock_irq(lock);
    1060           0 :                 return -EAGAIN;
    1061             :         }
    1062             : 
    1063           0 :         sig->group_exec_task = tsk;
    1064           0 :         sig->notify_count = zap_other_threads(tsk);
    1065           0 :         if (!thread_group_leader(tsk))
    1066           0 :                 sig->notify_count--;
    1067             : 
    1068           0 :         while (sig->notify_count) {
    1069           0 :                 __set_current_state(TASK_KILLABLE);
    1070           0 :                 spin_unlock_irq(lock);
    1071           0 :                 schedule();
    1072           0 :                 if (__fatal_signal_pending(tsk))
    1073             :                         goto killed;
    1074             :                 spin_lock_irq(lock);
    1075             :         }
    1076           0 :         spin_unlock_irq(lock);
    1077             : 
    1078             :         /*
    1079             :          * At this point all other threads have exited, all we have to
    1080             :          * do is to wait for the thread group leader to become inactive,
    1081             :          * and to assume its PID:
    1082             :          */
    1083           0 :         if (!thread_group_leader(tsk)) {
    1084           0 :                 struct task_struct *leader = tsk->group_leader;
    1085             : 
    1086             :                 for (;;) {
    1087           0 :                         cgroup_threadgroup_change_begin(tsk);
    1088           0 :                         write_lock_irq(&tasklist_lock);
    1089             :                         /*
    1090             :                          * Do this under tasklist_lock to ensure that
    1091             :                          * exit_notify() can't miss ->group_exec_task
    1092             :                          */
    1093           0 :                         sig->notify_count = -1;
    1094           0 :                         if (likely(leader->exit_state))
    1095             :                                 break;
    1096           0 :                         __set_current_state(TASK_KILLABLE);
    1097           0 :                         write_unlock_irq(&tasklist_lock);
    1098           0 :                         cgroup_threadgroup_change_end(tsk);
    1099           0 :                         schedule();
    1100           0 :                         if (__fatal_signal_pending(tsk))
    1101             :                                 goto killed;
    1102             :                 }
    1103             : 
    1104             :                 /*
    1105             :                  * The only record we have of the real-time age of a
    1106             :                  * process, regardless of execs it's done, is start_time.
    1107             :                  * All the past CPU time is accumulated in signal_struct
    1108             :                  * from sister threads now dead.  But in this non-leader
    1109             :                  * exec, nothing survives from the original leader thread,
    1110             :                  * whose birth marks the true age of this process now.
    1111             :                  * When we take on its identity by switching to its PID, we
    1112             :                  * also take its birthdate (always earlier than our own).
    1113             :                  */
    1114           0 :                 tsk->start_time = leader->start_time;
    1115           0 :                 tsk->start_boottime = leader->start_boottime;
    1116             : 
    1117           0 :                 BUG_ON(!same_thread_group(leader, tsk));
    1118             :                 /*
    1119             :                  * An exec() starts a new thread group with the
    1120             :                  * TGID of the previous thread group. Rehash the
    1121             :                  * two threads with a switched PID, and release
    1122             :                  * the former thread group leader:
    1123             :                  */
    1124             : 
    1125             :                 /* Become a process group leader with the old leader's pid.
    1126             :                  * The old leader becomes a thread of this thread group.
    1127             :                  */
    1128           0 :                 exchange_tids(tsk, leader);
    1129           0 :                 transfer_pid(leader, tsk, PIDTYPE_TGID);
    1130           0 :                 transfer_pid(leader, tsk, PIDTYPE_PGID);
    1131           0 :                 transfer_pid(leader, tsk, PIDTYPE_SID);
    1132             : 
    1133           0 :                 list_replace_rcu(&leader->tasks, &tsk->tasks);
    1134           0 :                 list_replace_init(&leader->sibling, &tsk->sibling);
    1135             : 
    1136           0 :                 tsk->group_leader = tsk;
    1137           0 :                 leader->group_leader = tsk;
    1138             : 
    1139           0 :                 tsk->exit_signal = SIGCHLD;
    1140           0 :                 leader->exit_signal = -1;
    1141             : 
    1142           0 :                 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
    1143           0 :                 leader->exit_state = EXIT_DEAD;
    1144             : 
    1145             :                 /*
    1146             :                  * We are going to release_task()->ptrace_unlink() silently,
    1147             :                  * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
    1148             :                  * the tracer won't block again waiting for this thread.
    1149             :                  */
    1150           0 :                 if (unlikely(leader->ptrace))
    1151           0 :                         __wake_up_parent(leader, leader->parent);
    1152           0 :                 write_unlock_irq(&tasklist_lock);
    1153           0 :                 cgroup_threadgroup_change_end(tsk);
    1154             : 
    1155           0 :                 release_task(leader);
    1156             :         }
    1157             : 
    1158           0 :         sig->group_exec_task = NULL;
    1159           0 :         sig->notify_count = 0;
    1160             : 
    1161             : no_thread_group:
    1162             :         /* we have changed execution domain */
    1163           0 :         tsk->exit_signal = SIGCHLD;
    1164             : 
    1165           0 :         BUG_ON(!thread_group_leader(tsk));
    1166             :         return 0;
    1167             : 
    1168             : killed:
    1169             :         /* protects against exit_notify() and __exit_signal() */
    1170           0 :         read_lock(&tasklist_lock);
    1171           0 :         sig->group_exec_task = NULL;
    1172           0 :         sig->notify_count = 0;
    1173           0 :         read_unlock(&tasklist_lock);
    1174           0 :         return -EAGAIN;
    1175             : }
    1176             : 
    1177             : 
    1178             : /*
    1179             :  * This function makes sure the current process has its own signal table,
    1180             :  * so that flush_signal_handlers can later reset the handlers without
    1181             :  * disturbing other processes.  (Other processes might share the signal
    1182             :  * table via the CLONE_SIGHAND option to clone().)
    1183             :  */
    1184           0 : static int unshare_sighand(struct task_struct *me)
    1185             : {
    1186           0 :         struct sighand_struct *oldsighand = me->sighand;
    1187             : 
    1188           0 :         if (refcount_read(&oldsighand->count) != 1) {
    1189             :                 struct sighand_struct *newsighand;
    1190             :                 /*
    1191             :                  * This ->sighand is shared with the CLONE_SIGHAND
    1192             :                  * but not CLONE_THREAD task, switch to the new one.
    1193             :                  */
    1194           0 :                 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
    1195           0 :                 if (!newsighand)
    1196             :                         return -ENOMEM;
    1197             : 
    1198           0 :                 refcount_set(&newsighand->count, 1);
    1199           0 :                 memcpy(newsighand->action, oldsighand->action,
    1200             :                        sizeof(newsighand->action));
    1201             : 
    1202           0 :                 write_lock_irq(&tasklist_lock);
    1203           0 :                 spin_lock(&oldsighand->siglock);
    1204           0 :                 rcu_assign_pointer(me->sighand, newsighand);
    1205           0 :                 spin_unlock(&oldsighand->siglock);
    1206           0 :                 write_unlock_irq(&tasklist_lock);
    1207             : 
    1208           0 :                 __cleanup_sighand(oldsighand);
    1209             :         }
    1210             :         return 0;
    1211             : }
    1212             : 
    1213           0 : char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
    1214             : {
    1215           0 :         task_lock(tsk);
    1216             :         /* Always NUL terminated and zero-padded */
    1217           0 :         strscpy_pad(buf, tsk->comm, buf_size);
    1218           0 :         task_unlock(tsk);
    1219           0 :         return buf;
    1220             : }
    1221             : EXPORT_SYMBOL_GPL(__get_task_comm);
    1222             : 
    1223             : /*
    1224             :  * These functions flush out all traces of the currently running executable
    1225             :  * so that a new one can be started
    1226             :  */
    1227             : 
    1228         106 : void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
    1229             : {
    1230         106 :         task_lock(tsk);
    1231         106 :         trace_task_rename(tsk, buf);
    1232         106 :         strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
    1233         106 :         task_unlock(tsk);
    1234         106 :         perf_event_comm(tsk, exec);
    1235         106 : }
    1236             : 
    1237             : /*
    1238             :  * Calling this is the point of no return. None of the failures will be
    1239             :  * seen by userspace since either the process is already taking a fatal
    1240             :  * signal (via de_thread() or coredump), or will have SEGV raised
    1241             :  * (after exec_mmap()) by search_binary_handler (see below).
    1242             :  */
    1243           0 : int begin_new_exec(struct linux_binprm * bprm)
    1244             : {
    1245           0 :         struct task_struct *me = current;
    1246             :         int retval;
    1247             : 
    1248             :         /* Once we are committed compute the creds */
    1249           0 :         retval = bprm_creds_from_file(bprm);
    1250           0 :         if (retval)
    1251             :                 return retval;
    1252             : 
    1253             :         /*
    1254             :          * Ensure all future errors are fatal.
    1255             :          */
    1256           0 :         bprm->point_of_no_return = true;
    1257             : 
    1258             :         /*
    1259             :          * Make this the only thread in the thread group.
    1260             :          */
    1261           0 :         retval = de_thread(me);
    1262           0 :         if (retval)
    1263             :                 goto out;
    1264             : 
    1265             :         /*
    1266             :          * Cancel any io_uring activity across execve
    1267             :          */
    1268           0 :         io_uring_task_cancel();
    1269             : 
    1270             :         /* Ensure the files table is not shared. */
    1271           0 :         retval = unshare_files();
    1272           0 :         if (retval)
    1273             :                 goto out;
    1274             : 
    1275             :         /*
    1276             :          * Must be called _before_ exec_mmap() as bprm->mm is
    1277             :          * not visible until then. This also enables the update
    1278             :          * to be lockless.
    1279             :          */
    1280           0 :         retval = set_mm_exe_file(bprm->mm, bprm->file);
    1281           0 :         if (retval)
    1282             :                 goto out;
    1283             : 
    1284             :         /* If the binary is not readable then enforce mm->dumpable=0 */
    1285           0 :         would_dump(bprm, bprm->file);
    1286           0 :         if (bprm->have_execfd)
    1287           0 :                 would_dump(bprm, bprm->executable);
    1288             : 
    1289             :         /*
    1290             :          * Release all of the old mmap stuff
    1291             :          */
    1292           0 :         acct_arg_size(bprm, 0);
    1293           0 :         retval = exec_mmap(bprm->mm);
    1294           0 :         if (retval)
    1295             :                 goto out;
    1296             : 
    1297           0 :         bprm->mm = NULL;
    1298             : 
    1299             : #ifdef CONFIG_POSIX_TIMERS
    1300           0 :         exit_itimers(me->signal);
    1301           0 :         flush_itimer_signals();
    1302             : #endif
    1303             : 
    1304             :         /*
    1305             :          * Make the signal table private.
    1306             :          */
    1307           0 :         retval = unshare_sighand(me);
    1308           0 :         if (retval)
    1309             :                 goto out_unlock;
    1310             : 
    1311           0 :         if (me->flags & PF_KTHREAD)
    1312           0 :                 free_kthread_struct(me);
    1313           0 :         me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
    1314             :                                         PF_NOFREEZE | PF_NO_SETAFFINITY);
    1315           0 :         flush_thread();
    1316           0 :         me->personality &= ~bprm->per_clear;
    1317             : 
    1318           0 :         clear_syscall_work_syscall_user_dispatch(me);
    1319             : 
    1320             :         /*
    1321             :          * We have to apply CLOEXEC before we change whether the process is
    1322             :          * dumpable (in setup_new_exec) to avoid a race with a process in userspace
    1323             :          * trying to access the should-be-closed file descriptors of a process
    1324             :          * undergoing exec(2).
    1325             :          */
    1326           0 :         do_close_on_exec(me->files);
    1327             : 
    1328           0 :         if (bprm->secureexec) {
    1329             :                 /* Make sure parent cannot signal privileged process. */
    1330           0 :                 me->pdeath_signal = 0;
    1331             : 
    1332             :                 /*
    1333             :                  * For secureexec, reset the stack limit to sane default to
    1334             :                  * avoid bad behavior from the prior rlimits. This has to
    1335             :                  * happen before arch_pick_mmap_layout(), which examines
    1336             :                  * RLIMIT_STACK, but after the point of no return to avoid
    1337             :                  * needing to clean up the change on failure.
    1338             :                  */
    1339           0 :                 if (bprm->rlim_stack.rlim_cur > _STK_LIM)
    1340           0 :                         bprm->rlim_stack.rlim_cur = _STK_LIM;
    1341             :         }
    1342             : 
    1343           0 :         me->sas_ss_sp = me->sas_ss_size = 0;
    1344             : 
    1345             :         /*
    1346             :          * Figure out dumpability. Note that this checking only of current
    1347             :          * is wrong, but userspace depends on it. This should be testing
    1348             :          * bprm->secureexec instead.
    1349             :          */
    1350           0 :         if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
    1351           0 :             !(uid_eq(current_euid(), current_uid()) &&
    1352           0 :               gid_eq(current_egid(), current_gid())))
    1353           0 :                 set_dumpable(current->mm, suid_dumpable);
    1354             :         else
    1355           0 :                 set_dumpable(current->mm, SUID_DUMP_USER);
    1356             : 
    1357             :         perf_event_exec();
    1358           0 :         __set_task_comm(me, kbasename(bprm->filename), true);
    1359             : 
    1360             :         /* An exec changes our domain. We are no longer part of the thread
    1361             :            group */
    1362           0 :         WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
    1363           0 :         flush_signal_handlers(me, 0);
    1364             : 
    1365           0 :         retval = set_cred_ucounts(bprm->cred);
    1366           0 :         if (retval < 0)
    1367             :                 goto out_unlock;
    1368             : 
    1369             :         /*
    1370             :          * install the new credentials for this executable
    1371             :          */
    1372           0 :         security_bprm_committing_creds(bprm);
    1373             : 
    1374           0 :         commit_creds(bprm->cred);
    1375           0 :         bprm->cred = NULL;
    1376             : 
    1377             :         /*
    1378             :          * Disable monitoring for regular users
    1379             :          * when executing setuid binaries. Must
    1380             :          * wait until new credentials are committed
    1381             :          * by commit_creds() above
    1382             :          */
    1383           0 :         if (get_dumpable(me->mm) != SUID_DUMP_USER)
    1384             :                 perf_event_exit_task(me);
    1385             :         /*
    1386             :          * cred_guard_mutex must be held at least to this point to prevent
    1387             :          * ptrace_attach() from altering our determination of the task's
    1388             :          * credentials; any time after this it may be unlocked.
    1389             :          */
    1390           0 :         security_bprm_committed_creds(bprm);
    1391             : 
    1392             :         /* Pass the opened binary to the interpreter. */
    1393           0 :         if (bprm->have_execfd) {
    1394           0 :                 retval = get_unused_fd_flags(0);
    1395           0 :                 if (retval < 0)
    1396             :                         goto out_unlock;
    1397           0 :                 fd_install(retval, bprm->executable);
    1398           0 :                 bprm->executable = NULL;
    1399           0 :                 bprm->execfd = retval;
    1400             :         }
    1401             :         return 0;
    1402             : 
    1403             : out_unlock:
    1404           0 :         up_write(&me->signal->exec_update_lock);
    1405             : out:
    1406             :         return retval;
    1407             : }
    1408             : EXPORT_SYMBOL(begin_new_exec);
    1409             : 
    1410           0 : void would_dump(struct linux_binprm *bprm, struct file *file)
    1411             : {
    1412           0 :         struct inode *inode = file_inode(file);
    1413           0 :         struct user_namespace *mnt_userns = file_mnt_user_ns(file);
    1414           0 :         if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
    1415             :                 struct user_namespace *old, *user_ns;
    1416           0 :                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
    1417             : 
    1418             :                 /* Ensure mm->user_ns contains the executable */
    1419           0 :                 user_ns = old = bprm->mm->user_ns;
    1420           0 :                 while ((user_ns != &init_user_ns) &&
    1421           0 :                        !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
    1422           0 :                         user_ns = user_ns->parent;
    1423             : 
    1424           0 :                 if (old != user_ns) {
    1425           0 :                         bprm->mm->user_ns = get_user_ns(user_ns);
    1426           0 :                         put_user_ns(old);
    1427             :                 }
    1428             :         }
    1429           0 : }
    1430             : EXPORT_SYMBOL(would_dump);
    1431             : 
    1432           0 : void setup_new_exec(struct linux_binprm * bprm)
    1433             : {
    1434             :         /* Setup things that can depend upon the personality */
    1435           0 :         struct task_struct *me = current;
    1436             : 
    1437           0 :         arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
    1438             : 
    1439             :         arch_setup_new_exec();
    1440             : 
    1441             :         /* Set the new mm task size. We have to do that late because it may
    1442             :          * depend on TIF_32BIT which is only updated in flush_thread() on
    1443             :          * some architectures like powerpc
    1444             :          */
    1445           0 :         me->mm->task_size = TASK_SIZE;
    1446           0 :         up_write(&me->signal->exec_update_lock);
    1447           0 :         mutex_unlock(&me->signal->cred_guard_mutex);
    1448           0 : }
    1449             : EXPORT_SYMBOL(setup_new_exec);
    1450             : 
    1451             : /* Runs immediately before start_thread() takes over. */
    1452           0 : void finalize_exec(struct linux_binprm *bprm)
    1453             : {
    1454             :         /* Store any stack rlimit changes before starting thread. */
    1455           0 :         task_lock(current->group_leader);
    1456           0 :         current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
    1457           0 :         task_unlock(current->group_leader);
    1458           0 : }
    1459             : EXPORT_SYMBOL(finalize_exec);
    1460             : 
    1461             : /*
    1462             :  * Prepare credentials and lock ->cred_guard_mutex.
    1463             :  * setup_new_exec() commits the new creds and drops the lock.
    1464             :  * Or, if exec fails before, free_bprm() should release ->cred
    1465             :  * and unlock.
    1466             :  */
    1467           0 : static int prepare_bprm_creds(struct linux_binprm *bprm)
    1468             : {
    1469           0 :         if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
    1470             :                 return -ERESTARTNOINTR;
    1471             : 
    1472           0 :         bprm->cred = prepare_exec_creds();
    1473           0 :         if (likely(bprm->cred))
    1474             :                 return 0;
    1475             : 
    1476           0 :         mutex_unlock(&current->signal->cred_guard_mutex);
    1477             :         return -ENOMEM;
    1478             : }
    1479             : 
    1480           0 : static void free_bprm(struct linux_binprm *bprm)
    1481             : {
    1482           0 :         if (bprm->mm) {
    1483           0 :                 acct_arg_size(bprm, 0);
    1484           0 :                 mmput(bprm->mm);
    1485             :         }
    1486           0 :         free_arg_pages(bprm);
    1487           0 :         if (bprm->cred) {
    1488           0 :                 mutex_unlock(&current->signal->cred_guard_mutex);
    1489           0 :                 abort_creds(bprm->cred);
    1490             :         }
    1491           0 :         if (bprm->file) {
    1492           0 :                 allow_write_access(bprm->file);
    1493           0 :                 fput(bprm->file);
    1494             :         }
    1495           0 :         if (bprm->executable)
    1496           0 :                 fput(bprm->executable);
    1497             :         /* If a binfmt changed the interp, free it. */
    1498           0 :         if (bprm->interp != bprm->filename)
    1499           0 :                 kfree(bprm->interp);
    1500           0 :         kfree(bprm->fdpath);
    1501           0 :         kfree(bprm);
    1502           0 : }
    1503             : 
    1504           0 : static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
    1505             : {
    1506           0 :         struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
    1507           0 :         int retval = -ENOMEM;
    1508           0 :         if (!bprm)
    1509             :                 goto out;
    1510             : 
    1511           0 :         if (fd == AT_FDCWD || filename->name[0] == '/') {
    1512           0 :                 bprm->filename = filename->name;
    1513             :         } else {
    1514           0 :                 if (filename->name[0] == '\0')
    1515           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
    1516             :                 else
    1517           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
    1518             :                                                   fd, filename->name);
    1519           0 :                 if (!bprm->fdpath)
    1520             :                         goto out_free;
    1521             : 
    1522           0 :                 bprm->filename = bprm->fdpath;
    1523             :         }
    1524           0 :         bprm->interp = bprm->filename;
    1525             : 
    1526           0 :         retval = bprm_mm_init(bprm);
    1527           0 :         if (retval)
    1528             :                 goto out_free;
    1529             :         return bprm;
    1530             : 
    1531             : out_free:
    1532           0 :         free_bprm(bprm);
    1533             : out:
    1534           0 :         return ERR_PTR(retval);
    1535             : }
    1536             : 
    1537           0 : int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
    1538             : {
    1539             :         /* If a binfmt changed the interp, free it first. */
    1540           0 :         if (bprm->interp != bprm->filename)
    1541           0 :                 kfree(bprm->interp);
    1542           0 :         bprm->interp = kstrdup(interp, GFP_KERNEL);
    1543           0 :         if (!bprm->interp)
    1544             :                 return -ENOMEM;
    1545           0 :         return 0;
    1546             : }
    1547             : EXPORT_SYMBOL(bprm_change_interp);
    1548             : 
    1549             : /*
    1550             :  * determine how safe it is to execute the proposed program
    1551             :  * - the caller must hold ->cred_guard_mutex to protect against
    1552             :  *   PTRACE_ATTACH or seccomp thread-sync
    1553             :  */
    1554           0 : static void check_unsafe_exec(struct linux_binprm *bprm)
    1555             : {
    1556           0 :         struct task_struct *p = current, *t;
    1557             :         unsigned n_fs;
    1558             : 
    1559           0 :         if (p->ptrace)
    1560           0 :                 bprm->unsafe |= LSM_UNSAFE_PTRACE;
    1561             : 
    1562             :         /*
    1563             :          * This isn't strictly necessary, but it makes it harder for LSMs to
    1564             :          * mess up.
    1565             :          */
    1566           0 :         if (task_no_new_privs(current))
    1567           0 :                 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
    1568             : 
    1569           0 :         t = p;
    1570           0 :         n_fs = 1;
    1571           0 :         spin_lock(&p->fs->lock);
    1572             :         rcu_read_lock();
    1573           0 :         while_each_thread(p, t) {
    1574           0 :                 if (t->fs == p->fs)
    1575           0 :                         n_fs++;
    1576             :         }
    1577             :         rcu_read_unlock();
    1578             : 
    1579           0 :         if (p->fs->users > n_fs)
    1580           0 :                 bprm->unsafe |= LSM_UNSAFE_SHARE;
    1581             :         else
    1582           0 :                 p->fs->in_exec = 1;
    1583           0 :         spin_unlock(&p->fs->lock);
    1584           0 : }
    1585             : 
    1586           0 : static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
    1587             : {
    1588             :         /* Handle suid and sgid on files */
    1589             :         struct user_namespace *mnt_userns;
    1590             :         struct inode *inode;
    1591             :         unsigned int mode;
    1592             :         kuid_t uid;
    1593             :         kgid_t gid;
    1594             : 
    1595           0 :         if (!mnt_may_suid(file->f_path.mnt))
    1596             :                 return;
    1597             : 
    1598           0 :         if (task_no_new_privs(current))
    1599             :                 return;
    1600             : 
    1601           0 :         inode = file->f_path.dentry->d_inode;
    1602           0 :         mode = READ_ONCE(inode->i_mode);
    1603           0 :         if (!(mode & (S_ISUID|S_ISGID)))
    1604             :                 return;
    1605             : 
    1606           0 :         mnt_userns = file_mnt_user_ns(file);
    1607             : 
    1608             :         /* Be careful if suid/sgid is set */
    1609           0 :         inode_lock(inode);
    1610             : 
    1611             :         /* reload atomically mode/uid/gid now that lock held */
    1612           0 :         mode = inode->i_mode;
    1613           0 :         uid = i_uid_into_mnt(mnt_userns, inode);
    1614           0 :         gid = i_gid_into_mnt(mnt_userns, inode);
    1615           0 :         inode_unlock(inode);
    1616             : 
    1617             :         /* We ignore suid/sgid if there are no mappings for them in the ns */
    1618           0 :         if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
    1619           0 :                  !kgid_has_mapping(bprm->cred->user_ns, gid))
    1620             :                 return;
    1621             : 
    1622           0 :         if (mode & S_ISUID) {
    1623           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1624           0 :                 bprm->cred->euid = uid;
    1625             :         }
    1626             : 
    1627           0 :         if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
    1628           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1629           0 :                 bprm->cred->egid = gid;
    1630             :         }
    1631             : }
    1632             : 
    1633             : /*
    1634             :  * Compute bprm->cred based upon the final binary.
    1635             :  */
    1636           0 : static int bprm_creds_from_file(struct linux_binprm *bprm)
    1637             : {
    1638             :         /* Compute creds based on which file? */
    1639           0 :         struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
    1640             : 
    1641           0 :         bprm_fill_uid(bprm, file);
    1642           0 :         return security_bprm_creds_from_file(bprm, file);
    1643             : }
    1644             : 
    1645             : /*
    1646             :  * Fill the binprm structure from the inode.
    1647             :  * Read the first BINPRM_BUF_SIZE bytes
    1648             :  *
    1649             :  * This may be called multiple times for binary chains (scripts for example).
    1650             :  */
    1651           0 : static int prepare_binprm(struct linux_binprm *bprm)
    1652             : {
    1653           0 :         loff_t pos = 0;
    1654             : 
    1655           0 :         memset(bprm->buf, 0, BINPRM_BUF_SIZE);
    1656           0 :         return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
    1657             : }
    1658             : 
    1659             : /*
    1660             :  * Arguments are '\0' separated strings found at the location bprm->p
    1661             :  * points to; chop off the first by relocating bprm->p to right after
    1662             :  * the first '\0' encountered.
    1663             :  */
    1664           0 : int remove_arg_zero(struct linux_binprm *bprm)
    1665             : {
    1666           0 :         int ret = 0;
    1667             :         unsigned long offset;
    1668             :         char *kaddr;
    1669             :         struct page *page;
    1670             : 
    1671           0 :         if (!bprm->argc)
    1672             :                 return 0;
    1673             : 
    1674             :         do {
    1675           0 :                 offset = bprm->p & ~PAGE_MASK;
    1676           0 :                 page = get_arg_page(bprm, bprm->p, 0);
    1677           0 :                 if (!page) {
    1678             :                         ret = -EFAULT;
    1679             :                         goto out;
    1680             :                 }
    1681           0 :                 kaddr = kmap_atomic(page);
    1682             : 
    1683           0 :                 for (; offset < PAGE_SIZE && kaddr[offset];
    1684           0 :                                 offset++, bprm->p++)
    1685             :                         ;
    1686             : 
    1687           0 :                 kunmap_atomic(kaddr);
    1688           0 :                 put_arg_page(page);
    1689           0 :         } while (offset == PAGE_SIZE);
    1690             : 
    1691           0 :         bprm->p++;
    1692           0 :         bprm->argc--;
    1693           0 :         ret = 0;
    1694             : 
    1695             : out:
    1696             :         return ret;
    1697             : }
    1698             : EXPORT_SYMBOL(remove_arg_zero);
    1699             : 
    1700             : #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
    1701             : /*
    1702             :  * cycle through the list of binary format handlers until one recognizes the image
    1703             :  */
    1704           0 : static int search_binary_handler(struct linux_binprm *bprm)
    1705             : {
    1706           0 :         bool need_retry = IS_ENABLED(CONFIG_MODULES);
    1707             :         struct linux_binfmt *fmt;
    1708             :         int retval;
    1709             : 
    1710           0 :         retval = prepare_binprm(bprm);
    1711           0 :         if (retval < 0)
    1712             :                 return retval;
    1713             : 
    1714           0 :         retval = security_bprm_check(bprm);
    1715             :         if (retval)
    1716             :                 return retval;
    1717             : 
    1718           0 :         retval = -ENOENT;
    1719             :  retry:
    1720           0 :         read_lock(&binfmt_lock);
    1721           0 :         list_for_each_entry(fmt, &formats, lh) {
    1722           0 :                 if (!try_module_get(fmt->module))
    1723             :                         continue;
    1724           0 :                 read_unlock(&binfmt_lock);
    1725             : 
    1726           0 :                 retval = fmt->load_binary(bprm);
    1727             : 
    1728           0 :                 read_lock(&binfmt_lock);
    1729           0 :                 put_binfmt(fmt);
    1730           0 :                 if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
    1731           0 :                         read_unlock(&binfmt_lock);
    1732           0 :                         return retval;
    1733             :                 }
    1734             :         }
    1735           0 :         read_unlock(&binfmt_lock);
    1736             : 
    1737             :         if (need_retry) {
    1738             :                 if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
    1739             :                     printable(bprm->buf[2]) && printable(bprm->buf[3]))
    1740             :                         return retval;
    1741             :                 if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
    1742             :                         return retval;
    1743             :                 need_retry = false;
    1744             :                 goto retry;
    1745             :         }
    1746             : 
    1747           0 :         return retval;
    1748             : }
    1749             : 
    1750           0 : static int exec_binprm(struct linux_binprm *bprm)
    1751             : {
    1752             :         pid_t old_pid, old_vpid;
    1753             :         int ret, depth;
    1754             : 
    1755             :         /* Need to fetch pid before load_binary changes it */
    1756           0 :         old_pid = current->pid;
    1757             :         rcu_read_lock();
    1758           0 :         old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
    1759             :         rcu_read_unlock();
    1760             : 
    1761             :         /* This allows 4 levels of binfmt rewrites before failing hard. */
    1762           0 :         for (depth = 0;; depth++) {
    1763             :                 struct file *exec;
    1764           0 :                 if (depth > 5)
    1765             :                         return -ELOOP;
    1766             : 
    1767           0 :                 ret = search_binary_handler(bprm);
    1768           0 :                 if (ret < 0)
    1769             :                         return ret;
    1770           0 :                 if (!bprm->interpreter)
    1771             :                         break;
    1772             : 
    1773           0 :                 exec = bprm->file;
    1774           0 :                 bprm->file = bprm->interpreter;
    1775           0 :                 bprm->interpreter = NULL;
    1776             : 
    1777           0 :                 allow_write_access(exec);
    1778           0 :                 if (unlikely(bprm->have_execfd)) {
    1779           0 :                         if (bprm->executable) {
    1780           0 :                                 fput(exec);
    1781           0 :                                 return -ENOEXEC;
    1782             :                         }
    1783           0 :                         bprm->executable = exec;
    1784             :                 } else
    1785           0 :                         fput(exec);
    1786             :         }
    1787             : 
    1788           0 :         audit_bprm(bprm);
    1789           0 :         trace_sched_process_exec(current, old_pid, bprm);
    1790           0 :         ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
    1791           0 :         proc_exec_connector(current);
    1792           0 :         return 0;
    1793             : }
    1794             : 
    1795             : /*
    1796             :  * sys_execve() executes a new program.
    1797             :  */
    1798           0 : static int bprm_execve(struct linux_binprm *bprm,
    1799             :                        int fd, struct filename *filename, int flags)
    1800             : {
    1801             :         struct file *file;
    1802             :         int retval;
    1803             : 
    1804           0 :         retval = prepare_bprm_creds(bprm);
    1805           0 :         if (retval)
    1806             :                 return retval;
    1807             : 
    1808           0 :         check_unsafe_exec(bprm);
    1809           0 :         current->in_execve = 1;
    1810             : 
    1811           0 :         file = do_open_execat(fd, filename, flags);
    1812           0 :         retval = PTR_ERR(file);
    1813           0 :         if (IS_ERR(file))
    1814             :                 goto out_unmark;
    1815             : 
    1816             :         sched_exec();
    1817             : 
    1818           0 :         bprm->file = file;
    1819             :         /*
    1820             :          * Record that a name derived from an O_CLOEXEC fd will be
    1821             :          * inaccessible after exec.  This allows the code in exec to
    1822             :          * choose to fail when the executable is not mmaped into the
    1823             :          * interpreter and an open file descriptor is not passed to
    1824             :          * the interpreter.  This makes for a better user experience
    1825             :          * than having the interpreter start and then immediately fail
    1826             :          * when it finds the executable is inaccessible.
    1827             :          */
    1828           0 :         if (bprm->fdpath && get_close_on_exec(fd))
    1829           0 :                 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
    1830             : 
    1831             :         /* Set the unchanging part of bprm->cred */
    1832           0 :         retval = security_bprm_creds_for_exec(bprm);
    1833             :         if (retval)
    1834             :                 goto out;
    1835             : 
    1836           0 :         retval = exec_binprm(bprm);
    1837           0 :         if (retval < 0)
    1838             :                 goto out;
    1839             : 
    1840             :         /* execve succeeded */
    1841           0 :         current->fs->in_exec = 0;
    1842           0 :         current->in_execve = 0;
    1843           0 :         rseq_execve(current);
    1844           0 :         acct_update_integrals(current);
    1845           0 :         task_numa_free(current, false);
    1846           0 :         return retval;
    1847             : 
    1848             : out:
    1849             :         /*
    1850             :          * If past the point of no return ensure the code never
    1851             :          * returns to the userspace process.  Use an existing fatal
    1852             :          * signal if present otherwise terminate the process with
    1853             :          * SIGSEGV.
    1854             :          */
    1855           0 :         if (bprm->point_of_no_return && !fatal_signal_pending(current))
    1856           0 :                 force_fatal_sig(SIGSEGV);
    1857             : 
    1858             : out_unmark:
    1859           0 :         current->fs->in_exec = 0;
    1860           0 :         current->in_execve = 0;
    1861             : 
    1862           0 :         return retval;
    1863             : }
    1864             : 
    1865           0 : static int do_execveat_common(int fd, struct filename *filename,
    1866             :                               struct user_arg_ptr argv,
    1867             :                               struct user_arg_ptr envp,
    1868             :                               int flags)
    1869             : {
    1870             :         struct linux_binprm *bprm;
    1871             :         int retval;
    1872             : 
    1873           0 :         if (IS_ERR(filename))
    1874           0 :                 return PTR_ERR(filename);
    1875             : 
    1876             :         /*
    1877             :          * We move the actual failure in case of RLIMIT_NPROC excess from
    1878             :          * set*uid() to execve() because too many poorly written programs
    1879             :          * don't check setuid() return code.  Here we additionally recheck
    1880             :          * whether NPROC limit is still exceeded.
    1881             :          */
    1882           0 :         if ((current->flags & PF_NPROC_EXCEEDED) &&
    1883           0 :             is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
    1884             :                 retval = -EAGAIN;
    1885             :                 goto out_ret;
    1886             :         }
    1887             : 
    1888             :         /* We're below the limit (still or again), so we don't want to make
    1889             :          * further execve() calls fail. */
    1890           0 :         current->flags &= ~PF_NPROC_EXCEEDED;
    1891             : 
    1892           0 :         bprm = alloc_bprm(fd, filename);
    1893           0 :         if (IS_ERR(bprm)) {
    1894           0 :                 retval = PTR_ERR(bprm);
    1895           0 :                 goto out_ret;
    1896             :         }
    1897             : 
    1898           0 :         retval = count(argv, MAX_ARG_STRINGS);
    1899           0 :         if (retval == 0)
    1900           0 :                 pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
    1901             :                              current->comm, bprm->filename);
    1902           0 :         if (retval < 0)
    1903             :                 goto out_free;
    1904           0 :         bprm->argc = retval;
    1905             : 
    1906           0 :         retval = count(envp, MAX_ARG_STRINGS);
    1907           0 :         if (retval < 0)
    1908             :                 goto out_free;
    1909           0 :         bprm->envc = retval;
    1910             : 
    1911           0 :         retval = bprm_stack_limits(bprm);
    1912           0 :         if (retval < 0)
    1913             :                 goto out_free;
    1914             : 
    1915           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    1916           0 :         if (retval < 0)
    1917             :                 goto out_free;
    1918           0 :         bprm->exec = bprm->p;
    1919             : 
    1920           0 :         retval = copy_strings(bprm->envc, envp, bprm);
    1921           0 :         if (retval < 0)
    1922             :                 goto out_free;
    1923             : 
    1924           0 :         retval = copy_strings(bprm->argc, argv, bprm);
    1925           0 :         if (retval < 0)
    1926             :                 goto out_free;
    1927             : 
    1928             :         /*
    1929             :          * When argv is empty, add an empty string ("") as argv[0] to
    1930             :          * ensure confused userspace programs that start processing
    1931             :          * from argv[1] won't end up walking envp. See also
    1932             :          * bprm_stack_limits().
    1933             :          */
    1934           0 :         if (bprm->argc == 0) {
    1935           0 :                 retval = copy_string_kernel("", bprm);
    1936           0 :                 if (retval < 0)
    1937             :                         goto out_free;
    1938           0 :                 bprm->argc = 1;
    1939             :         }
    1940             : 
    1941           0 :         retval = bprm_execve(bprm, fd, filename, flags);
    1942             : out_free:
    1943           0 :         free_bprm(bprm);
    1944             : 
    1945             : out_ret:
    1946           0 :         putname(filename);
    1947           0 :         return retval;
    1948             : }
    1949             : 
    1950           0 : int kernel_execve(const char *kernel_filename,
    1951             :                   const char *const *argv, const char *const *envp)
    1952             : {
    1953             :         struct filename *filename;
    1954             :         struct linux_binprm *bprm;
    1955           0 :         int fd = AT_FDCWD;
    1956             :         int retval;
    1957             : 
    1958           0 :         filename = getname_kernel(kernel_filename);
    1959           0 :         if (IS_ERR(filename))
    1960           0 :                 return PTR_ERR(filename);
    1961             : 
    1962           0 :         bprm = alloc_bprm(fd, filename);
    1963           0 :         if (IS_ERR(bprm)) {
    1964           0 :                 retval = PTR_ERR(bprm);
    1965           0 :                 goto out_ret;
    1966             :         }
    1967             : 
    1968           0 :         retval = count_strings_kernel(argv);
    1969           0 :         if (WARN_ON_ONCE(retval == 0))
    1970           0 :                 retval = -EINVAL;
    1971           0 :         if (retval < 0)
    1972             :                 goto out_free;
    1973           0 :         bprm->argc = retval;
    1974             : 
    1975           0 :         retval = count_strings_kernel(envp);
    1976           0 :         if (retval < 0)
    1977             :                 goto out_free;
    1978           0 :         bprm->envc = retval;
    1979             : 
    1980           0 :         retval = bprm_stack_limits(bprm);
    1981           0 :         if (retval < 0)
    1982             :                 goto out_free;
    1983             : 
    1984           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    1985           0 :         if (retval < 0)
    1986             :                 goto out_free;
    1987           0 :         bprm->exec = bprm->p;
    1988             : 
    1989           0 :         retval = copy_strings_kernel(bprm->envc, envp, bprm);
    1990           0 :         if (retval < 0)
    1991             :                 goto out_free;
    1992             : 
    1993           0 :         retval = copy_strings_kernel(bprm->argc, argv, bprm);
    1994           0 :         if (retval < 0)
    1995             :                 goto out_free;
    1996             : 
    1997           0 :         retval = bprm_execve(bprm, fd, filename, 0);
    1998             : out_free:
    1999           0 :         free_bprm(bprm);
    2000             : out_ret:
    2001           0 :         putname(filename);
    2002           0 :         return retval;
    2003             : }
    2004             : 
    2005             : static int do_execve(struct filename *filename,
    2006             :         const char __user *const __user *__argv,
    2007             :         const char __user *const __user *__envp)
    2008             : {
    2009           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2010           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2011           0 :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2012             : }
    2013             : 
    2014             : static int do_execveat(int fd, struct filename *filename,
    2015             :                 const char __user *const __user *__argv,
    2016             :                 const char __user *const __user *__envp,
    2017             :                 int flags)
    2018             : {
    2019           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2020           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2021             : 
    2022           0 :         return do_execveat_common(fd, filename, argv, envp, flags);
    2023             : }
    2024             : 
    2025             : #ifdef CONFIG_COMPAT
    2026             : static int compat_do_execve(struct filename *filename,
    2027             :         const compat_uptr_t __user *__argv,
    2028             :         const compat_uptr_t __user *__envp)
    2029             : {
    2030             :         struct user_arg_ptr argv = {
    2031             :                 .is_compat = true,
    2032             :                 .ptr.compat = __argv,
    2033             :         };
    2034             :         struct user_arg_ptr envp = {
    2035             :                 .is_compat = true,
    2036             :                 .ptr.compat = __envp,
    2037             :         };
    2038             :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2039             : }
    2040             : 
    2041             : static int compat_do_execveat(int fd, struct filename *filename,
    2042             :                               const compat_uptr_t __user *__argv,
    2043             :                               const compat_uptr_t __user *__envp,
    2044             :                               int flags)
    2045             : {
    2046             :         struct user_arg_ptr argv = {
    2047             :                 .is_compat = true,
    2048             :                 .ptr.compat = __argv,
    2049             :         };
    2050             :         struct user_arg_ptr envp = {
    2051             :                 .is_compat = true,
    2052             :                 .ptr.compat = __envp,
    2053             :         };
    2054             :         return do_execveat_common(fd, filename, argv, envp, flags);
    2055             : }
    2056             : #endif
    2057             : 
    2058           0 : void set_binfmt(struct linux_binfmt *new)
    2059             : {
    2060           0 :         struct mm_struct *mm = current->mm;
    2061             : 
    2062           0 :         if (mm->binfmt)
    2063             :                 module_put(mm->binfmt->module);
    2064             : 
    2065           0 :         mm->binfmt = new;
    2066             :         if (new)
    2067             :                 __module_get(new->module);
    2068           0 : }
    2069             : EXPORT_SYMBOL(set_binfmt);
    2070             : 
    2071             : /*
    2072             :  * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
    2073             :  */
    2074           0 : void set_dumpable(struct mm_struct *mm, int value)
    2075             : {
    2076           0 :         if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
    2077             :                 return;
    2078             : 
    2079           0 :         set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
    2080             : }
    2081             : 
    2082           0 : SYSCALL_DEFINE3(execve,
    2083             :                 const char __user *, filename,
    2084             :                 const char __user *const __user *, argv,
    2085             :                 const char __user *const __user *, envp)
    2086             : {
    2087           0 :         return do_execve(getname(filename), argv, envp);
    2088             : }
    2089             : 
    2090           0 : SYSCALL_DEFINE5(execveat,
    2091             :                 int, fd, const char __user *, filename,
    2092             :                 const char __user *const __user *, argv,
    2093             :                 const char __user *const __user *, envp,
    2094             :                 int, flags)
    2095             : {
    2096           0 :         return do_execveat(fd,
    2097             :                            getname_uflags(filename, flags),
    2098             :                            argv, envp, flags);
    2099             : }
    2100             : 
    2101             : #ifdef CONFIG_COMPAT
    2102             : COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
    2103             :         const compat_uptr_t __user *, argv,
    2104             :         const compat_uptr_t __user *, envp)
    2105             : {
    2106             :         return compat_do_execve(getname(filename), argv, envp);
    2107             : }
    2108             : 
    2109             : COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
    2110             :                        const char __user *, filename,
    2111             :                        const compat_uptr_t __user *, argv,
    2112             :                        const compat_uptr_t __user *, envp,
    2113             :                        int,  flags)
    2114             : {
    2115             :         return compat_do_execveat(fd,
    2116             :                                   getname_uflags(filename, flags),
    2117             :                                   argv, envp, flags);
    2118             : }
    2119             : #endif
    2120             : 
    2121             : #ifdef CONFIG_SYSCTL
    2122             : 
    2123           0 : static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
    2124             :                 void *buffer, size_t *lenp, loff_t *ppos)
    2125             : {
    2126           0 :         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    2127             : 
    2128           0 :         if (!error)
    2129           0 :                 validate_coredump_safety();
    2130           0 :         return error;
    2131             : }
    2132             : 
    2133             : static struct ctl_table fs_exec_sysctls[] = {
    2134             :         {
    2135             :                 .procname       = "suid_dumpable",
    2136             :                 .data           = &suid_dumpable,
    2137             :                 .maxlen         = sizeof(int),
    2138             :                 .mode           = 0644,
    2139             :                 .proc_handler   = proc_dointvec_minmax_coredump,
    2140             :                 .extra1         = SYSCTL_ZERO,
    2141             :                 .extra2         = SYSCTL_TWO,
    2142             :         },
    2143             :         { }
    2144             : };
    2145             : 
    2146           1 : static int __init init_fs_exec_sysctls(void)
    2147             : {
    2148           1 :         register_sysctl_init("fs", fs_exec_sysctls);
    2149           1 :         return 0;
    2150             : }
    2151             : 
    2152             : fs_initcall(init_fs_exec_sysctls);
    2153             : #endif /* CONFIG_SYSCTL */

Generated by: LCOV version 1.14