LCOV - code coverage report
Current view: top level - fs - namei.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 351 1889 18.6 %
Date: 2022-12-09 01:23:36 Functions: 37 149 24.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/namei.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : /*
       9             :  * Some corrections by tytso.
      10             :  */
      11             : 
      12             : /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
      13             :  * lookup logic.
      14             :  */
      15             : /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
      16             :  */
      17             : 
      18             : #include <linux/init.h>
      19             : #include <linux/export.h>
      20             : #include <linux/kernel.h>
      21             : #include <linux/slab.h>
      22             : #include <linux/fs.h>
      23             : #include <linux/namei.h>
      24             : #include <linux/pagemap.h>
      25             : #include <linux/fsnotify.h>
      26             : #include <linux/personality.h>
      27             : #include <linux/security.h>
      28             : #include <linux/ima.h>
      29             : #include <linux/syscalls.h>
      30             : #include <linux/mount.h>
      31             : #include <linux/audit.h>
      32             : #include <linux/capability.h>
      33             : #include <linux/file.h>
      34             : #include <linux/fcntl.h>
      35             : #include <linux/device_cgroup.h>
      36             : #include <linux/fs_struct.h>
      37             : #include <linux/posix_acl.h>
      38             : #include <linux/hash.h>
      39             : #include <linux/bitops.h>
      40             : #include <linux/init_task.h>
      41             : #include <linux/uaccess.h>
      42             : 
      43             : #include "internal.h"
      44             : #include "mount.h"
      45             : 
      46             : /* [Feb-1997 T. Schoebel-Theuer]
      47             :  * Fundamental changes in the pathname lookup mechanisms (namei)
      48             :  * were necessary because of omirr.  The reason is that omirr needs
      49             :  * to know the _real_ pathname, not the user-supplied one, in case
      50             :  * of symlinks (and also when transname replacements occur).
      51             :  *
      52             :  * The new code replaces the old recursive symlink resolution with
      53             :  * an iterative one (in case of non-nested symlink chains).  It does
      54             :  * this with calls to <fs>_follow_link().
      55             :  * As a side effect, dir_namei(), _namei() and follow_link() are now 
      56             :  * replaced with a single function lookup_dentry() that can handle all 
      57             :  * the special cases of the former code.
      58             :  *
      59             :  * With the new dcache, the pathname is stored at each inode, at least as
      60             :  * long as the refcount of the inode is positive.  As a side effect, the
      61             :  * size of the dcache depends on the inode cache and thus is dynamic.
      62             :  *
      63             :  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
      64             :  * resolution to correspond with current state of the code.
      65             :  *
      66             :  * Note that the symlink resolution is not *completely* iterative.
      67             :  * There is still a significant amount of tail- and mid- recursion in
      68             :  * the algorithm.  Also, note that <fs>_readlink() is not used in
      69             :  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
      70             :  * may return different results than <fs>_follow_link().  Many virtual
      71             :  * filesystems (including /proc) exhibit this behavior.
      72             :  */
      73             : 
      74             : /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
      75             :  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
      76             :  * and the name already exists in form of a symlink, try to create the new
      77             :  * name indicated by the symlink. The old code always complained that the
      78             :  * name already exists, due to not following the symlink even if its target
      79             :  * is nonexistent.  The new semantics affects also mknod() and link() when
      80             :  * the name is a symlink pointing to a non-existent name.
      81             :  *
      82             :  * I don't know which semantics is the right one, since I have no access
      83             :  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
      84             :  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
      85             :  * "old" one. Personally, I think the new semantics is much more logical.
      86             :  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
      87             :  * file does succeed in both HP-UX and SunOs, but not in Solaris
      88             :  * and in the old Linux semantics.
      89             :  */
      90             : 
      91             : /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
      92             :  * semantics.  See the comments in "open_namei" and "do_link" below.
      93             :  *
      94             :  * [10-Sep-98 Alan Modra] Another symlink change.
      95             :  */
      96             : 
      97             : /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
      98             :  *      inside the path - always follow.
      99             :  *      in the last component in creation/removal/renaming - never follow.
     100             :  *      if LOOKUP_FOLLOW passed - follow.
     101             :  *      if the pathname has trailing slashes - follow.
     102             :  *      otherwise - don't follow.
     103             :  * (applied in that order).
     104             :  *
     105             :  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
     106             :  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
     107             :  * During the 2.4 we need to fix the userland stuff depending on it -
     108             :  * hopefully we will be able to get rid of that wart in 2.5. So far only
     109             :  * XEmacs seems to be relying on it...
     110             :  */
     111             : /*
     112             :  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
     113             :  * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
     114             :  * any extra contention...
     115             :  */
     116             : 
     117             : /* In order to reduce some races, while at the same time doing additional
     118             :  * checking and hopefully speeding things up, we copy filenames to the
     119             :  * kernel data space before using them..
     120             :  *
     121             :  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
     122             :  * PATH_MAX includes the nul terminator --RR.
     123             :  */
     124             : 
     125             : #define EMBEDDED_NAME_MAX       (PATH_MAX - offsetof(struct filename, iname))
     126             : 
     127             : struct filename *
     128           0 : getname_flags(const char __user *filename, int flags, int *empty)
     129             : {
     130             :         struct filename *result;
     131             :         char *kname;
     132             :         int len;
     133             : 
     134           0 :         result = audit_reusename(filename);
     135             :         if (result)
     136             :                 return result;
     137             : 
     138           0 :         result = __getname();
     139           0 :         if (unlikely(!result))
     140             :                 return ERR_PTR(-ENOMEM);
     141             : 
     142             :         /*
     143             :          * First, try to embed the struct filename inside the names_cache
     144             :          * allocation
     145             :          */
     146           0 :         kname = (char *)result->iname;
     147           0 :         result->name = kname;
     148             : 
     149           0 :         len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
     150           0 :         if (unlikely(len < 0)) {
     151           0 :                 __putname(result);
     152           0 :                 return ERR_PTR(len);
     153             :         }
     154             : 
     155             :         /*
     156             :          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
     157             :          * separate struct filename so we can dedicate the entire
     158             :          * names_cache allocation for the pathname, and re-do the copy from
     159             :          * userland.
     160             :          */
     161           0 :         if (unlikely(len == EMBEDDED_NAME_MAX)) {
     162           0 :                 const size_t size = offsetof(struct filename, iname[1]);
     163           0 :                 kname = (char *)result;
     164             : 
     165             :                 /*
     166             :                  * size is chosen that way to guarantee that
     167             :                  * result->iname[0] is within the same object and that
     168             :                  * kname can't be equal to result->iname, no matter what.
     169             :                  */
     170           0 :                 result = kzalloc(size, GFP_KERNEL);
     171           0 :                 if (unlikely(!result)) {
     172           0 :                         __putname(kname);
     173           0 :                         return ERR_PTR(-ENOMEM);
     174             :                 }
     175           0 :                 result->name = kname;
     176           0 :                 len = strncpy_from_user(kname, filename, PATH_MAX);
     177           0 :                 if (unlikely(len < 0)) {
     178           0 :                         __putname(kname);
     179           0 :                         kfree(result);
     180           0 :                         return ERR_PTR(len);
     181             :                 }
     182           0 :                 if (unlikely(len == PATH_MAX)) {
     183           0 :                         __putname(kname);
     184           0 :                         kfree(result);
     185           0 :                         return ERR_PTR(-ENAMETOOLONG);
     186             :                 }
     187             :         }
     188             : 
     189           0 :         result->refcnt = 1;
     190             :         /* The empty path is special. */
     191           0 :         if (unlikely(!len)) {
     192           0 :                 if (empty)
     193           0 :                         *empty = 1;
     194           0 :                 if (!(flags & LOOKUP_EMPTY)) {
     195           0 :                         putname(result);
     196           0 :                         return ERR_PTR(-ENOENT);
     197             :                 }
     198             :         }
     199             : 
     200           0 :         result->uptr = filename;
     201           0 :         result->aname = NULL;
     202           0 :         audit_getname(result);
     203           0 :         return result;
     204             : }
     205             : 
     206             : struct filename *
     207           0 : getname_uflags(const char __user *filename, int uflags)
     208             : {
     209           0 :         int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
     210             : 
     211           0 :         return getname_flags(filename, flags, NULL);
     212             : }
     213             : 
     214             : struct filename *
     215           0 : getname(const char __user * filename)
     216             : {
     217           0 :         return getname_flags(filename, 0, NULL);
     218             : }
     219             : 
     220             : struct filename *
     221           3 : getname_kernel(const char * filename)
     222             : {
     223             :         struct filename *result;
     224           3 :         int len = strlen(filename) + 1;
     225             : 
     226           3 :         result = __getname();
     227           3 :         if (unlikely(!result))
     228             :                 return ERR_PTR(-ENOMEM);
     229             : 
     230           3 :         if (len <= EMBEDDED_NAME_MAX) {
     231           3 :                 result->name = (char *)result->iname;
     232           0 :         } else if (len <= PATH_MAX) {
     233           0 :                 const size_t size = offsetof(struct filename, iname[1]);
     234             :                 struct filename *tmp;
     235             : 
     236           0 :                 tmp = kmalloc(size, GFP_KERNEL);
     237           0 :                 if (unlikely(!tmp)) {
     238           0 :                         __putname(result);
     239           0 :                         return ERR_PTR(-ENOMEM);
     240             :                 }
     241           0 :                 tmp->name = (char *)result;
     242           0 :                 result = tmp;
     243             :         } else {
     244           0 :                 __putname(result);
     245           0 :                 return ERR_PTR(-ENAMETOOLONG);
     246             :         }
     247           3 :         memcpy((char *)result->name, filename, len);
     248           3 :         result->uptr = NULL;
     249           3 :         result->aname = NULL;
     250           3 :         result->refcnt = 1;
     251           3 :         audit_getname(result);
     252             : 
     253           3 :         return result;
     254             : }
     255             : 
     256           3 : void putname(struct filename *name)
     257             : {
     258           3 :         if (IS_ERR(name))
     259             :                 return;
     260             : 
     261           3 :         BUG_ON(name->refcnt <= 0);
     262             : 
     263           3 :         if (--name->refcnt > 0)
     264             :                 return;
     265             : 
     266           3 :         if (name->name != name->iname) {
     267           0 :                 __putname(name->name);
     268           0 :                 kfree(name);
     269             :         } else
     270           3 :                 __putname(name);
     271             : }
     272             : 
     273             : /**
     274             :  * check_acl - perform ACL permission checking
     275             :  * @mnt_userns: user namespace of the mount the inode was found from
     276             :  * @inode:      inode to check permissions on
     277             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     278             :  *
     279             :  * This function performs the ACL permission checking. Since this function
     280             :  * retrieves POSIX acls it needs to know whether it is called from a blocking or
     281             :  * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
     282             :  *
     283             :  * If the inode has been found through an idmapped mount the user namespace of
     284             :  * the vfsmount must be passed through @mnt_userns. This function will then take
     285             :  * care to map the inode according to @mnt_userns before checking permissions.
     286             :  * On non-idmapped mounts or if permission checking is to be performed on the
     287             :  * raw inode simply pass init_user_ns.
     288             :  */
     289             : static int check_acl(struct user_namespace *mnt_userns,
     290             :                      struct inode *inode, int mask)
     291             : {
     292             : #ifdef CONFIG_FS_POSIX_ACL
     293             :         struct posix_acl *acl;
     294             : 
     295             :         if (mask & MAY_NOT_BLOCK) {
     296             :                 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
     297             :                 if (!acl)
     298             :                         return -EAGAIN;
     299             :                 /* no ->get_acl() calls in RCU mode... */
     300             :                 if (is_uncached_acl(acl))
     301             :                         return -ECHILD;
     302             :                 return posix_acl_permission(mnt_userns, inode, acl, mask);
     303             :         }
     304             : 
     305             :         acl = get_acl(inode, ACL_TYPE_ACCESS);
     306             :         if (IS_ERR(acl))
     307             :                 return PTR_ERR(acl);
     308             :         if (acl) {
     309             :                 int error = posix_acl_permission(mnt_userns, inode, acl, mask);
     310             :                 posix_acl_release(acl);
     311             :                 return error;
     312             :         }
     313             : #endif
     314             : 
     315             :         return -EAGAIN;
     316             : }
     317             : 
     318             : /**
     319             :  * acl_permission_check - perform basic UNIX permission checking
     320             :  * @mnt_userns: user namespace of the mount the inode was found from
     321             :  * @inode:      inode to check permissions on
     322             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     323             :  *
     324             :  * This function performs the basic UNIX permission checking. Since this
     325             :  * function may retrieve POSIX acls it needs to know whether it is called from a
     326             :  * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
     327             :  *
     328             :  * If the inode has been found through an idmapped mount the user namespace of
     329             :  * the vfsmount must be passed through @mnt_userns. This function will then take
     330             :  * care to map the inode according to @mnt_userns before checking permissions.
     331             :  * On non-idmapped mounts or if permission checking is to be performed on the
     332             :  * raw inode simply pass init_user_ns.
     333             :  */
     334           7 : static int acl_permission_check(struct user_namespace *mnt_userns,
     335             :                                 struct inode *inode, int mask)
     336             : {
     337           7 :         unsigned int mode = inode->i_mode;
     338             :         kuid_t i_uid;
     339             : 
     340             :         /* Are we the owner? If so, ACL's don't matter */
     341           7 :         i_uid = i_uid_into_mnt(mnt_userns, inode);
     342          14 :         if (likely(uid_eq(current_fsuid(), i_uid))) {
     343           7 :                 mask &= 7;
     344           7 :                 mode >>= 6;
     345           7 :                 return (mask & ~mode) ? -EACCES : 0;
     346             :         }
     347             : 
     348             :         /* Do we have ACL's? */
     349             :         if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
     350             :                 int error = check_acl(mnt_userns, inode, mask);
     351             :                 if (error != -EAGAIN)
     352             :                         return error;
     353             :         }
     354             : 
     355             :         /* Only RWX matters for group/other mode bits */
     356           0 :         mask &= 7;
     357             : 
     358             :         /*
     359             :          * Are the group permissions different from
     360             :          * the other permissions in the bits we care
     361             :          * about? Need to check group ownership if so.
     362             :          */
     363           0 :         if (mask & (mode ^ (mode >> 3))) {
     364           0 :                 kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
     365           0 :                 if (in_group_p(kgid))
     366           0 :                         mode >>= 3;
     367             :         }
     368             : 
     369             :         /* Bits in 'mode' clear that we require? */
     370           0 :         return (mask & ~mode) ? -EACCES : 0;
     371             : }
     372             : 
     373             : /**
     374             :  * generic_permission -  check for access rights on a Posix-like filesystem
     375             :  * @mnt_userns: user namespace of the mount the inode was found from
     376             :  * @inode:      inode to check access rights for
     377             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
     378             :  *              %MAY_NOT_BLOCK ...)
     379             :  *
     380             :  * Used to check for read/write/execute permissions on a file.
     381             :  * We use "fsuid" for this, letting us set arbitrary permissions
     382             :  * for filesystem access without changing the "normal" uids which
     383             :  * are used for other things.
     384             :  *
     385             :  * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
     386             :  * request cannot be satisfied (eg. requires blocking or too much complexity).
     387             :  * It would then be called again in ref-walk mode.
     388             :  *
     389             :  * If the inode has been found through an idmapped mount the user namespace of
     390             :  * the vfsmount must be passed through @mnt_userns. This function will then take
     391             :  * care to map the inode according to @mnt_userns before checking permissions.
     392             :  * On non-idmapped mounts or if permission checking is to be performed on the
     393             :  * raw inode simply pass init_user_ns.
     394             :  */
     395           7 : int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
     396             :                        int mask)
     397             : {
     398             :         int ret;
     399             : 
     400             :         /*
     401             :          * Do the basic permission checks.
     402             :          */
     403           7 :         ret = acl_permission_check(mnt_userns, inode, mask);
     404           7 :         if (ret != -EACCES)
     405             :                 return ret;
     406             : 
     407           0 :         if (S_ISDIR(inode->i_mode)) {
     408             :                 /* DACs are overridable for directories */
     409           0 :                 if (!(mask & MAY_WRITE))
     410           0 :                         if (capable_wrt_inode_uidgid(mnt_userns, inode,
     411             :                                                      CAP_DAC_READ_SEARCH))
     412             :                                 return 0;
     413           0 :                 if (capable_wrt_inode_uidgid(mnt_userns, inode,
     414             :                                              CAP_DAC_OVERRIDE))
     415             :                         return 0;
     416           0 :                 return -EACCES;
     417             :         }
     418             : 
     419             :         /*
     420             :          * Searching includes executable on directories, else just read.
     421             :          */
     422           0 :         mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
     423           0 :         if (mask == MAY_READ)
     424           0 :                 if (capable_wrt_inode_uidgid(mnt_userns, inode,
     425             :                                              CAP_DAC_READ_SEARCH))
     426             :                         return 0;
     427             :         /*
     428             :          * Read/write DACs are always overridable.
     429             :          * Executable DACs are overridable when there is
     430             :          * at least one exec bit set.
     431             :          */
     432           0 :         if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
     433           0 :                 if (capable_wrt_inode_uidgid(mnt_userns, inode,
     434             :                                              CAP_DAC_OVERRIDE))
     435             :                         return 0;
     436             : 
     437             :         return -EACCES;
     438             : }
     439             : EXPORT_SYMBOL(generic_permission);
     440             : 
     441             : /**
     442             :  * do_inode_permission - UNIX permission checking
     443             :  * @mnt_userns: user namespace of the mount the inode was found from
     444             :  * @inode:      inode to check permissions on
     445             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     446             :  *
     447             :  * We _really_ want to just do "generic_permission()" without
     448             :  * even looking at the inode->i_op values. So we keep a cache
     449             :  * flag in inode->i_opflags, that says "this has no special
     450             :  * permission function, use the fast case".
     451             :  */
     452           7 : static inline int do_inode_permission(struct user_namespace *mnt_userns,
     453             :                                       struct inode *inode, int mask)
     454             : {
     455           7 :         if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
     456           2 :                 if (likely(inode->i_op->permission))
     457           0 :                         return inode->i_op->permission(mnt_userns, inode, mask);
     458             : 
     459             :                 /* This gets set once for the inode lifetime */
     460           4 :                 spin_lock(&inode->i_lock);
     461           2 :                 inode->i_opflags |= IOP_FASTPERM;
     462           2 :                 spin_unlock(&inode->i_lock);
     463             :         }
     464           7 :         return generic_permission(mnt_userns, inode, mask);
     465             : }
     466             : 
     467             : /**
     468             :  * sb_permission - Check superblock-level permissions
     469             :  * @sb: Superblock of inode to check permission on
     470             :  * @inode: Inode to check permission on
     471             :  * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
     472             :  *
     473             :  * Separate out file-system wide checks from inode-specific permission checks.
     474             :  */
     475             : static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
     476             : {
     477           7 :         if (unlikely(mask & MAY_WRITE)) {
     478           3 :                 umode_t mode = inode->i_mode;
     479             : 
     480             :                 /* Nobody gets write access to a read-only fs. */
     481           6 :                 if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
     482             :                         return -EROFS;
     483             :         }
     484             :         return 0;
     485             : }
     486             : 
     487             : /**
     488             :  * inode_permission - Check for access rights to a given inode
     489             :  * @mnt_userns: User namespace of the mount the inode was found from
     490             :  * @inode:      Inode to check permission on
     491             :  * @mask:       Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
     492             :  *
     493             :  * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
     494             :  * this, letting us set arbitrary permissions for filesystem access without
     495             :  * changing the "normal" UIDs which are used for other things.
     496             :  *
     497             :  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
     498             :  */
     499           7 : int inode_permission(struct user_namespace *mnt_userns,
     500             :                      struct inode *inode, int mask)
     501             : {
     502             :         int retval;
     503             : 
     504          14 :         retval = sb_permission(inode->i_sb, inode, mask);
     505           7 :         if (retval)
     506             :                 return retval;
     507             : 
     508           7 :         if (unlikely(mask & MAY_WRITE)) {
     509             :                 /*
     510             :                  * Nobody gets write access to an immutable file.
     511             :                  */
     512           3 :                 if (IS_IMMUTABLE(inode))
     513             :                         return -EPERM;
     514             : 
     515             :                 /*
     516             :                  * Updating mtime will likely cause i_uid and i_gid to be
     517             :                  * written back improperly if their true value is unknown
     518             :                  * to the vfs.
     519             :                  */
     520           3 :                 if (HAS_UNMAPPED_ID(mnt_userns, inode))
     521             :                         return -EACCES;
     522             :         }
     523             : 
     524           7 :         retval = do_inode_permission(mnt_userns, inode, mask);
     525           7 :         if (retval)
     526             :                 return retval;
     527             : 
     528           7 :         retval = devcgroup_inode_permission(inode, mask);
     529             :         if (retval)
     530             :                 return retval;
     531             : 
     532           7 :         return security_inode_permission(inode, mask);
     533             : }
     534             : EXPORT_SYMBOL(inode_permission);
     535             : 
     536             : /**
     537             :  * path_get - get a reference to a path
     538             :  * @path: path to get the reference to
     539             :  *
     540             :  * Given a path increment the reference count to the dentry and the vfsmount.
     541             :  */
     542           2 : void path_get(const struct path *path)
     543             : {
     544           2 :         mntget(path->mnt);
     545           4 :         dget(path->dentry);
     546           2 : }
     547             : EXPORT_SYMBOL(path_get);
     548             : 
     549             : /**
     550             :  * path_put - put a reference to a path
     551             :  * @path: path to put the reference to
     552             :  *
     553             :  * Given a path decrement the reference count to the dentry and the vfsmount.
     554             :  */
     555           0 : void path_put(const struct path *path)
     556             : {
     557           6 :         dput(path->dentry);
     558           6 :         mntput(path->mnt);
     559           0 : }
     560             : EXPORT_SYMBOL(path_put);
     561             : 
     562             : #define EMBEDDED_LEVELS 2
     563             : struct nameidata {
     564             :         struct path     path;
     565             :         struct qstr     last;
     566             :         struct path     root;
     567             :         struct inode    *inode; /* path.dentry.d_inode */
     568             :         unsigned int    flags, state;
     569             :         unsigned        seq, m_seq, r_seq;
     570             :         int             last_type;
     571             :         unsigned        depth;
     572             :         int             total_link_count;
     573             :         struct saved {
     574             :                 struct path link;
     575             :                 struct delayed_call done;
     576             :                 const char *name;
     577             :                 unsigned seq;
     578             :         } *stack, internal[EMBEDDED_LEVELS];
     579             :         struct filename *name;
     580             :         struct nameidata *saved;
     581             :         unsigned        root_seq;
     582             :         int             dfd;
     583             :         kuid_t          dir_uid;
     584             :         umode_t         dir_mode;
     585             : } __randomize_layout;
     586             : 
     587             : #define ND_ROOT_PRESET 1
     588             : #define ND_ROOT_GRABBED 2
     589             : #define ND_JUMPED 4
     590             : 
     591             : static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
     592             : {
     593           3 :         struct nameidata *old = current->nameidata;
     594           3 :         p->stack = p->internal;
     595           3 :         p->depth = 0;
     596           3 :         p->dfd = dfd;
     597           3 :         p->name = name;
     598           3 :         p->path.mnt = NULL;
     599           3 :         p->path.dentry = NULL;
     600           3 :         p->total_link_count = old ? old->total_link_count : 0;
     601           3 :         p->saved = old;
     602           3 :         current->nameidata = p;
     603             : }
     604             : 
     605             : static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name,
     606             :                           const struct path *root)
     607             : {
     608           3 :         __set_nameidata(p, dfd, name);
     609           3 :         p->state = 0;
     610           0 :         if (unlikely(root)) {
     611           0 :                 p->state = ND_ROOT_PRESET;
     612           0 :                 p->root = *root;
     613             :         }
     614             : }
     615             : 
     616           3 : static void restore_nameidata(void)
     617             : {
     618           3 :         struct nameidata *now = current->nameidata, *old = now->saved;
     619             : 
     620           3 :         current->nameidata = old;
     621           3 :         if (old)
     622           0 :                 old->total_link_count = now->total_link_count;
     623           3 :         if (now->stack != now->internal)
     624           0 :                 kfree(now->stack);
     625           3 : }
     626             : 
     627           0 : static bool nd_alloc_stack(struct nameidata *nd)
     628             : {
     629             :         struct saved *p;
     630             : 
     631           0 :         p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
     632           0 :                          nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
     633           0 :         if (unlikely(!p))
     634             :                 return false;
     635           0 :         memcpy(p, nd->internal, sizeof(nd->internal));
     636           0 :         nd->stack = p;
     637           0 :         return true;
     638             : }
     639             : 
     640             : /**
     641             :  * path_connected - Verify that a dentry is below mnt.mnt_root
     642             :  *
     643             :  * Rename can sometimes move a file or directory outside of a bind
     644             :  * mount; path_connected() allows those cases to be detected.
     645             :  */
     646             : static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
     647             : {
     648           0 :         struct super_block *sb = mnt->mnt_sb;
     649             : 
     650             :         /* Bind mounts can have disconnected paths */
     651           0 :         if (mnt->mnt_root == sb->s_root)
     652             :                 return true;
     653             : 
     654           0 :         return is_subdir(dentry, mnt->mnt_root);
     655             : }
     656             : 
     657             : static void drop_links(struct nameidata *nd)
     658             : {
     659           3 :         int i = nd->depth;
     660           3 :         while (i--) {
     661           0 :                 struct saved *last = nd->stack + i;
     662           0 :                 do_delayed_call(&last->done);
     663           0 :                 clear_delayed_call(&last->done);
     664             :         }
     665             : }
     666             : 
     667           3 : static void terminate_walk(struct nameidata *nd)
     668             : {
     669           6 :         drop_links(nd);
     670           3 :         if (!(nd->flags & LOOKUP_RCU)) {
     671             :                 int i;
     672           6 :                 path_put(&nd->path);
     673           3 :                 for (i = 0; i < nd->depth; i++)
     674           0 :                         path_put(&nd->stack[i].link);
     675           3 :                 if (nd->state & ND_ROOT_GRABBED) {
     676           0 :                         path_put(&nd->root);
     677           0 :                         nd->state &= ~ND_ROOT_GRABBED;
     678             :                 }
     679             :         } else {
     680           0 :                 nd->flags &= ~LOOKUP_RCU;
     681             :                 rcu_read_unlock();
     682             :         }
     683           3 :         nd->depth = 0;
     684           3 :         nd->path.mnt = NULL;
     685           3 :         nd->path.dentry = NULL;
     686           3 : }
     687             : 
     688             : /* path_put is needed afterwards regardless of success or failure */
     689           3 : static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
     690             : {
     691           3 :         int res = __legitimize_mnt(path->mnt, mseq);
     692           3 :         if (unlikely(res)) {
     693           0 :                 if (res > 0)
     694           0 :                         path->mnt = NULL;
     695           0 :                 path->dentry = NULL;
     696           0 :                 return false;
     697             :         }
     698           3 :         if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
     699           0 :                 path->dentry = NULL;
     700           0 :                 return false;
     701             :         }
     702           9 :         return !read_seqcount_retry(&path->dentry->d_seq, seq);
     703             : }
     704             : 
     705             : static inline bool legitimize_path(struct nameidata *nd,
     706             :                             struct path *path, unsigned seq)
     707             : {
     708           3 :         return __legitimize_path(path, seq, nd->m_seq);
     709             : }
     710             : 
     711           3 : static bool legitimize_links(struct nameidata *nd)
     712             : {
     713             :         int i;
     714           3 :         if (unlikely(nd->flags & LOOKUP_CACHED)) {
     715           0 :                 drop_links(nd);
     716           0 :                 nd->depth = 0;
     717           0 :                 return false;
     718             :         }
     719           0 :         for (i = 0; i < nd->depth; i++) {
     720           0 :                 struct saved *last = nd->stack + i;
     721           0 :                 if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
     722           0 :                         drop_links(nd);
     723           0 :                         nd->depth = i + 1;
     724           0 :                         return false;
     725             :                 }
     726             :         }
     727             :         return true;
     728             : }
     729             : 
     730           3 : static bool legitimize_root(struct nameidata *nd)
     731             : {
     732             :         /*
     733             :          * For scoped-lookups (where nd->root has been zeroed), we need to
     734             :          * restart the whole lookup from scratch -- because set_root() is wrong
     735             :          * for these lookups (nd->dfd is the root, not the filesystem root).
     736             :          */
     737           3 :         if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
     738             :                 return false;
     739             :         /* Nothing to do if nd->root is zero or is managed by the VFS user. */
     740           3 :         if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET))
     741             :                 return true;
     742           0 :         nd->state |= ND_ROOT_GRABBED;
     743           0 :         return legitimize_path(nd, &nd->root, nd->root_seq);
     744             : }
     745             : 
     746             : /*
     747             :  * Path walking has 2 modes, rcu-walk and ref-walk (see
     748             :  * Documentation/filesystems/path-lookup.txt).  In situations when we can't
     749             :  * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
     750             :  * normal reference counts on dentries and vfsmounts to transition to ref-walk
     751             :  * mode.  Refcounts are grabbed at the last known good point before rcu-walk
     752             :  * got stuck, so ref-walk may continue from there. If this is not successful
     753             :  * (e.g. a seqcount has changed), then failure is returned and it's up to the caller
     754             :  * to restart the path walk from the beginning in ref-walk mode.
     755             :  */
     756             : 
     757             : /**
     758             :  * try_to_unlazy - try to switch to ref-walk mode.
     759             :  * @nd: nameidata pathwalk data
     760             :  * Returns: true on success, false on failure
     761             :  *
     762             :  * try_to_unlazy attempts to legitimize the current nd->path and nd->root
     763             :  * for ref-walk mode.
     764             :  * Must be called from rcu-walk context.
     765             :  * Nothing should touch nameidata between try_to_unlazy() failure and
     766             :  * terminate_walk().
     767             :  */
     768           3 : static bool try_to_unlazy(struct nameidata *nd)
     769             : {
     770           3 :         struct dentry *parent = nd->path.dentry;
     771             : 
     772           3 :         BUG_ON(!(nd->flags & LOOKUP_RCU));
     773             : 
     774           3 :         nd->flags &= ~LOOKUP_RCU;
     775           3 :         if (unlikely(!legitimize_links(nd)))
     776             :                 goto out1;
     777           6 :         if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
     778             :                 goto out;
     779           3 :         if (unlikely(!legitimize_root(nd)))
     780             :                 goto out;
     781             :         rcu_read_unlock();
     782           3 :         BUG_ON(nd->inode != parent->d_inode);
     783             :         return true;
     784             : 
     785             : out1:
     786           0 :         nd->path.mnt = NULL;
     787           0 :         nd->path.dentry = NULL;
     788             : out:
     789             :         rcu_read_unlock();
     790           0 :         return false;
     791             : }
     792             : 
     793             : /**
     794             :  * try_to_unlazy_next - try to switch to ref-walk mode.
     795             :  * @nd: nameidata pathwalk data
     796             :  * @dentry: next dentry to step into
     797             :  * @seq: seq number to check @dentry against
     798             :  * Returns: true on success, false on failure
     799             :  *
     800             :  * Similar to try_to_unlazy(), but here we have the next dentry already
     801             :  * picked by rcu-walk and want to legitimize that in addition to the current
     802             :  * nd->path and nd->root for ref-walk mode.  Must be called from rcu-walk context.
     803             :  * Nothing should touch nameidata between try_to_unlazy_next() failure and
     804             :  * terminate_walk().
     805             :  */
     806           0 : static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
     807             : {
     808           0 :         BUG_ON(!(nd->flags & LOOKUP_RCU));
     809             : 
     810           0 :         nd->flags &= ~LOOKUP_RCU;
     811           0 :         if (unlikely(!legitimize_links(nd)))
     812             :                 goto out2;
     813           0 :         if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
     814             :                 goto out2;
     815           0 :         if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
     816             :                 goto out1;
     817             : 
     818             :         /*
     819             :          * We need to move both the parent and the dentry from the RCU domain
     820             :          * to be properly refcounted. And the sequence number in the dentry
     821             :          * validates *both* dentry counters, since we checked the sequence
     822             :          * number of the parent after we got the child sequence number. So we
     823             :          * know the parent must still be valid if the child sequence number is
     824             :          */
     825           0 :         if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
     826             :                 goto out;
     827           0 :         if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
     828             :                 goto out_dput;
     829             :         /*
     830             :          * Sequence counts matched. Now make sure that the root is
     831             :          * still valid and get it if required.
     832             :          */
     833           0 :         if (unlikely(!legitimize_root(nd)))
     834             :                 goto out_dput;
     835             :         rcu_read_unlock();
     836           0 :         return true;
     837             : 
     838             : out2:
     839           0 :         nd->path.mnt = NULL;
     840             : out1:
     841           0 :         nd->path.dentry = NULL;
     842             : out:
     843             :         rcu_read_unlock();
     844           0 :         return false;
     845             : out_dput:
     846             :         rcu_read_unlock();
     847           0 :         dput(dentry);
     848           0 :         return false;
     849             : }
     850             : 
     851             : static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
     852             : {
     853           1 :         if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
     854           0 :                 return dentry->d_op->d_revalidate(dentry, flags);
     855             :         else
     856             :                 return 1;
     857             : }
     858             : 
     859             : /**
     860             :  * complete_walk - successful completion of path walk
     861             :  * @nd:  pointer to nameidata
     862             :  *
     863             :  * If we had been in RCU mode, drop out of it and legitimize nd->path.
     864             :  * Revalidate the final result, unless we'd already done that during
     865             :  * the path walk or the filesystem doesn't ask for it.  Return 0 on
     866             :  * success, -error on failure.  In case of failure caller does not
     867             :  * need to drop nd->path.
     868             :  */
     869           3 : static int complete_walk(struct nameidata *nd)
     870             : {
     871           3 :         struct dentry *dentry = nd->path.dentry;
     872             :         int status;
     873             : 
     874           3 :         if (nd->flags & LOOKUP_RCU) {
     875             :                 /*
     876             :                  * We don't want to zero nd->root for scoped-lookups or
     877             :                  * externally-managed nd->root.
     878             :                  */
     879           3 :                 if (!(nd->state & ND_ROOT_PRESET))
     880           3 :                         if (!(nd->flags & LOOKUP_IS_SCOPED))
     881           3 :                                 nd->root.mnt = NULL;
     882           3 :                 nd->flags &= ~LOOKUP_CACHED;
     883           3 :                 if (!try_to_unlazy(nd))
     884             :                         return -ECHILD;
     885             :         }
     886             : 
     887           3 :         if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
     888             :                 /*
     889             :                  * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
     890             :                  * ever step outside the root during lookup" and should already
     891             :                  * be guaranteed by the rest of namei, we want to avoid a namei
     892             :                  * BUG resulting in userspace being given a path that was not
     893             :                  * scoped within the root at some point during the lookup.
     894             :                  *
     895             :                  * So, do a final sanity-check to make sure that in the
     896             :                  * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
     897             :                  * we won't silently return an fd completely outside of the
     898             :                  * requested root to userspace.
     899             :                  *
     900             :                  * Userspace could move the path outside the root after this
     901             :                  * check, but as discussed elsewhere this is not a concern (the
     902             :                  * resolved file was inside the root at some point).
     903             :                  */
     904           0 :                 if (!path_is_under(&nd->path, &nd->root))
     905             :                         return -EXDEV;
     906             :         }
     907             : 
     908           3 :         if (likely(!(nd->state & ND_JUMPED)))
     909             :                 return 0;
     910             : 
     911           0 :         if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
     912             :                 return 0;
     913             : 
     914           0 :         status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
     915           0 :         if (status > 0)
     916             :                 return 0;
     917             : 
     918           0 :         if (!status)
     919           0 :                 status = -ESTALE;
     920             : 
     921             :         return status;
     922             : }
     923             : 
     924           3 : static int set_root(struct nameidata *nd)
     925             : {
     926           3 :         struct fs_struct *fs = current->fs;
     927             : 
     928             :         /*
     929             :          * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
     930             :          * still have to ensure it doesn't happen because it will cause a breakout
     931             :          * from the dirfd.
     932             :          */
     933           3 :         if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
     934             :                 return -ENOTRECOVERABLE;
     935             : 
     936           3 :         if (nd->flags & LOOKUP_RCU) {
     937             :                 unsigned seq;
     938             : 
     939             :                 do {
     940           6 :                         seq = read_seqcount_begin(&fs->seq);
     941           3 :                         nd->root = fs->root;
     942           9 :                         nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
     943           9 :                 } while (read_seqcount_retry(&fs->seq, seq));
     944             :         } else {
     945           0 :                 get_fs_root(fs, &nd->root);
     946           0 :                 nd->state |= ND_ROOT_GRABBED;
     947             :         }
     948             :         return 0;
     949             : }
     950             : 
     951           3 : static int nd_jump_root(struct nameidata *nd)
     952             : {
     953           3 :         if (unlikely(nd->flags & LOOKUP_BENEATH))
     954             :                 return -EXDEV;
     955           3 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
     956             :                 /* Absolute path arguments to path_init() are allowed. */
     957           0 :                 if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
     958             :                         return -EXDEV;
     959             :         }
     960           3 :         if (!nd->root.mnt) {
     961           3 :                 int error = set_root(nd);
     962           3 :                 if (error)
     963             :                         return error;
     964             :         }
     965           3 :         if (nd->flags & LOOKUP_RCU) {
     966             :                 struct dentry *d;
     967           3 :                 nd->path = nd->root;
     968           3 :                 d = nd->path.dentry;
     969           3 :                 nd->inode = d->d_inode;
     970           3 :                 nd->seq = nd->root_seq;
     971           9 :                 if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
     972             :                         return -ECHILD;
     973             :         } else {
     974           0 :                 path_put(&nd->path);
     975           0 :                 nd->path = nd->root;
     976           0 :                 path_get(&nd->path);
     977           0 :                 nd->inode = nd->path.dentry->d_inode;
     978             :         }
     979           3 :         nd->state |= ND_JUMPED;
     980           3 :         return 0;
     981             : }
     982             : 
     983             : /*
     984             :  * Helper to directly jump to a known parsed path from ->get_link,
     985             :  * caller must have taken a reference to path beforehand.
     986             :  */
     987           0 : int nd_jump_link(struct path *path)
     988             : {
     989           0 :         int error = -ELOOP;
     990           0 :         struct nameidata *nd = current->nameidata;
     991             : 
     992           0 :         if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
     993             :                 goto err;
     994             : 
     995           0 :         error = -EXDEV;
     996           0 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
     997           0 :                 if (nd->path.mnt != path->mnt)
     998             :                         goto err;
     999             :         }
    1000             :         /* Not currently safe for scoped-lookups. */
    1001           0 :         if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
    1002             :                 goto err;
    1003             : 
    1004           0 :         path_put(&nd->path);
    1005           0 :         nd->path = *path;
    1006           0 :         nd->inode = nd->path.dentry->d_inode;
    1007           0 :         nd->state |= ND_JUMPED;
    1008           0 :         return 0;
    1009             : 
    1010             : err:
    1011           0 :         path_put(path);
    1012           0 :         return error;
    1013             : }
    1014             : 
    1015           0 : static inline void put_link(struct nameidata *nd)
    1016             : {
    1017           0 :         struct saved *last = nd->stack + --nd->depth;
    1018           0 :         do_delayed_call(&last->done);
    1019           0 :         if (!(nd->flags & LOOKUP_RCU))
    1020           0 :                 path_put(&last->link);
    1021           0 : }
    1022             : 
    1023             : static int sysctl_protected_symlinks __read_mostly;
    1024             : static int sysctl_protected_hardlinks __read_mostly;
    1025             : static int sysctl_protected_fifos __read_mostly;
    1026             : static int sysctl_protected_regular __read_mostly;
    1027             : 
    1028             : #ifdef CONFIG_SYSCTL
    1029             : static struct ctl_table namei_sysctls[] = {
    1030             :         {
    1031             :                 .procname       = "protected_symlinks",
    1032             :                 .data           = &sysctl_protected_symlinks,
    1033             :                 .maxlen         = sizeof(int),
    1034             :                 .mode           = 0600,
    1035             :                 .proc_handler   = proc_dointvec_minmax,
    1036             :                 .extra1         = SYSCTL_ZERO,
    1037             :                 .extra2         = SYSCTL_ONE,
    1038             :         },
    1039             :         {
    1040             :                 .procname       = "protected_hardlinks",
    1041             :                 .data           = &sysctl_protected_hardlinks,
    1042             :                 .maxlen         = sizeof(int),
    1043             :                 .mode           = 0600,
    1044             :                 .proc_handler   = proc_dointvec_minmax,
    1045             :                 .extra1         = SYSCTL_ZERO,
    1046             :                 .extra2         = SYSCTL_ONE,
    1047             :         },
    1048             :         {
    1049             :                 .procname       = "protected_fifos",
    1050             :                 .data           = &sysctl_protected_fifos,
    1051             :                 .maxlen         = sizeof(int),
    1052             :                 .mode           = 0600,
    1053             :                 .proc_handler   = proc_dointvec_minmax,
    1054             :                 .extra1         = SYSCTL_ZERO,
    1055             :                 .extra2         = SYSCTL_TWO,
    1056             :         },
    1057             :         {
    1058             :                 .procname       = "protected_regular",
    1059             :                 .data           = &sysctl_protected_regular,
    1060             :                 .maxlen         = sizeof(int),
    1061             :                 .mode           = 0600,
    1062             :                 .proc_handler   = proc_dointvec_minmax,
    1063             :                 .extra1         = SYSCTL_ZERO,
    1064             :                 .extra2         = SYSCTL_TWO,
    1065             :         },
    1066             :         { }
    1067             : };
    1068             : 
    1069           1 : static int __init init_fs_namei_sysctls(void)
    1070             : {
    1071           1 :         register_sysctl_init("fs", namei_sysctls);
    1072           1 :         return 0;
    1073             : }
    1074             : fs_initcall(init_fs_namei_sysctls);
    1075             : 
    1076             : #endif /* CONFIG_SYSCTL */
    1077             : 
    1078             : /**
    1079             :  * may_follow_link - Check symlink following for unsafe situations
    1080             :  * @nd: nameidata pathwalk data
    1081             :  *
    1082             :  * In the case of the sysctl_protected_symlinks sysctl being enabled,
    1083             :  * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
    1084             :  * in a sticky world-writable directory. This is to protect privileged
    1085             :  * processes from failing races against path names that may change out
    1086             :  * from under them by way of other users creating malicious symlinks.
    1087             :  * It will permit symlinks to be followed only when outside a sticky
    1088             :  * world-writable directory, or when the uid of the symlink and follower
    1089             :  * match, or when the directory owner matches the symlink's owner.
    1090             :  *
    1091             :  * Returns 0 if following the symlink is allowed, -ve on error.
    1092             :  */
    1093           0 : static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
    1094             : {
    1095             :         struct user_namespace *mnt_userns;
    1096             :         kuid_t i_uid;
    1097             : 
    1098           0 :         if (!sysctl_protected_symlinks)
    1099             :                 return 0;
    1100             : 
    1101           0 :         mnt_userns = mnt_user_ns(nd->path.mnt);
    1102           0 :         i_uid = i_uid_into_mnt(mnt_userns, inode);
    1103             :         /* Allowed if owner and follower match. */
    1104           0 :         if (uid_eq(current_cred()->fsuid, i_uid))
    1105             :                 return 0;
    1106             : 
    1107             :         /* Allowed if parent directory not sticky and world-writable. */
    1108           0 :         if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
    1109             :                 return 0;
    1110             : 
    1111             :         /* Allowed if parent directory and link owner match. */
    1112           0 :         if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, i_uid))
    1113             :                 return 0;
    1114             : 
    1115           0 :         if (nd->flags & LOOKUP_RCU)
    1116             :                 return -ECHILD;
    1117             : 
    1118           0 :         audit_inode(nd->name, nd->stack[0].link.dentry, 0);
    1119           0 :         audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
    1120           0 :         return -EACCES;
    1121             : }
    1122             : 
    1123             : /**
    1124             :  * safe_hardlink_source - Check for safe hardlink conditions
    1125             :  * @mnt_userns: user namespace of the mount the inode was found from
    1126             :  * @inode: the source inode to hardlink from
    1127             :  *
    1128             :  * Return false if at least one of the following conditions holds:
    1129             :  *    - inode is not a regular file
    1130             :  *    - inode is setuid
    1131             :  *    - inode is setgid and group-exec
    1132             :  *    - access failure for read and write
    1133             :  *
    1134             :  * Otherwise returns true.
    1135             :  */
    1136           0 : static bool safe_hardlink_source(struct user_namespace *mnt_userns,
    1137             :                                  struct inode *inode)
    1138             : {
    1139           0 :         umode_t mode = inode->i_mode;
    1140             : 
    1141             :         /* Special files should not get pinned to the filesystem. */
    1142           0 :         if (!S_ISREG(mode))
    1143             :                 return false;
    1144             : 
    1145             :         /* Setuid files should not get pinned to the filesystem. */
    1146           0 :         if (mode & S_ISUID)
    1147             :                 return false;
    1148             : 
    1149             :         /* Executable setgid files should not get pinned to the filesystem. */
    1150           0 :         if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
    1151             :                 return false;
    1152             : 
    1153             :         /* Hardlinking to unreadable or unwritable sources is dangerous. */
    1154           0 :         if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE))
    1155             :                 return false;
    1156             : 
    1157           0 :         return true;
    1158             : }
    1159             : 
    1160             : /**
    1161             :  * may_linkat - Check permissions for creating a hardlink
    1162             :  * @mnt_userns: user namespace of the mount the inode was found from
    1163             :  * @link: the source to hardlink from
    1164             :  *
    1165             :  * Block hardlink when all of:
    1166             :  *  - sysctl_protected_hardlinks enabled
    1167             :  *  - fsuid does not match inode
    1168             :  *  - hardlink source is unsafe (see safe_hardlink_source() above)
    1169             :  *  - not CAP_FOWNER in a namespace with the inode owner uid mapped
    1170             :  *
    1171             :  * If the inode has been found through an idmapped mount the user namespace of
    1172             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    1173             :  * care to map the inode according to @mnt_userns before checking permissions.
    1174             :  * On non-idmapped mounts or if permission checking is to be performed on the
    1175             :  * raw inode simply pass init_user_ns.
    1176             :  *
    1177             :  * Returns 0 if successful, -ve on error.
    1178             :  */
    1179           0 : int may_linkat(struct user_namespace *mnt_userns, struct path *link)
    1180             : {
    1181           0 :         struct inode *inode = link->dentry->d_inode;
    1182             : 
    1183             :         /* Inode writeback is not safe when the uid or gid are invalid. */
    1184           0 :         if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
    1185           0 :             !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
    1186             :                 return -EOVERFLOW;
    1187             : 
    1188           0 :         if (!sysctl_protected_hardlinks)
    1189             :                 return 0;
    1190             : 
    1191             :         /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
    1192             :          * otherwise, it must be a safe source.
    1193             :          */
    1194           0 :         if (safe_hardlink_source(mnt_userns, inode) ||
    1195           0 :             inode_owner_or_capable(mnt_userns, inode))
    1196             :                 return 0;
    1197             : 
    1198             :         audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
    1199             :         return -EPERM;
    1200             : }
    1201             : 
    1202             : /**
    1203             :  * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
    1204             :  *                        should be allowed, or not, on files that already
    1205             :  *                        exist.
    1206             :  * @mnt_userns: user namespace of the mount the inode was found from
    1207             :  * @nd: nameidata pathwalk data
    1208             :  * @inode: the inode of the file to open
    1209             :  *
    1210             :  * Block an O_CREAT open of a FIFO (or a regular file) when:
    1211             :  *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
    1212             :  *   - the file already exists
    1213             :  *   - we are in a sticky directory
    1214             :  *   - we don't own the file
    1215             :  *   - the owner of the directory doesn't own the file
    1216             :  *   - the directory is world writable
    1217             :  * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
    1218             :  * the directory doesn't have to be world writable: being group writable will
    1219             :  * be enough.
    1220             :  *
    1221             :  * If the inode has been found through an idmapped mount the user namespace of
    1222             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    1223             :  * care to map the inode according to @mnt_userns before checking permissions.
    1224             :  * On non-idmapped mounts or if permission checking is to be performed on the
    1225             :  * raw inode simply pass init_user_ns.
    1226             :  *
    1227             :  * Returns 0 if the open is allowed, -ve on error.
    1228             :  */
    1229           0 : static int may_create_in_sticky(struct user_namespace *mnt_userns,
    1230             :                                 struct nameidata *nd, struct inode *const inode)
    1231             : {
    1232           0 :         umode_t dir_mode = nd->dir_mode;
    1233           0 :         kuid_t dir_uid = nd->dir_uid;
    1234             : 
    1235           0 :         if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
    1236           0 :             (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
    1237           0 :             likely(!(dir_mode & S_ISVTX)) ||
    1238           0 :             uid_eq(i_uid_into_mnt(mnt_userns, inode), dir_uid) ||
    1239           0 :             uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
    1240             :                 return 0;
    1241             : 
    1242           0 :         if (likely(dir_mode & 0002) ||
    1243           0 :             (dir_mode & 0020 &&
    1244           0 :              ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
    1245           0 :               (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
    1246           0 :                 const char *operation = S_ISFIFO(inode->i_mode) ?
    1247             :                                         "sticky_create_fifo" :
    1248             :                                         "sticky_create_regular";
    1249             :                 audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
    1250             :                 return -EACCES;
    1251             :         }
    1252             :         return 0;
    1253             : }
    1254             : 
    1255             : /*
    1256             :  * follow_up - Find the mountpoint of path's vfsmount
    1257             :  *
    1258             :  * Given a path, find the mountpoint of its source file system.
    1259             :  * Replace @path with the path of the mountpoint in the parent mount.
    1260             :  * Up is towards /.
    1261             :  *
    1262             :  * Return 1 if we went up a level and 0 if we were already at the
    1263             :  * root.
    1264             :  */
    1265           0 : int follow_up(struct path *path)
    1266             : {
    1267           0 :         struct mount *mnt = real_mount(path->mnt);
    1268             :         struct mount *parent;
    1269             :         struct dentry *mountpoint;
    1270             : 
    1271           0 :         read_seqlock_excl(&mount_lock);
    1272           0 :         parent = mnt->mnt_parent;
    1273           0 :         if (parent == mnt) {
    1274           0 :                 read_sequnlock_excl(&mount_lock);
    1275           0 :                 return 0;
    1276             :         }
    1277           0 :         mntget(&parent->mnt);
    1278           0 :         mountpoint = dget(mnt->mnt_mountpoint);
    1279           0 :         read_sequnlock_excl(&mount_lock);
    1280           0 :         dput(path->dentry);
    1281           0 :         path->dentry = mountpoint;
    1282           0 :         mntput(path->mnt);
    1283           0 :         path->mnt = &parent->mnt;
    1284           0 :         return 1;
    1285             : }
    1286             : EXPORT_SYMBOL(follow_up);
    1287             : 
    1288             : static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
    1289             :                                   struct path *path, unsigned *seqp)
    1290             : {
    1291           0 :         while (mnt_has_parent(m)) {
    1292           0 :                 struct dentry *mountpoint = m->mnt_mountpoint;
    1293             : 
    1294           0 :                 m = m->mnt_parent;
    1295           0 :                 if (unlikely(root->dentry == mountpoint &&
    1296             :                              root->mnt == &m->mnt))
    1297             :                         break;
    1298           0 :                 if (mountpoint != m->mnt.mnt_root) {
    1299           0 :                         path->mnt = &m->mnt;
    1300           0 :                         path->dentry = mountpoint;
    1301           0 :                         *seqp = read_seqcount_begin(&mountpoint->d_seq);
    1302             :                         return true;
    1303             :                 }
    1304             :         }
    1305             :         return false;
    1306             : }
    1307             : 
    1308           0 : static bool choose_mountpoint(struct mount *m, const struct path *root,
    1309             :                               struct path *path)
    1310             : {
    1311             :         bool found;
    1312             : 
    1313             :         rcu_read_lock();
    1314             :         while (1) {
    1315           0 :                 unsigned seq, mseq = read_seqbegin(&mount_lock);
    1316             : 
    1317           0 :                 found = choose_mountpoint_rcu(m, root, path, &seq);
    1318           0 :                 if (unlikely(!found)) {
    1319           0 :                         if (!read_seqretry(&mount_lock, mseq))
    1320             :                                 break;
    1321             :                 } else {
    1322           0 :                         if (likely(__legitimize_path(path, seq, mseq)))
    1323             :                                 break;
    1324           0 :                         rcu_read_unlock();
    1325           0 :                         path_put(path);
    1326             :                         rcu_read_lock();
    1327             :                 }
    1328             :         }
    1329             :         rcu_read_unlock();
    1330           0 :         return found;
    1331             : }
    1332             : 
    1333             : /*
    1334             :  * Perform an automount
    1335             :  * - return -EISDIR to tell follow_managed() to stop and return the path we
    1336             :  *   were called with.
    1337             :  */
    1338           0 : static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
    1339             : {
    1340           0 :         struct dentry *dentry = path->dentry;
    1341             : 
    1342             :         /* We don't want to mount if someone's just doing a stat -
    1343             :          * unless they're stat'ing a directory and appended a '/' to
    1344             :          * the name.
    1345             :          *
    1346             :          * We do, however, want to mount if someone wants to open or
    1347             :          * create a file of any type under the mountpoint, wants to
    1348             :          * traverse through the mountpoint or wants to open the
    1349             :          * mounted directory.  Also, autofs may mark negative dentries
    1350             :          * as being automount points.  These will need the attentions
    1351             :          * of the daemon to instantiate them before they can be used.
    1352             :          */
    1353           0 :         if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
    1354           0 :                            LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
    1355           0 :             dentry->d_inode)
    1356             :                 return -EISDIR;
    1357             : 
    1358           0 :         if (count && (*count)++ >= MAXSYMLINKS)
    1359             :                 return -ELOOP;
    1360             : 
    1361           0 :         return finish_automount(dentry->d_op->d_automount(path), path);
    1362             : }
    1363             : 
    1364             : /*
    1365             :  * mount traversal - out-of-line part.  One note on ->d_flags accesses -
    1366             :  * dentries are pinned but not locked here, so negative dentry can go
    1367             :  * positive right under us.  Use of smp_load_acquire() provides a barrier
    1368             :  * sufficient for ->d_inode and ->d_flags consistency.
    1369             :  */
    1370           0 : static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
    1371             :                              int *count, unsigned lookup_flags)
    1372             : {
    1373           0 :         struct vfsmount *mnt = path->mnt;
    1374           0 :         bool need_mntput = false;
    1375           0 :         int ret = 0;
    1376             : 
    1377           0 :         while (flags & DCACHE_MANAGED_DENTRY) {
    1378             :                 /* Allow the filesystem to manage the transit without i_mutex
    1379             :                  * being held. */
    1380           0 :                 if (flags & DCACHE_MANAGE_TRANSIT) {
    1381           0 :                         ret = path->dentry->d_op->d_manage(path, false);
    1382           0 :                         flags = smp_load_acquire(&path->dentry->d_flags);
    1383           0 :                         if (ret < 0)
    1384             :                                 break;
    1385             :                 }
    1386             : 
    1387           0 :                 if (flags & DCACHE_MOUNTED) {       // something's mounted on it..
    1388           0 :                         struct vfsmount *mounted = lookup_mnt(path);
    1389           0 :                         if (mounted) {          // ... in our namespace
    1390           0 :                                 dput(path->dentry);
    1391           0 :                                 if (need_mntput)
    1392           0 :                                         mntput(path->mnt);
    1393           0 :                                 path->mnt = mounted;
    1394           0 :                                 path->dentry = dget(mounted->mnt_root);
    1395             :                                 // here we know it's positive
    1396           0 :                                 flags = path->dentry->d_flags;
    1397           0 :                                 need_mntput = true;
    1398           0 :                                 continue;
    1399             :                         }
    1400             :                 }
    1401             : 
    1402           0 :                 if (!(flags & DCACHE_NEED_AUTOMOUNT))
    1403             :                         break;
    1404             : 
    1405             :                 // uncovered automount point
    1406           0 :                 ret = follow_automount(path, count, lookup_flags);
    1407           0 :                 flags = smp_load_acquire(&path->dentry->d_flags);
    1408           0 :                 if (ret < 0)
    1409             :                         break;
    1410             :         }
    1411             : 
    1412           0 :         if (ret == -EISDIR)
    1413           0 :                 ret = 0;
    1414             :         // possible if you race with several mount --move
    1415           0 :         if (need_mntput && path->mnt == mnt)
    1416           0 :                 mntput(path->mnt);
    1417           0 :         if (!ret && unlikely(d_flags_negative(flags)))
    1418           0 :                 ret = -ENOENT;
    1419           0 :         *jumped = need_mntput;
    1420           0 :         return ret;
    1421             : }
    1422             : 
    1423           0 : static inline int traverse_mounts(struct path *path, bool *jumped,
    1424             :                                   int *count, unsigned lookup_flags)
    1425             : {
    1426           0 :         unsigned flags = smp_load_acquire(&path->dentry->d_flags);
    1427             : 
    1428             :         /* fastpath */
    1429           0 :         if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
    1430           0 :                 *jumped = false;
    1431           0 :                 if (unlikely(d_flags_negative(flags)))
    1432             :                         return -ENOENT;
    1433           0 :                 return 0;
    1434             :         }
    1435           0 :         return __traverse_mounts(path, flags, jumped, count, lookup_flags);
    1436             : }
    1437             : 
    1438           0 : int follow_down_one(struct path *path)
    1439             : {
    1440             :         struct vfsmount *mounted;
    1441             : 
    1442           0 :         mounted = lookup_mnt(path);
    1443           0 :         if (mounted) {
    1444           0 :                 dput(path->dentry);
    1445           0 :                 mntput(path->mnt);
    1446           0 :                 path->mnt = mounted;
    1447           0 :                 path->dentry = dget(mounted->mnt_root);
    1448           0 :                 return 1;
    1449             :         }
    1450             :         return 0;
    1451             : }
    1452             : EXPORT_SYMBOL(follow_down_one);
    1453             : 
    1454             : /*
    1455             :  * Follow down to the covering mount currently visible to userspace.  At each
    1456             :  * point, the filesystem owning that dentry may be queried as to whether the
    1457             :  * caller is permitted to proceed or not.
    1458             :  */
    1459           0 : int follow_down(struct path *path)
    1460             : {
    1461           0 :         struct vfsmount *mnt = path->mnt;
    1462             :         bool jumped;
    1463           0 :         int ret = traverse_mounts(path, &jumped, NULL, 0);
    1464             : 
    1465           0 :         if (path->mnt != mnt)
    1466           0 :                 mntput(mnt);
    1467           0 :         return ret;
    1468             : }
    1469             : EXPORT_SYMBOL(follow_down);
    1470             : 
    1471             : /*
    1472             :  * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
    1473             :  * we meet a managed dentry that would need blocking.
    1474             :  */
    1475           1 : static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
    1476             :                                struct inode **inode, unsigned *seqp)
    1477             : {
    1478           1 :         struct dentry *dentry = path->dentry;
    1479           1 :         unsigned int flags = dentry->d_flags;
    1480             : 
    1481           1 :         if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
    1482             :                 return true;
    1483             : 
    1484           0 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1485             :                 return false;
    1486             : 
    1487             :         for (;;) {
    1488             :                 /*
    1489             :                  * Don't forget we might have a non-mountpoint managed dentry
    1490             :                  * that wants to block transit.
    1491             :                  */
    1492           0 :                 if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
    1493           0 :                         int res = dentry->d_op->d_manage(path, true);
    1494           0 :                         if (res)
    1495           0 :                                 return res == -EISDIR;
    1496           0 :                         flags = dentry->d_flags;
    1497             :                 }
    1498             : 
    1499           0 :                 if (flags & DCACHE_MOUNTED) {
    1500           0 :                         struct mount *mounted = __lookup_mnt(path->mnt, dentry);
    1501           0 :                         if (mounted) {
    1502           0 :                                 path->mnt = &mounted->mnt;
    1503           0 :                                 dentry = path->dentry = mounted->mnt.mnt_root;
    1504           0 :                                 nd->state |= ND_JUMPED;
    1505           0 :                                 *seqp = read_seqcount_begin(&dentry->d_seq);
    1506           0 :                                 *inode = dentry->d_inode;
    1507             :                                 /*
    1508             :                                  * We don't need to re-check ->d_seq after this
    1509             :                                  * ->d_inode read - there will be an RCU delay
    1510             :                                  * between mount hash removal and ->mnt_root
    1511             :                                  * becoming unpinned.
    1512             :                                  */
    1513           0 :                                 flags = dentry->d_flags;
    1514           0 :                                 continue;
    1515             :                         }
    1516           0 :                         if (read_seqretry(&mount_lock, nd->m_seq))
    1517             :                                 return false;
    1518             :                 }
    1519           0 :                 return !(flags & DCACHE_NEED_AUTOMOUNT);
    1520             :         }
    1521             : }
    1522             : 
    1523           1 : static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
    1524             :                           struct path *path, struct inode **inode,
    1525             :                           unsigned int *seqp)
    1526             : {
    1527             :         bool jumped;
    1528             :         int ret;
    1529             : 
    1530           1 :         path->mnt = nd->path.mnt;
    1531           1 :         path->dentry = dentry;
    1532           1 :         if (nd->flags & LOOKUP_RCU) {
    1533           1 :                 unsigned int seq = *seqp;
    1534           1 :                 if (unlikely(!*inode))
    1535             :                         return -ENOENT;
    1536           1 :                 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
    1537             :                         return 0;
    1538           0 :                 if (!try_to_unlazy_next(nd, dentry, seq))
    1539             :                         return -ECHILD;
    1540             :                 // *path might've been clobbered by __follow_mount_rcu()
    1541           0 :                 path->mnt = nd->path.mnt;
    1542           0 :                 path->dentry = dentry;
    1543             :         }
    1544           0 :         ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
    1545           0 :         if (jumped) {
    1546           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1547             :                         ret = -EXDEV;
    1548             :                 else
    1549           0 :                         nd->state |= ND_JUMPED;
    1550             :         }
    1551           0 :         if (unlikely(ret)) {
    1552           0 :                 dput(path->dentry);
    1553           0 :                 if (path->mnt != nd->path.mnt)
    1554           0 :                         mntput(path->mnt);
    1555             :         } else {
    1556           0 :                 *inode = d_backing_inode(path->dentry);
    1557           0 :                 *seqp = 0; /* out of RCU mode, so the value doesn't matter */
    1558             :         }
    1559             :         return ret;
    1560             : }
    1561             : 
    1562             : /*
    1563             :  * This looks up the name in dcache and possibly revalidates the found dentry.
    1564             :  * NULL is returned if the dentry does not exist in the cache.
    1565             :  */
    1566           3 : static struct dentry *lookup_dcache(const struct qstr *name,
    1567             :                                     struct dentry *dir,
    1568             :                                     unsigned int flags)
    1569             : {
    1570           3 :         struct dentry *dentry = d_lookup(dir, name);
    1571           3 :         if (dentry) {
    1572           0 :                 int error = d_revalidate(dentry, flags);
    1573           0 :                 if (unlikely(error <= 0)) {
    1574           0 :                         if (!error)
    1575           0 :                                 d_invalidate(dentry);
    1576           0 :                         dput(dentry);
    1577           0 :                         return ERR_PTR(error);
    1578             :                 }
    1579             :         }
    1580             :         return dentry;
    1581             : }
    1582             : 
    1583             : /*
    1584             :  * Parent directory has inode locked exclusive.  This is the one
    1585             :  * and only case when ->lookup() gets called on non in-lookup
    1586             :  * dentries - as a matter of fact, this only gets called
    1587             :  * when directory is guaranteed to have no in-lookup children
    1588             :  * at all.
    1589             :  */
    1590           3 : static struct dentry *__lookup_hash(const struct qstr *name,
    1591             :                 struct dentry *base, unsigned int flags)
    1592             : {
    1593           3 :         struct dentry *dentry = lookup_dcache(name, base, flags);
    1594             :         struct dentry *old;
    1595           3 :         struct inode *dir = base->d_inode;
    1596             : 
    1597           3 :         if (dentry)
    1598             :                 return dentry;
    1599             : 
    1600             :         /* Don't create child dentry for a dead directory. */
    1601           3 :         if (unlikely(IS_DEADDIR(dir)))
    1602             :                 return ERR_PTR(-ENOENT);
    1603             : 
    1604           3 :         dentry = d_alloc(base, name);
    1605           3 :         if (unlikely(!dentry))
    1606             :                 return ERR_PTR(-ENOMEM);
    1607             : 
    1608           3 :         old = dir->i_op->lookup(dir, dentry, flags);
    1609           3 :         if (unlikely(old)) {
    1610           0 :                 dput(dentry);
    1611           0 :                 dentry = old;
    1612             :         }
    1613             :         return dentry;
    1614             : }
    1615             : 
    1616           1 : static struct dentry *lookup_fast(struct nameidata *nd,
    1617             :                                   struct inode **inode,
    1618             :                                   unsigned *seqp)
    1619             : {
    1620           1 :         struct dentry *dentry, *parent = nd->path.dentry;
    1621           1 :         int status = 1;
    1622             : 
    1623             :         /*
    1624             :          * Rename seqlock is not required here because in the off chance
    1625             :          * of a false negative due to a concurrent rename, the caller is
    1626             :          * going to fall back to non-racy lookup.
    1627             :          */
    1628           1 :         if (nd->flags & LOOKUP_RCU) {
    1629             :                 unsigned seq;
    1630           1 :                 dentry = __d_lookup_rcu(parent, &nd->last, &seq);
    1631           1 :                 if (unlikely(!dentry)) {
    1632           0 :                         if (!try_to_unlazy(nd))
    1633           1 :                                 return ERR_PTR(-ECHILD);
    1634           0 :                         return NULL;
    1635             :                 }
    1636             : 
    1637             :                 /*
    1638             :                  * This sequence count validates that the inode matches
    1639             :                  * the dentry name information from lookup.
    1640             :                  */
    1641           2 :                 *inode = d_backing_inode(dentry);
    1642           3 :                 if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
    1643             :                         return ERR_PTR(-ECHILD);
    1644             : 
    1645             :                 /*
    1646             :                  * This sequence count validates that the parent had no
    1647             :                  * changes while we did the lookup of the dentry above.
    1648             :                  *
    1649             :                  * The memory barrier in read_seqcount_begin of child is
    1650             :                  *  enough, we can use __read_seqcount_retry here.
    1651             :                  */
    1652           3 :                 if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
    1653             :                         return ERR_PTR(-ECHILD);
    1654             : 
    1655           1 :                 *seqp = seq;
    1656           2 :                 status = d_revalidate(dentry, nd->flags);
    1657           1 :                 if (likely(status > 0))
    1658             :                         return dentry;
    1659           0 :                 if (!try_to_unlazy_next(nd, dentry, seq))
    1660             :                         return ERR_PTR(-ECHILD);
    1661           0 :                 if (status == -ECHILD)
    1662             :                         /* we'd been told to redo it in non-rcu mode */
    1663           0 :                         status = d_revalidate(dentry, nd->flags);
    1664             :         } else {
    1665           0 :                 dentry = __d_lookup(parent, &nd->last);
    1666           0 :                 if (unlikely(!dentry))
    1667             :                         return NULL;
    1668           0 :                 status = d_revalidate(dentry, nd->flags);
    1669             :         }
    1670           0 :         if (unlikely(status <= 0)) {
    1671           0 :                 if (!status)
    1672           0 :                         d_invalidate(dentry);
    1673           0 :                 dput(dentry);
    1674           0 :                 return ERR_PTR(status);
    1675             :         }
    1676             :         return dentry;
    1677             : }
    1678             : 
    1679             : /* Fast lookup failed, do it the slow way */
    1680           0 : static struct dentry *__lookup_slow(const struct qstr *name,
    1681             :                                     struct dentry *dir,
    1682             :                                     unsigned int flags)
    1683             : {
    1684             :         struct dentry *dentry, *old;
    1685           0 :         struct inode *inode = dir->d_inode;
    1686           0 :         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    1687             : 
    1688             :         /* Don't go there if it's already dead */
    1689           0 :         if (unlikely(IS_DEADDIR(inode)))
    1690             :                 return ERR_PTR(-ENOENT);
    1691             : again:
    1692           0 :         dentry = d_alloc_parallel(dir, name, &wq);
    1693           0 :         if (IS_ERR(dentry))
    1694             :                 return dentry;
    1695           0 :         if (unlikely(!d_in_lookup(dentry))) {
    1696           0 :                 int error = d_revalidate(dentry, flags);
    1697           0 :                 if (unlikely(error <= 0)) {
    1698           0 :                         if (!error) {
    1699           0 :                                 d_invalidate(dentry);
    1700           0 :                                 dput(dentry);
    1701           0 :                                 goto again;
    1702             :                         }
    1703           0 :                         dput(dentry);
    1704           0 :                         dentry = ERR_PTR(error);
    1705             :                 }
    1706             :         } else {
    1707           0 :                 old = inode->i_op->lookup(inode, dentry, flags);
    1708           0 :                 d_lookup_done(dentry);
    1709           0 :                 if (unlikely(old)) {
    1710           0 :                         dput(dentry);
    1711           0 :                         dentry = old;
    1712             :                 }
    1713             :         }
    1714             :         return dentry;
    1715             : }
    1716             : 
    1717           0 : static struct dentry *lookup_slow(const struct qstr *name,
    1718             :                                   struct dentry *dir,
    1719             :                                   unsigned int flags)
    1720             : {
    1721           0 :         struct inode *inode = dir->d_inode;
    1722             :         struct dentry *res;
    1723           0 :         inode_lock_shared(inode);
    1724           0 :         res = __lookup_slow(name, dir, flags);
    1725           0 :         inode_unlock_shared(inode);
    1726           0 :         return res;
    1727             : }
    1728             : 
    1729           4 : static inline int may_lookup(struct user_namespace *mnt_userns,
    1730             :                              struct nameidata *nd)
    1731             : {
    1732           4 :         if (nd->flags & LOOKUP_RCU) {
    1733           4 :                 int err = inode_permission(mnt_userns, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
    1734           4 :                 if (err != -ECHILD || !try_to_unlazy(nd))
    1735             :                         return err;
    1736             :         }
    1737           0 :         return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
    1738             : }
    1739             : 
    1740           0 : static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
    1741             : {
    1742           0 :         if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
    1743             :                 return -ELOOP;
    1744             : 
    1745           0 :         if (likely(nd->depth != EMBEDDED_LEVELS))
    1746             :                 return 0;
    1747           0 :         if (likely(nd->stack != nd->internal))
    1748             :                 return 0;
    1749           0 :         if (likely(nd_alloc_stack(nd)))
    1750             :                 return 0;
    1751             : 
    1752           0 :         if (nd->flags & LOOKUP_RCU) {
    1753             :                 // we need to grab link before we do unlazy.  And we can't skip
    1754             :                 // unlazy even if we fail to grab the link - cleanup needs it
    1755           0 :                 bool grabbed_link = legitimize_path(nd, link, seq);
    1756             : 
    1757           0 :                 if (!try_to_unlazy(nd) != 0 || !grabbed_link)
    1758             :                         return -ECHILD;
    1759             : 
    1760           0 :                 if (nd_alloc_stack(nd))
    1761             :                         return 0;
    1762             :         }
    1763             :         return -ENOMEM;
    1764             : }
    1765             : 
    1766             : enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
    1767             : 
    1768           0 : static const char *pick_link(struct nameidata *nd, struct path *link,
    1769             :                      struct inode *inode, unsigned seq, int flags)
    1770             : {
    1771             :         struct saved *last;
    1772             :         const char *res;
    1773           0 :         int error = reserve_stack(nd, link, seq);
    1774             : 
    1775           0 :         if (unlikely(error)) {
    1776           0 :                 if (!(nd->flags & LOOKUP_RCU))
    1777             :                         path_put(link);
    1778           0 :                 return ERR_PTR(error);
    1779             :         }
    1780           0 :         last = nd->stack + nd->depth++;
    1781           0 :         last->link = *link;
    1782           0 :         clear_delayed_call(&last->done);
    1783           0 :         last->seq = seq;
    1784             : 
    1785           0 :         if (flags & WALK_TRAILING) {
    1786           0 :                 error = may_follow_link(nd, inode);
    1787           0 :                 if (unlikely(error))
    1788           0 :                         return ERR_PTR(error);
    1789             :         }
    1790             : 
    1791           0 :         if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||
    1792           0 :                         unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
    1793             :                 return ERR_PTR(-ELOOP);
    1794             : 
    1795           0 :         if (!(nd->flags & LOOKUP_RCU)) {
    1796           0 :                 touch_atime(&last->link);
    1797           0 :                 cond_resched();
    1798           0 :         } else if (atime_needs_update(&last->link, inode)) {
    1799           0 :                 if (!try_to_unlazy(nd))
    1800             :                         return ERR_PTR(-ECHILD);
    1801           0 :                 touch_atime(&last->link);
    1802             :         }
    1803             : 
    1804           0 :         error = security_inode_follow_link(link->dentry, inode,
    1805           0 :                                            nd->flags & LOOKUP_RCU);
    1806             :         if (unlikely(error))
    1807             :                 return ERR_PTR(error);
    1808             : 
    1809           0 :         res = READ_ONCE(inode->i_link);
    1810           0 :         if (!res) {
    1811             :                 const char * (*get)(struct dentry *, struct inode *,
    1812             :                                 struct delayed_call *);
    1813           0 :                 get = inode->i_op->get_link;
    1814           0 :                 if (nd->flags & LOOKUP_RCU) {
    1815           0 :                         res = get(NULL, inode, &last->done);
    1816           0 :                         if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
    1817           0 :                                 res = get(link->dentry, inode, &last->done);
    1818             :                 } else {
    1819           0 :                         res = get(link->dentry, inode, &last->done);
    1820             :                 }
    1821           0 :                 if (!res)
    1822             :                         goto all_done;
    1823           0 :                 if (IS_ERR(res))
    1824             :                         return res;
    1825             :         }
    1826           0 :         if (*res == '/') {
    1827           0 :                 error = nd_jump_root(nd);
    1828           0 :                 if (unlikely(error))
    1829           0 :                         return ERR_PTR(error);
    1830           0 :                 while (unlikely(*++res == '/'))
    1831             :                         ;
    1832             :         }
    1833           0 :         if (*res)
    1834             :                 return res;
    1835             : all_done: // pure jump
    1836           0 :         put_link(nd);
    1837           0 :         return NULL;
    1838             : }
    1839             : 
    1840             : /*
    1841             :  * Do we need to follow links? We _really_ want to be able
    1842             :  * to do this check without having to look at inode->i_op,
    1843             :  * so we keep a cache of "no, this doesn't need follow_link"
    1844             :  * for the common case.
    1845             :  */
    1846           1 : static const char *step_into(struct nameidata *nd, int flags,
    1847             :                      struct dentry *dentry, struct inode *inode, unsigned seq)
    1848             : {
    1849             :         struct path path;
    1850           1 :         int err = handle_mounts(nd, dentry, &path, &inode, &seq);
    1851             : 
    1852           1 :         if (err < 0)
    1853           0 :                 return ERR_PTR(err);
    1854           2 :         if (likely(!d_is_symlink(path.dentry)) ||
    1855           0 :            ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
    1856           0 :            (flags & WALK_NOFOLLOW)) {
    1857             :                 /* not a symlink or should not follow */
    1858           1 :                 if (!(nd->flags & LOOKUP_RCU)) {
    1859           0 :                         dput(nd->path.dentry);
    1860           0 :                         if (nd->path.mnt != path.mnt)
    1861           0 :                                 mntput(nd->path.mnt);
    1862             :                 }
    1863           1 :                 nd->path = path;
    1864           1 :                 nd->inode = inode;
    1865           1 :                 nd->seq = seq;
    1866           1 :                 return NULL;
    1867             :         }
    1868           0 :         if (nd->flags & LOOKUP_RCU) {
    1869             :                 /* make sure that d_is_symlink above matches inode */
    1870           0 :                 if (read_seqcount_retry(&path.dentry->d_seq, seq))
    1871             :                         return ERR_PTR(-ECHILD);
    1872             :         } else {
    1873           0 :                 if (path.mnt == nd->path.mnt)
    1874           0 :                         mntget(path.mnt);
    1875             :         }
    1876           0 :         return pick_link(nd, &path, inode, seq, flags);
    1877             : }
    1878             : 
    1879           0 : static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
    1880             :                                         struct inode **inodep,
    1881             :                                         unsigned *seqp)
    1882             : {
    1883             :         struct dentry *parent, *old;
    1884             : 
    1885           0 :         if (path_equal(&nd->path, &nd->root))
    1886             :                 goto in_root;
    1887           0 :         if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
    1888             :                 struct path path;
    1889             :                 unsigned seq;
    1890           0 :                 if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
    1891           0 :                                            &nd->root, &path, &seq))
    1892             :                         goto in_root;
    1893           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1894             :                         return ERR_PTR(-ECHILD);
    1895           0 :                 nd->path = path;
    1896           0 :                 nd->inode = path.dentry->d_inode;
    1897           0 :                 nd->seq = seq;
    1898           0 :                 if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
    1899             :                         return ERR_PTR(-ECHILD);
    1900             :                 /* we know that mountpoint was pinned */
    1901             :         }
    1902           0 :         old = nd->path.dentry;
    1903           0 :         parent = old->d_parent;
    1904           0 :         *inodep = parent->d_inode;
    1905           0 :         *seqp = read_seqcount_begin(&parent->d_seq);
    1906           0 :         if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
    1907             :                 return ERR_PTR(-ECHILD);
    1908           0 :         if (unlikely(!path_connected(nd->path.mnt, parent)))
    1909             :                 return ERR_PTR(-ECHILD);
    1910           0 :         return parent;
    1911             : in_root:
    1912           0 :         if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
    1913             :                 return ERR_PTR(-ECHILD);
    1914           0 :         if (unlikely(nd->flags & LOOKUP_BENEATH))
    1915             :                 return ERR_PTR(-ECHILD);
    1916           0 :         return NULL;
    1917             : }
    1918             : 
    1919           0 : static struct dentry *follow_dotdot(struct nameidata *nd,
    1920             :                                  struct inode **inodep,
    1921             :                                  unsigned *seqp)
    1922             : {
    1923             :         struct dentry *parent;
    1924             : 
    1925           0 :         if (path_equal(&nd->path, &nd->root))
    1926             :                 goto in_root;
    1927           0 :         if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
    1928             :                 struct path path;
    1929             : 
    1930           0 :                 if (!choose_mountpoint(real_mount(nd->path.mnt),
    1931           0 :                                        &nd->root, &path))
    1932             :                         goto in_root;
    1933           0 :                 path_put(&nd->path);
    1934           0 :                 nd->path = path;
    1935           0 :                 nd->inode = path.dentry->d_inode;
    1936           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1937           0 :                         return ERR_PTR(-EXDEV);
    1938             :         }
    1939             :         /* rare case of legitimate dget_parent()... */
    1940           0 :         parent = dget_parent(nd->path.dentry);
    1941           0 :         if (unlikely(!path_connected(nd->path.mnt, parent))) {
    1942           0 :                 dput(parent);
    1943           0 :                 return ERR_PTR(-ENOENT);
    1944             :         }
    1945           0 :         *seqp = 0;
    1946           0 :         *inodep = parent->d_inode;
    1947           0 :         return parent;
    1948             : 
    1949             : in_root:
    1950           0 :         if (unlikely(nd->flags & LOOKUP_BENEATH))
    1951             :                 return ERR_PTR(-EXDEV);
    1952           0 :         dget(nd->path.dentry);
    1953             :         return NULL;
    1954             : }
    1955             : 
    1956           0 : static const char *handle_dots(struct nameidata *nd, int type)
    1957             : {
    1958           0 :         if (type == LAST_DOTDOT) {
    1959           0 :                 const char *error = NULL;
    1960             :                 struct dentry *parent;
    1961             :                 struct inode *inode;
    1962             :                 unsigned seq;
    1963             : 
    1964           0 :                 if (!nd->root.mnt) {
    1965           0 :                         error = ERR_PTR(set_root(nd));
    1966           0 :                         if (error)
    1967           0 :                                 return error;
    1968             :                 }
    1969           0 :                 if (nd->flags & LOOKUP_RCU)
    1970           0 :                         parent = follow_dotdot_rcu(nd, &inode, &seq);
    1971             :                 else
    1972           0 :                         parent = follow_dotdot(nd, &inode, &seq);
    1973           0 :                 if (IS_ERR(parent))
    1974             :                         return ERR_CAST(parent);
    1975           0 :                 if (unlikely(!parent))
    1976           0 :                         error = step_into(nd, WALK_NOFOLLOW,
    1977             :                                          nd->path.dentry, nd->inode, nd->seq);
    1978             :                 else
    1979           0 :                         error = step_into(nd, WALK_NOFOLLOW,
    1980             :                                          parent, inode, seq);
    1981           0 :                 if (unlikely(error))
    1982             :                         return error;
    1983             : 
    1984           0 :                 if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
    1985             :                         /*
    1986             :                          * If there was a racing rename or mount along our
    1987             :                          * path, then we can't be sure that ".." hasn't jumped
    1988             :                          * above nd->root (and so userspace should retry or use
    1989             :                          * some fallback).
    1990             :                          */
    1991           0 :                         smp_rmb();
    1992           0 :                         if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
    1993             :                                 return ERR_PTR(-EAGAIN);
    1994           0 :                         if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
    1995             :                                 return ERR_PTR(-EAGAIN);
    1996             :                 }
    1997             :         }
    1998             :         return NULL;
    1999             : }
    2000             : 
    2001           1 : static const char *walk_component(struct nameidata *nd, int flags)
    2002             : {
    2003             :         struct dentry *dentry;
    2004             :         struct inode *inode;
    2005             :         unsigned seq;
    2006             :         /*
    2007             :          * "." and ".." are special - ".." especially so because it has
    2008             :          * to be able to know about the current root directory and
    2009             :          * parent relationships.
    2010             :          */
    2011           1 :         if (unlikely(nd->last_type != LAST_NORM)) {
    2012           0 :                 if (!(flags & WALK_MORE) && nd->depth)
    2013           0 :                         put_link(nd);
    2014           0 :                 return handle_dots(nd, nd->last_type);
    2015             :         }
    2016           1 :         dentry = lookup_fast(nd, &inode, &seq);
    2017           1 :         if (IS_ERR(dentry))
    2018             :                 return ERR_CAST(dentry);
    2019           1 :         if (unlikely(!dentry)) {
    2020           0 :                 dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
    2021           0 :                 if (IS_ERR(dentry))
    2022             :                         return ERR_CAST(dentry);
    2023             :         }
    2024           1 :         if (!(flags & WALK_MORE) && nd->depth)
    2025           0 :                 put_link(nd);
    2026           1 :         return step_into(nd, flags, dentry, inode, seq);
    2027             : }
    2028             : 
    2029             : /*
    2030             :  * We can do the critical dentry name comparison and hashing
    2031             :  * operations one word at a time, but we are limited to:
    2032             :  *
    2033             :  * - Architectures with fast unaligned word accesses. We could
    2034             :  *   do a "get_unaligned()" if this helps and is sufficiently
    2035             :  *   fast.
    2036             :  *
    2037             :  * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
    2038             :  *   do not trap on the (extremely unlikely) case of a page
    2039             :  *   crossing operation).
    2040             :  *
    2041             :  * - Furthermore, we need an efficient 64-bit compile for the
    2042             :  *   64-bit case in order to generate the "number of bytes in
    2043             :  *   the final mask". Again, that could be replaced with an
    2044             :  *   efficient population count instruction or similar.
    2045             :  */
    2046             : #ifdef CONFIG_DCACHE_WORD_ACCESS
    2047             : 
    2048             : #include <asm/word-at-a-time.h>
    2049             : 
    2050             : #ifdef HASH_MIX
    2051             : 
    2052             : /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
    2053             : 
    2054             : #elif defined(CONFIG_64BIT)
    2055             : /*
    2056             :  * Register pressure in the mixing function is an issue, particularly
    2057             :  * on 32-bit x86, but almost any function requires one state value and
    2058             :  * one temporary.  Instead, use a function designed for two state values
    2059             :  * and no temporaries.
    2060             :  *
    2061             :  * This function cannot create a collision in only two iterations, so
    2062             :  * we have two iterations to achieve avalanche.  In those two iterations,
    2063             :  * we have six layers of mixing, which is enough to spread one bit's
    2064             :  * influence out to 2^6 = 64 state bits.
    2065             :  *
    2066             :  * Rotate constants are scored by considering either 64 one-bit input
    2067             :  * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
    2068             :  * probability of that delta causing a change to each of the 128 output
    2069             :  * bits, using a sample of random initial states.
    2070             :  *
    2071             :  * The Shannon entropy of the computed probabilities is then summed
    2072             :  * to produce a score.  Ideally, any input change has a 50% chance of
    2073             :  * toggling any given output bit.
    2074             :  *
    2075             :  * Mixing scores (in bits) for (12,45):
    2076             :  * Input delta: 1-bit      2-bit
    2077             :  * 1 round:     713.3    42542.6
    2078             :  * 2 rounds:   2753.7   140389.8
    2079             :  * 3 rounds:   5954.1   233458.2
    2080             :  * 4 rounds:   7862.6   256672.2
    2081             :  * Perfect:    8192     258048
    2082             :  *            (64*128) (64*63/2 * 128)
    2083             :  */
    2084             : #define HASH_MIX(x, y, a)       \
    2085             :         (       x ^= (a),       \
    2086             :         y ^= x, x = rol64(x,12),\
    2087             :         x += y, y = rol64(y,45),\
    2088             :         y *= 9                  )
    2089             : 
    2090             : /*
    2091             :  * Fold two longs into one 32-bit hash value.  This must be fast, but
    2092             :  * latency isn't quite as critical, as there is a fair bit of additional
    2093             :  * work done before the hash value is used.
    2094             :  */
    2095             : static inline unsigned int fold_hash(unsigned long x, unsigned long y)
    2096             : {
    2097           6 :         y ^= x * GOLDEN_RATIO_64;
    2098           6 :         y *= GOLDEN_RATIO_64;
    2099           6 :         return y >> 32;
    2100             : }
    2101             : 
    2102             : #else   /* 32-bit case */
    2103             : 
    2104             : /*
    2105             :  * Mixing scores (in bits) for (7,20):
    2106             :  * Input delta: 1-bit      2-bit
    2107             :  * 1 round:     330.3     9201.6
    2108             :  * 2 rounds:   1246.4    25475.4
    2109             :  * 3 rounds:   1907.1    31295.1
    2110             :  * 4 rounds:   2042.3    31718.6
    2111             :  * Perfect:    2048      31744
    2112             :  *            (32*64)   (32*31/2 * 64)
    2113             :  */
    2114             : #define HASH_MIX(x, y, a)       \
    2115             :         (       x ^= (a),       \
    2116             :         y ^= x, x = rol32(x, 7),\
    2117             :         x += y, y = rol32(y,20),\
    2118             :         y *= 9                  )
    2119             : 
    2120             : static inline unsigned int fold_hash(unsigned long x, unsigned long y)
    2121             : {
    2122             :         /* Use arch-optimized multiply if one exists */
    2123             :         return __hash_32(y ^ __hash_32(x));
    2124             : }
    2125             : 
    2126             : #endif
    2127             : 
    2128             : /*
    2129             :  * Return the hash of a string of known length.  This is carefully
    2130             :  * designed to match hash_name(), which is the more critical function.
    2131             :  * In particular, we must end by hashing a final word containing 0..7
    2132             :  * payload bytes, to match the way that hash_name() iterates until it
    2133             :  * finds the delimiter after the name.
    2134             :  */
    2135           0 : unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
    2136             : {
    2137           0 :         unsigned long a, x = 0, y = (unsigned long)salt;
    2138             : 
    2139             :         for (;;) {
    2140           0 :                 if (!len)
    2141             :                         goto done;
    2142           0 :                 a = load_unaligned_zeropad(name);
    2143           0 :                 if (len < sizeof(unsigned long))
    2144             :                         break;
    2145           0 :                 HASH_MIX(x, y, a);
    2146           0 :                 name += sizeof(unsigned long);
    2147           0 :                 len -= sizeof(unsigned long);
    2148             :         }
    2149           0 :         x ^= a & bytemask_from_count(len);
    2150             : done:
    2151           0 :         return fold_hash(x, y);
    2152             : }
    2153             : EXPORT_SYMBOL(full_name_hash);
    2154             : 
    2155             : /* Return the "hash_len" (hash and length) of a null-terminated string */
    2156           2 : u64 hashlen_string(const void *salt, const char *name)
    2157             : {
    2158           2 :         unsigned long a = 0, x = 0, y = (unsigned long)salt;
    2159             :         unsigned long adata, mask, len;
    2160           2 :         const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
    2161             : 
    2162           2 :         len = 0;
    2163           2 :         goto inside;
    2164             : 
    2165             :         do {
    2166           3 :                 HASH_MIX(x, y, a);
    2167           1 :                 len += sizeof(unsigned long);
    2168             : inside:
    2169           6 :                 a = load_unaligned_zeropad(name+len);
    2170           3 :         } while (!has_zero(a, &adata, &constants));
    2171             : 
    2172           2 :         adata = prep_zero_mask(a, adata, &constants);
    2173           4 :         mask = create_zero_mask(adata);
    2174           2 :         x ^= a & zero_bytemask(mask);
    2175             : 
    2176           4 :         return hashlen_create(fold_hash(x, y), len + find_zero(mask));
    2177             : }
    2178             : EXPORT_SYMBOL(hashlen_string);
    2179             : 
    2180             : /*
    2181             :  * Calculate the length and hash of the path component, and
    2182             :  * return the "hash_len" as the result.
    2183             :  */
    2184           4 : static inline u64 hash_name(const void *salt, const char *name)
    2185             : {
    2186           4 :         unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
    2187             :         unsigned long adata, bdata, mask, len;
    2188           4 :         const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
    2189             : 
    2190           4 :         len = 0;
    2191           4 :         goto inside;
    2192             : 
    2193             :         do {
    2194           0 :                 HASH_MIX(x, y, a);
    2195           0 :                 len += sizeof(unsigned long);
    2196             : inside:
    2197           8 :                 a = load_unaligned_zeropad(name+len);
    2198           4 :                 b = a ^ REPEAT_BYTE('/');
    2199           8 :         } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
    2200             : 
    2201           4 :         adata = prep_zero_mask(a, adata, &constants);
    2202           4 :         bdata = prep_zero_mask(b, bdata, &constants);
    2203           8 :         mask = create_zero_mask(adata | bdata);
    2204           4 :         x ^= a & zero_bytemask(mask);
    2205             : 
    2206           8 :         return hashlen_create(fold_hash(x, y), len + find_zero(mask));
    2207             : }
    2208             : 
    2209             : #else   /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
    2210             : 
    2211             : /* Return the hash of a string of known length */
    2212             : unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
    2213             : {
    2214             :         unsigned long hash = init_name_hash(salt);
    2215             :         while (len--)
    2216             :                 hash = partial_name_hash((unsigned char)*name++, hash);
    2217             :         return end_name_hash(hash);
    2218             : }
    2219             : EXPORT_SYMBOL(full_name_hash);
    2220             : 
    2221             : /* Return the "hash_len" (hash and length) of a null-terminated string */
    2222             : u64 hashlen_string(const void *salt, const char *name)
    2223             : {
    2224             :         unsigned long hash = init_name_hash(salt);
    2225             :         unsigned long len = 0, c;
    2226             : 
    2227             :         c = (unsigned char)*name;
    2228             :         while (c) {
    2229             :                 len++;
    2230             :                 hash = partial_name_hash(c, hash);
    2231             :                 c = (unsigned char)name[len];
    2232             :         }
    2233             :         return hashlen_create(end_name_hash(hash), len);
    2234             : }
    2235             : EXPORT_SYMBOL(hashlen_string);
    2236             : 
    2237             : /*
    2238             :  * We know there's a real path component here of at least
    2239             :  * one character.
    2240             :  */
    2241             : static inline u64 hash_name(const void *salt, const char *name)
    2242             : {
    2243             :         unsigned long hash = init_name_hash(salt);
    2244             :         unsigned long len = 0, c;
    2245             : 
    2246             :         c = (unsigned char)*name;
    2247             :         do {
    2248             :                 len++;
    2249             :                 hash = partial_name_hash(c, hash);
    2250             :                 c = (unsigned char)name[len];
    2251             :         } while (c && c != '/');
    2252             :         return hashlen_create(end_name_hash(hash), len);
    2253             : }
    2254             : 
    2255             : #endif
    2256             : 
    2257             : /*
    2258             :  * Name resolution.
    2259             :  * This is the basic name resolution function, turning a pathname into
    2260             :  * the final dentry. We expect 'base' to be positive and a directory.
    2261             :  *
    2262             :  * Returns 0 and nd will have valid dentry and mnt on success.
    2263             :  * Returns error and drops reference to input namei data on failure.
    2264             :  */
    2265           3 : static int link_path_walk(const char *name, struct nameidata *nd)
    2266             : {
    2267           3 :         int depth = 0; // depth <= nd->depth
    2268             :         int err;
    2269             : 
    2270           3 :         nd->last_type = LAST_ROOT;
    2271           3 :         nd->flags |= LOOKUP_PARENT;
    2272           3 :         if (IS_ERR(name))
    2273           0 :                 return PTR_ERR(name);
    2274           6 :         while (*name=='/')
    2275           3 :                 name++;
    2276           3 :         if (!*name) {
    2277           0 :                 nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
    2278           0 :                 return 0;
    2279             :         }
    2280             : 
    2281             :         /* At this point we know we have a real path component. */
    2282             :         for(;;) {
    2283             :                 struct user_namespace *mnt_userns;
    2284             :                 const char *link;
    2285             :                 u64 hash_len;
    2286             :                 int type;
    2287             : 
    2288           8 :                 mnt_userns = mnt_user_ns(nd->path.mnt);
    2289           4 :                 err = may_lookup(mnt_userns, nd);
    2290           4 :                 if (err)
    2291             :                         return err;
    2292             : 
    2293           4 :                 hash_len = hash_name(nd->path.dentry, name);
    2294             : 
    2295           4 :                 type = LAST_NORM;
    2296           4 :                 if (name[0] == '.') switch (hashlen_len(hash_len)) {
    2297             :                         case 2:
    2298           0 :                                 if (name[1] == '.') {
    2299           0 :                                         type = LAST_DOTDOT;
    2300           0 :                                         nd->state |= ND_JUMPED;
    2301             :                                 }
    2302             :                                 break;
    2303             :                         case 1:
    2304           0 :                                 type = LAST_DOT;
    2305             :                 }
    2306           4 :                 if (likely(type == LAST_NORM)) {
    2307           4 :                         struct dentry *parent = nd->path.dentry;
    2308           4 :                         nd->state &= ~ND_JUMPED;
    2309           4 :                         if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
    2310           0 :                                 struct qstr this = { { .hash_len = hash_len }, .name = name };
    2311           0 :                                 err = parent->d_op->d_hash(parent, &this);
    2312           0 :                                 if (err < 0)
    2313           0 :                                         return err;
    2314           0 :                                 hash_len = this.hash_len;
    2315           0 :                                 name = this.name;
    2316             :                         }
    2317             :                 }
    2318             : 
    2319           4 :                 nd->last.hash_len = hash_len;
    2320           4 :                 nd->last.name = name;
    2321           4 :                 nd->last_type = type;
    2322             : 
    2323           4 :                 name += hashlen_len(hash_len);
    2324           4 :                 if (!*name)
    2325             :                         goto OK;
    2326             :                 /*
    2327             :                  * If it wasn't NUL, we know it was '/'. Skip that
    2328             :                  * slash, and continue until no more slashes.
    2329             :                  */
    2330             :                 do {
    2331           1 :                         name++;
    2332           1 :                 } while (unlikely(*name == '/'));
    2333           1 :                 if (unlikely(!*name)) {
    2334             : OK:
    2335             :                         /* pathname or trailing symlink, done */
    2336           3 :                         if (!depth) {
    2337           6 :                                 nd->dir_uid = i_uid_into_mnt(mnt_userns, nd->inode);
    2338           3 :                                 nd->dir_mode = nd->inode->i_mode;
    2339           3 :                                 nd->flags &= ~LOOKUP_PARENT;
    2340           3 :                                 return 0;
    2341             :                         }
    2342             :                         /* last component of nested symlink */
    2343           0 :                         name = nd->stack[--depth].name;
    2344           0 :                         link = walk_component(nd, 0);
    2345             :                 } else {
    2346             :                         /* not the last component */
    2347           1 :                         link = walk_component(nd, WALK_MORE);
    2348             :                 }
    2349           1 :                 if (unlikely(link)) {
    2350           0 :                         if (IS_ERR(link))
    2351           0 :                                 return PTR_ERR(link);
    2352             :                         /* a symlink to follow */
    2353           0 :                         nd->stack[depth++].name = name;
    2354           0 :                         name = link;
    2355           0 :                         continue;
    2356             :                 }
    2357           2 :                 if (unlikely(!d_can_lookup(nd->path.dentry))) {
    2358           0 :                         if (nd->flags & LOOKUP_RCU) {
    2359           0 :                                 if (!try_to_unlazy(nd))
    2360             :                                         return -ECHILD;
    2361             :                         }
    2362             :                         return -ENOTDIR;
    2363             :                 }
    2364             :         }
    2365             : }
    2366             : 
    2367             : /* must be paired with terminate_walk() */
    2368           3 : static const char *path_init(struct nameidata *nd, unsigned flags)
    2369             : {
    2370             :         int error;
    2371           3 :         const char *s = nd->name->name;
    2372             : 
    2373             :         /* LOOKUP_CACHED requires RCU, ask caller to retry */
    2374           3 :         if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
    2375             :                 return ERR_PTR(-EAGAIN);
    2376             : 
    2377           3 :         if (!*s)
    2378           0 :                 flags &= ~LOOKUP_RCU;
    2379           3 :         if (flags & LOOKUP_RCU)
    2380             :                 rcu_read_lock();
    2381             : 
    2382           3 :         nd->flags = flags;
    2383           3 :         nd->state |= ND_JUMPED;
    2384             : 
    2385           6 :         nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
    2386           6 :         nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
    2387           3 :         smp_rmb();
    2388             : 
    2389           3 :         if (nd->state & ND_ROOT_PRESET) {
    2390           0 :                 struct dentry *root = nd->root.dentry;
    2391           0 :                 struct inode *inode = root->d_inode;
    2392           0 :                 if (*s && unlikely(!d_can_lookup(root)))
    2393             :                         return ERR_PTR(-ENOTDIR);
    2394           0 :                 nd->path = nd->root;
    2395           0 :                 nd->inode = inode;
    2396           0 :                 if (flags & LOOKUP_RCU) {
    2397           0 :                         nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
    2398           0 :                         nd->root_seq = nd->seq;
    2399             :                 } else {
    2400           0 :                         path_get(&nd->path);
    2401             :                 }
    2402             :                 return s;
    2403             :         }
    2404             : 
    2405           3 :         nd->root.mnt = NULL;
    2406             : 
    2407             :         /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
    2408           3 :         if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
    2409           3 :                 error = nd_jump_root(nd);
    2410           3 :                 if (unlikely(error))
    2411           0 :                         return ERR_PTR(error);
    2412             :                 return s;
    2413             :         }
    2414             : 
    2415             :         /* Relative pathname -- get the starting-point it is relative to. */
    2416           0 :         if (nd->dfd == AT_FDCWD) {
    2417           0 :                 if (flags & LOOKUP_RCU) {
    2418           0 :                         struct fs_struct *fs = current->fs;
    2419             :                         unsigned seq;
    2420             : 
    2421             :                         do {
    2422           0 :                                 seq = read_seqcount_begin(&fs->seq);
    2423           0 :                                 nd->path = fs->pwd;
    2424           0 :                                 nd->inode = nd->path.dentry->d_inode;
    2425           0 :                                 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
    2426           0 :                         } while (read_seqcount_retry(&fs->seq, seq));
    2427             :                 } else {
    2428           0 :                         get_fs_pwd(current->fs, &nd->path);
    2429           0 :                         nd->inode = nd->path.dentry->d_inode;
    2430             :                 }
    2431             :         } else {
    2432             :                 /* Caller must check execute permissions on the starting path component */
    2433           0 :                 struct fd f = fdget_raw(nd->dfd);
    2434             :                 struct dentry *dentry;
    2435             : 
    2436           0 :                 if (!f.file)
    2437           0 :                         return ERR_PTR(-EBADF);
    2438             : 
    2439           0 :                 dentry = f.file->f_path.dentry;
    2440             : 
    2441           0 :                 if (*s && unlikely(!d_can_lookup(dentry))) {
    2442           0 :                         fdput(f);
    2443             :                         return ERR_PTR(-ENOTDIR);
    2444             :                 }
    2445             : 
    2446           0 :                 nd->path = f.file->f_path;
    2447           0 :                 if (flags & LOOKUP_RCU) {
    2448           0 :                         nd->inode = nd->path.dentry->d_inode;
    2449           0 :                         nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
    2450             :                 } else {
    2451           0 :                         path_get(&nd->path);
    2452           0 :                         nd->inode = nd->path.dentry->d_inode;
    2453             :                 }
    2454           0 :                 fdput(f);
    2455             :         }
    2456             : 
    2457             :         /* For scoped-lookups we need to set the root to the dirfd as well. */
    2458           0 :         if (flags & LOOKUP_IS_SCOPED) {
    2459           0 :                 nd->root = nd->path;
    2460           0 :                 if (flags & LOOKUP_RCU) {
    2461           0 :                         nd->root_seq = nd->seq;
    2462             :                 } else {
    2463           0 :                         path_get(&nd->root);
    2464           0 :                         nd->state |= ND_ROOT_GRABBED;
    2465             :                 }
    2466             :         }
    2467             :         return s;
    2468             : }
    2469             : 
    2470           0 : static inline const char *lookup_last(struct nameidata *nd)
    2471             : {
    2472           0 :         if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
    2473           0 :                 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
    2474             : 
    2475           0 :         return walk_component(nd, WALK_TRAILING);
    2476             : }
    2477             : 
    2478           0 : static int handle_lookup_down(struct nameidata *nd)
    2479             : {
    2480           0 :         if (!(nd->flags & LOOKUP_RCU))
    2481           0 :                 dget(nd->path.dentry);
    2482           0 :         return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
    2483             :                         nd->path.dentry, nd->inode, nd->seq));
    2484             : }
    2485             : 
    2486             : /* Returns 0 and nd will be valid on success; returns an error otherwise. */
    2487           0 : static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
    2488             : {
    2489           0 :         const char *s = path_init(nd, flags);
    2490             :         int err;
    2491             : 
    2492           0 :         if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
    2493           0 :                 err = handle_lookup_down(nd);
    2494           0 :                 if (unlikely(err < 0))
    2495           0 :                         s = ERR_PTR(err);
    2496             :         }
    2497             : 
    2498           0 :         while (!(err = link_path_walk(s, nd)) &&
    2499             :                (s = lookup_last(nd)) != NULL)
    2500             :                 ;
    2501           0 :         if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
    2502           0 :                 err = handle_lookup_down(nd);
    2503           0 :                 nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please...
    2504             :         }
    2505           0 :         if (!err)
    2506           0 :                 err = complete_walk(nd);
    2507             : 
    2508           0 :         if (!err && nd->flags & LOOKUP_DIRECTORY)
    2509           0 :                 if (!d_can_lookup(nd->path.dentry))
    2510           0 :                         err = -ENOTDIR;
    2511           0 :         if (!err) {
    2512           0 :                 *path = nd->path;
    2513           0 :                 nd->path.mnt = NULL;
    2514           0 :                 nd->path.dentry = NULL;
    2515             :         }
    2516           0 :         terminate_walk(nd);
    2517           0 :         return err;
    2518             : }
    2519             : 
    2520           0 : int filename_lookup(int dfd, struct filename *name, unsigned flags,
    2521             :                     struct path *path, struct path *root)
    2522             : {
    2523             :         int retval;
    2524             :         struct nameidata nd;
    2525           0 :         if (IS_ERR(name))
    2526           0 :                 return PTR_ERR(name);
    2527           0 :         set_nameidata(&nd, dfd, name, root);
    2528           0 :         retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
    2529           0 :         if (unlikely(retval == -ECHILD))
    2530           0 :                 retval = path_lookupat(&nd, flags, path);
    2531           0 :         if (unlikely(retval == -ESTALE))
    2532           0 :                 retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
    2533             : 
    2534             :         if (likely(!retval))
    2535             :                 audit_inode(name, path->dentry,
    2536             :                             flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
    2537           0 :         restore_nameidata();
    2538           0 :         return retval;
    2539             : }
    2540             : 
    2541             : /* Returns 0 and nd will be valid on success; returns an error otherwise. */
    2542           3 : static int path_parentat(struct nameidata *nd, unsigned flags,
    2543             :                                 struct path *parent)
    2544             : {
    2545           3 :         const char *s = path_init(nd, flags);
    2546           3 :         int err = link_path_walk(s, nd);
    2547           3 :         if (!err)
    2548           3 :                 err = complete_walk(nd);
    2549           3 :         if (!err) {
    2550           3 :                 *parent = nd->path;
    2551           3 :                 nd->path.mnt = NULL;
    2552           3 :                 nd->path.dentry = NULL;
    2553             :         }
    2554           3 :         terminate_walk(nd);
    2555           3 :         return err;
    2556             : }
    2557             : 
    2558             : /* Note: this does not consume "name" */
    2559           3 : static int filename_parentat(int dfd, struct filename *name,
    2560             :                              unsigned int flags, struct path *parent,
    2561             :                              struct qstr *last, int *type)
    2562             : {
    2563             :         int retval;
    2564             :         struct nameidata nd;
    2565             : 
    2566           3 :         if (IS_ERR(name))
    2567           0 :                 return PTR_ERR(name);
    2568           3 :         set_nameidata(&nd, dfd, name, NULL);
    2569           3 :         retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
    2570           3 :         if (unlikely(retval == -ECHILD))
    2571           0 :                 retval = path_parentat(&nd, flags, parent);
    2572           3 :         if (unlikely(retval == -ESTALE))
    2573           0 :                 retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
    2574           3 :         if (likely(!retval)) {
    2575           3 :                 *last = nd.last;
    2576           3 :                 *type = nd.last_type;
    2577           3 :                 audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
    2578             :         }
    2579           3 :         restore_nameidata();
    2580           3 :         return retval;
    2581             : }
    2582             : 
    2583             : /* does lookup, returns the object with parent locked */
    2584           0 : static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
    2585             : {
    2586             :         struct dentry *d;
    2587             :         struct qstr last;
    2588             :         int type, error;
    2589             : 
    2590           0 :         error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
    2591           0 :         if (error)
    2592           0 :                 return ERR_PTR(error);
    2593           0 :         if (unlikely(type != LAST_NORM)) {
    2594           0 :                 path_put(path);
    2595           0 :                 return ERR_PTR(-EINVAL);
    2596             :         }
    2597           0 :         inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
    2598           0 :         d = __lookup_hash(&last, path->dentry, 0);
    2599           0 :         if (IS_ERR(d)) {
    2600           0 :                 inode_unlock(path->dentry->d_inode);
    2601             :                 path_put(path);
    2602             :         }
    2603             :         return d;
    2604             : }
    2605             : 
    2606           0 : struct dentry *kern_path_locked(const char *name, struct path *path)
    2607             : {
    2608           0 :         struct filename *filename = getname_kernel(name);
    2609           0 :         struct dentry *res = __kern_path_locked(filename, path);
    2610             : 
    2611           0 :         putname(filename);
    2612           0 :         return res;
    2613             : }
    2614             : 
    2615           0 : int kern_path(const char *name, unsigned int flags, struct path *path)
    2616             : {
    2617           0 :         struct filename *filename = getname_kernel(name);
    2618           0 :         int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
    2619             : 
    2620           0 :         putname(filename);
    2621           0 :         return ret;
    2622             : 
    2623             : }
    2624             : EXPORT_SYMBOL(kern_path);
    2625             : 
    2626             : /**
    2627             :  * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
    2628             :  * @dentry:  pointer to dentry of the base directory
    2629             :  * @mnt: pointer to vfs mount of the base directory
    2630             :  * @name: pointer to file name
    2631             :  * @flags: lookup flags
    2632             :  * @path: pointer to struct path to fill
    2633             :  */
    2634           0 : int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
    2635             :                     const char *name, unsigned int flags,
    2636             :                     struct path *path)
    2637             : {
    2638             :         struct filename *filename;
    2639           0 :         struct path root = {.mnt = mnt, .dentry = dentry};
    2640             :         int ret;
    2641             : 
    2642           0 :         filename = getname_kernel(name);
    2643             :         /* the first argument of filename_lookup() is ignored with root */
    2644           0 :         ret = filename_lookup(AT_FDCWD, filename, flags, path, &root);
    2645           0 :         putname(filename);
    2646           0 :         return ret;
    2647             : }
    2648             : EXPORT_SYMBOL(vfs_path_lookup);
    2649             : 
    2650           0 : static int lookup_one_common(struct user_namespace *mnt_userns,
    2651             :                              const char *name, struct dentry *base, int len,
    2652             :                              struct qstr *this)
    2653             : {
    2654           0 :         this->name = name;
    2655           0 :         this->len = len;
    2656           0 :         this->hash = full_name_hash(base, name, len);
    2657           0 :         if (!len)
    2658             :                 return -EACCES;
    2659             : 
    2660           0 :         if (unlikely(name[0] == '.')) {
    2661           0 :                 if (len < 2 || (len == 2 && name[1] == '.'))
    2662             :                         return -EACCES;
    2663             :         }
    2664             : 
    2665           0 :         while (len--) {
    2666           0 :                 unsigned int c = *(const unsigned char *)name++;
    2667           0 :                 if (c == '/' || c == '\0')
    2668             :                         return -EACCES;
    2669             :         }
    2670             :         /*
    2671             :          * See if the low-level filesystem might want
    2672             :          * to use its own hash..
    2673             :          */
    2674           0 :         if (base->d_flags & DCACHE_OP_HASH) {
    2675           0 :                 int err = base->d_op->d_hash(base, this);
    2676           0 :                 if (err < 0)
    2677             :                         return err;
    2678             :         }
    2679             : 
    2680           0 :         return inode_permission(mnt_userns, base->d_inode, MAY_EXEC);
    2681             : }
    2682             : 
    2683             : /**
    2684             :  * try_lookup_one_len - filesystem helper to lookup single pathname component
    2685             :  * @name:       pathname component to lookup
    2686             :  * @base:       base directory to lookup from
    2687             :  * @len:        length in bytes of the component at @name to look up
    2688             :  *
    2689             :  * Look up a dentry by name in the dcache, returning NULL if it does not
    2690             :  * currently exist.  The function does not try to create a dentry.
    2691             :  *
    2692             :  * Note that this routine is purely a helper for filesystem usage and should
    2693             :  * not be called by generic code.
    2694             :  *
    2695             :  * The caller must hold base->i_mutex.
    2696             :  */
    2697           0 : struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
    2698             : {
    2699             :         struct qstr this;
    2700             :         int err;
    2701             : 
    2702           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2703             : 
    2704           0 :         err = lookup_one_common(&init_user_ns, name, base, len, &this);
    2705           0 :         if (err)
    2706           0 :                 return ERR_PTR(err);
    2707             : 
    2708           0 :         return lookup_dcache(&this, base, 0);
    2709             : }
    2710             : EXPORT_SYMBOL(try_lookup_one_len);
    2711             : 
    2712             : /**
    2713             :  * lookup_one_len - filesystem helper to lookup single pathname component
    2714             :  * @name:       pathname component to lookup
    2715             :  * @base:       base directory to lookup from
    2716             :  * @len:        length in bytes of the component at @name to look up
    2717             :  *
    2718             :  * Note that this routine is purely a helper for filesystem usage and should
    2719             :  * not be called by generic code.
    2720             :  *
    2721             :  * The caller must hold base->i_mutex.
    2722             :  */
    2723           0 : struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
    2724             : {
    2725             :         struct dentry *dentry;
    2726             :         struct qstr this;
    2727             :         int err;
    2728             : 
    2729           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2730             : 
    2731           0 :         err = lookup_one_common(&init_user_ns, name, base, len, &this);
    2732           0 :         if (err)
    2733           0 :                 return ERR_PTR(err);
    2734             : 
    2735           0 :         dentry = lookup_dcache(&this, base, 0);
    2736           0 :         return dentry ? dentry : __lookup_slow(&this, base, 0);
    2737             : }
    2738             : EXPORT_SYMBOL(lookup_one_len);
    2739             : 
    2740             : /**
    2741             :  * lookup_one - filesystem helper to lookup single pathname component
    2742             :  * @mnt_userns: user namespace of the mount the lookup is performed from
    2743             :  * @name:       pathname component to lookup
    2744             :  * @base:       base directory to lookup from
    2745             :  * @len:        length in bytes of the component at @name to look up
    2746             :  *
    2747             :  * Note that this routine is purely a helper for filesystem usage and should
    2748             :  * not be called by generic code.
    2749             :  *
    2750             :  * The caller must hold base->i_mutex.
    2751             :  */
    2752           0 : struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
    2753             :                           struct dentry *base, int len)
    2754             : {
    2755             :         struct dentry *dentry;
    2756             :         struct qstr this;
    2757             :         int err;
    2758             : 
    2759           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2760             : 
    2761           0 :         err = lookup_one_common(mnt_userns, name, base, len, &this);
    2762           0 :         if (err)
    2763           0 :                 return ERR_PTR(err);
    2764             : 
    2765           0 :         dentry = lookup_dcache(&this, base, 0);
    2766           0 :         return dentry ? dentry : __lookup_slow(&this, base, 0);
    2767             : }
    2768             : EXPORT_SYMBOL(lookup_one);
    2769             : 
    2770             : /**
    2771             :  * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
    2772             :  * @name:       pathname component to lookup
    2773             :  * @base:       base directory to lookup from
    2774             :  * @len:        maximum length @name should be interpreted to
    2775             :  *
    2776             :  * Note that this routine is purely a helper for filesystem usage and should
    2777             :  * not be called by generic code.
    2778             :  *
    2779             :  * Unlike lookup_one_len, it should be called without the parent
    2780             :  * i_mutex held, and will take the i_mutex itself if necessary.
    2781             :  */
    2782           0 : struct dentry *lookup_one_len_unlocked(const char *name,
    2783             :                                        struct dentry *base, int len)
    2784             : {
    2785             :         struct qstr this;
    2786             :         int err;
    2787             :         struct dentry *ret;
    2788             : 
    2789           0 :         err = lookup_one_common(&init_user_ns, name, base, len, &this);
    2790           0 :         if (err)
    2791           0 :                 return ERR_PTR(err);
    2792             : 
    2793           0 :         ret = lookup_dcache(&this, base, 0);
    2794           0 :         if (!ret)
    2795           0 :                 ret = lookup_slow(&this, base, 0);
    2796             :         return ret;
    2797             : }
    2798             : EXPORT_SYMBOL(lookup_one_len_unlocked);
    2799             : 
    2800             : /*
    2801             :  * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
    2802             :  * on negatives.  Returns known positive or ERR_PTR(); that's what
    2803             :  * most of the users want.  Note that pinned negative with unlocked parent
    2804             :  * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
    2805             :  * need to be very careful; pinned positives have ->d_inode stable, so
    2806             :  * this one avoids such problems.
    2807             :  */
    2808           0 : struct dentry *lookup_positive_unlocked(const char *name,
    2809             :                                        struct dentry *base, int len)
    2810             : {
    2811           0 :         struct dentry *ret = lookup_one_len_unlocked(name, base, len);
    2812           0 :         if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
    2813           0 :                 dput(ret);
    2814           0 :                 ret = ERR_PTR(-ENOENT);
    2815             :         }
    2816           0 :         return ret;
    2817             : }
    2818             : EXPORT_SYMBOL(lookup_positive_unlocked);
    2819             : 
    2820             : #ifdef CONFIG_UNIX98_PTYS
    2821           0 : int path_pts(struct path *path)
    2822             : {
    2823             :         /* Find something mounted on "pts" in the same directory as
    2824             :          * the input path.
    2825             :          */
    2826           0 :         struct dentry *parent = dget_parent(path->dentry);
    2827             :         struct dentry *child;
    2828           0 :         struct qstr this = QSTR_INIT("pts", 3);
    2829             : 
    2830           0 :         if (unlikely(!path_connected(path->mnt, parent))) {
    2831           0 :                 dput(parent);
    2832           0 :                 return -ENOENT;
    2833             :         }
    2834           0 :         dput(path->dentry);
    2835           0 :         path->dentry = parent;
    2836           0 :         child = d_hash_and_lookup(parent, &this);
    2837           0 :         if (!child)
    2838             :                 return -ENOENT;
    2839             : 
    2840           0 :         path->dentry = child;
    2841           0 :         dput(parent);
    2842           0 :         follow_down(path);
    2843           0 :         return 0;
    2844             : }
    2845             : #endif
    2846             : 
    2847           0 : int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
    2848             :                  struct path *path, int *empty)
    2849             : {
    2850           0 :         struct filename *filename = getname_flags(name, flags, empty);
    2851           0 :         int ret = filename_lookup(dfd, filename, flags, path, NULL);
    2852             : 
    2853           0 :         putname(filename);
    2854           0 :         return ret;
    2855             : }
    2856             : EXPORT_SYMBOL(user_path_at_empty);
    2857             : 
    2858           0 : int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
    2859             :                    struct inode *inode)
    2860             : {
    2861           0 :         kuid_t fsuid = current_fsuid();
    2862             : 
    2863           0 :         if (uid_eq(i_uid_into_mnt(mnt_userns, inode), fsuid))
    2864             :                 return 0;
    2865           0 :         if (uid_eq(i_uid_into_mnt(mnt_userns, dir), fsuid))
    2866             :                 return 0;
    2867           0 :         return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
    2868             : }
    2869             : EXPORT_SYMBOL(__check_sticky);
    2870             : 
    2871             : /*
    2872             :  *      Check whether we can remove a link victim from directory dir, check
    2873             :  *  whether the type of victim is right.
    2874             :  *  1. We can't do it if dir is read-only (done in permission())
    2875             :  *  2. We should have write and exec permissions on dir
    2876             :  *  3. We can't remove anything from append-only dir
    2877             :  *  4. We can't do anything with immutable dir (done in permission())
    2878             :  *  5. If the sticky bit on dir is set we should either
    2879             :  *      a. be owner of dir, or
    2880             :  *      b. be owner of victim, or
    2881             :  *      c. have CAP_FOWNER capability
    2882             :  *  6. If the victim is append-only or immutable we can't do anything with
    2883             :  *     links pointing to it.
    2884             :  *  7. If the victim has an unknown uid or gid we can't change the inode.
    2885             :  *  8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
    2886             :  *  9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
    2887             :  * 10. We can't remove a root or mountpoint.
    2888             :  * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
    2889             :  *     nfs_async_unlink().
    2890             :  */
    2891           0 : static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
    2892             :                       struct dentry *victim, bool isdir)
    2893             : {
    2894           0 :         struct inode *inode = d_backing_inode(victim);
    2895             :         int error;
    2896             : 
    2897           0 :         if (d_is_negative(victim))
    2898             :                 return -ENOENT;
    2899           0 :         BUG_ON(!inode);
    2900             : 
    2901           0 :         BUG_ON(victim->d_parent->d_inode != dir);
    2902             : 
    2903             :         /* Inode writeback is not safe when the uid or gid are invalid. */
    2904           0 :         if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
    2905           0 :             !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
    2906             :                 return -EOVERFLOW;
    2907             : 
    2908           0 :         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
    2909             : 
    2910           0 :         error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
    2911           0 :         if (error)
    2912             :                 return error;
    2913           0 :         if (IS_APPEND(dir))
    2914             :                 return -EPERM;
    2915             : 
    2916           0 :         if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) ||
    2917           0 :             IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
    2918           0 :             HAS_UNMAPPED_ID(mnt_userns, inode))
    2919             :                 return -EPERM;
    2920           0 :         if (isdir) {
    2921           0 :                 if (!d_is_dir(victim))
    2922             :                         return -ENOTDIR;
    2923           0 :                 if (IS_ROOT(victim))
    2924             :                         return -EBUSY;
    2925           0 :         } else if (d_is_dir(victim))
    2926             :                 return -EISDIR;
    2927           0 :         if (IS_DEADDIR(dir))
    2928             :                 return -ENOENT;
    2929           0 :         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
    2930             :                 return -EBUSY;
    2931           0 :         return 0;
    2932             : }
    2933             : 
    2934             : /*      Check whether we can create an object with dentry child in directory
    2935             :  *  dir.
    2936             :  *  1. We can't do it if child already exists (open has special treatment for
    2937             :  *     this case, but since we are inlined it's OK)
    2938             :  *  2. We can't do it if dir is read-only (done in permission())
    2939             :  *  3. We can't do it if the fs can't represent the fsuid or fsgid.
    2940             :  *  4. We should have write and exec permissions on dir
    2941             :  *  5. We can't do it if dir is immutable (done in permission())
    2942             :  */
    2943           3 : static inline int may_create(struct user_namespace *mnt_userns,
    2944             :                              struct inode *dir, struct dentry *child)
    2945             : {
    2946           3 :         audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
    2947           3 :         if (child->d_inode)
    2948             :                 return -EEXIST;
    2949           3 :         if (IS_DEADDIR(dir))
    2950             :                 return -ENOENT;
    2951           3 :         if (!fsuidgid_has_mapping(dir->i_sb, mnt_userns))
    2952             :                 return -EOVERFLOW;
    2953             : 
    2954           3 :         return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
    2955             : }
    2956             : 
    2957             : /*
    2958             :  * p1 and p2 should be directories on the same fs.
    2959             :  */
    2960           0 : struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
    2961             : {
    2962             :         struct dentry *p;
    2963             : 
    2964           0 :         if (p1 == p2) {
    2965           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    2966           0 :                 return NULL;
    2967             :         }
    2968             : 
    2969           0 :         mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
    2970             : 
    2971           0 :         p = d_ancestor(p2, p1);
    2972           0 :         if (p) {
    2973           0 :                 inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
    2974           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
    2975           0 :                 return p;
    2976             :         }
    2977             : 
    2978           0 :         p = d_ancestor(p1, p2);
    2979           0 :         if (p) {
    2980           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    2981           0 :                 inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
    2982           0 :                 return p;
    2983             :         }
    2984             : 
    2985           0 :         inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    2986           0 :         inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
    2987           0 :         return NULL;
    2988             : }
    2989             : EXPORT_SYMBOL(lock_rename);
    2990             : 
    2991           0 : void unlock_rename(struct dentry *p1, struct dentry *p2)
    2992             : {
    2993           0 :         inode_unlock(p1->d_inode);
    2994           0 :         if (p1 != p2) {
    2995           0 :                 inode_unlock(p2->d_inode);
    2996           0 :                 mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
    2997             :         }
    2998           0 : }
    2999             : EXPORT_SYMBOL(unlock_rename);
    3000             : 
    3001             : /**
    3002             :  * vfs_create - create new file
    3003             :  * @mnt_userns: user namespace of the mount the inode was found from
    3004             :  * @dir:        inode of the parent directory
    3005             :  * @dentry:     dentry of the file to be created
    3006             :  * @mode:       mode of the new file
    3007             :  * @want_excl:  whether the file must not yet exist
    3008             :  *
    3009             :  * Create a new file.
    3010             :  *
    3011             :  * If the inode has been found through an idmapped mount the user namespace of
    3012             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    3013             :  * care to map the inode according to @mnt_userns before checking permissions.
    3014             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3015             :  * raw inode simply pass init_user_ns.
    3016             :  */
    3017           0 : int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
    3018             :                struct dentry *dentry, umode_t mode, bool want_excl)
    3019             : {
    3020           0 :         int error = may_create(mnt_userns, dir, dentry);
    3021           0 :         if (error)
    3022             :                 return error;
    3023             : 
    3024           0 :         if (!dir->i_op->create)
    3025             :                 return -EACCES; /* shouldn't it be ENOSYS? */
    3026           0 :         mode &= S_IALLUGO;
    3027           0 :         mode |= S_IFREG;
    3028           0 :         error = security_inode_create(dir, dentry, mode);
    3029             :         if (error)
    3030             :                 return error;
    3031           0 :         error = dir->i_op->create(mnt_userns, dir, dentry, mode, want_excl);
    3032           0 :         if (!error)
    3033             :                 fsnotify_create(dir, dentry);
    3034             :         return error;
    3035             : }
    3036             : EXPORT_SYMBOL(vfs_create);
    3037             : 
    3038           0 : int vfs_mkobj(struct dentry *dentry, umode_t mode,
    3039             :                 int (*f)(struct dentry *, umode_t, void *),
    3040             :                 void *arg)
    3041             : {
    3042           0 :         struct inode *dir = dentry->d_parent->d_inode;
    3043           0 :         int error = may_create(&init_user_ns, dir, dentry);
    3044           0 :         if (error)
    3045             :                 return error;
    3046             : 
    3047           0 :         mode &= S_IALLUGO;
    3048           0 :         mode |= S_IFREG;
    3049           0 :         error = security_inode_create(dir, dentry, mode);
    3050             :         if (error)
    3051             :                 return error;
    3052           0 :         error = f(dentry, mode, arg);
    3053           0 :         if (!error)
    3054             :                 fsnotify_create(dir, dentry);
    3055             :         return error;
    3056             : }
    3057             : EXPORT_SYMBOL(vfs_mkobj);
    3058             : 
    3059           0 : bool may_open_dev(const struct path *path)
    3060             : {
    3061           0 :         return !(path->mnt->mnt_flags & MNT_NODEV) &&
    3062           0 :                 !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
    3063             : }
    3064             : 
    3065           0 : static int may_open(struct user_namespace *mnt_userns, const struct path *path,
    3066             :                     int acc_mode, int flag)
    3067             : {
    3068           0 :         struct dentry *dentry = path->dentry;
    3069           0 :         struct inode *inode = dentry->d_inode;
    3070             :         int error;
    3071             : 
    3072           0 :         if (!inode)
    3073             :                 return -ENOENT;
    3074             : 
    3075           0 :         switch (inode->i_mode & S_IFMT) {
    3076             :         case S_IFLNK:
    3077             :                 return -ELOOP;
    3078             :         case S_IFDIR:
    3079           0 :                 if (acc_mode & MAY_WRITE)
    3080             :                         return -EISDIR;
    3081           0 :                 if (acc_mode & MAY_EXEC)
    3082             :                         return -EACCES;
    3083             :                 break;
    3084             :         case S_IFBLK:
    3085             :         case S_IFCHR:
    3086           0 :                 if (!may_open_dev(path))
    3087             :                         return -EACCES;
    3088             :                 fallthrough;
    3089             :         case S_IFIFO:
    3090             :         case S_IFSOCK:
    3091           0 :                 if (acc_mode & MAY_EXEC)
    3092             :                         return -EACCES;
    3093           0 :                 flag &= ~O_TRUNC;
    3094           0 :                 break;
    3095             :         case S_IFREG:
    3096           0 :                 if ((acc_mode & MAY_EXEC) && path_noexec(path))
    3097             :                         return -EACCES;
    3098             :                 break;
    3099             :         }
    3100             : 
    3101           0 :         error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode);
    3102           0 :         if (error)
    3103             :                 return error;
    3104             : 
    3105             :         /*
    3106             :          * An append-only file must be opened in append mode for writing.
    3107             :          */
    3108           0 :         if (IS_APPEND(inode)) {
    3109           0 :                 if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
    3110             :                         return -EPERM;
    3111           0 :                 if (flag & O_TRUNC)
    3112             :                         return -EPERM;
    3113             :         }
    3114             : 
    3115             :         /* O_NOATIME can only be set by the owner or superuser */
    3116           0 :         if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode))
    3117             :                 return -EPERM;
    3118             : 
    3119             :         return 0;
    3120             : }
    3121             : 
    3122           0 : static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
    3123             : {
    3124           0 :         const struct path *path = &filp->f_path;
    3125           0 :         struct inode *inode = path->dentry->d_inode;
    3126           0 :         int error = get_write_access(inode);
    3127           0 :         if (error)
    3128             :                 return error;
    3129             : 
    3130           0 :         error = security_path_truncate(path);
    3131             :         if (!error) {
    3132           0 :                 error = do_truncate(mnt_userns, path->dentry, 0,
    3133             :                                     ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
    3134             :                                     filp);
    3135             :         }
    3136           0 :         put_write_access(inode);
    3137           0 :         return error;
    3138             : }
    3139             : 
    3140             : static inline int open_to_namei_flags(int flag)
    3141             : {
    3142           0 :         if ((flag & O_ACCMODE) == 3)
    3143           0 :                 flag--;
    3144             :         return flag;
    3145             : }
    3146             : 
    3147           0 : static int may_o_create(struct user_namespace *mnt_userns,
    3148             :                         const struct path *dir, struct dentry *dentry,
    3149             :                         umode_t mode)
    3150             : {
    3151           0 :         int error = security_path_mknod(dir, dentry, mode, 0);
    3152             :         if (error)
    3153             :                 return error;
    3154             : 
    3155           0 :         if (!fsuidgid_has_mapping(dir->dentry->d_sb, mnt_userns))
    3156             :                 return -EOVERFLOW;
    3157             : 
    3158           0 :         error = inode_permission(mnt_userns, dir->dentry->d_inode,
    3159             :                                  MAY_WRITE | MAY_EXEC);
    3160           0 :         if (error)
    3161             :                 return error;
    3162             : 
    3163           0 :         return security_inode_create(dir->dentry->d_inode, dentry, mode);
    3164             : }
    3165             : 
    3166             : /*
    3167             :  * Attempt to atomically look up, create and open a file from a negative
    3168             :  * dentry.
    3169             :  *
    3170             :  * Returns the dentry if successful.  The file will have been created and
    3171             :  * attached to @file by the filesystem calling finish_open().
    3172             :  *
    3173             :  * If the file was looked up only or didn't need creating, FMODE_OPENED won't
    3174             :  * be set.  The caller will need to perform the open themselves.  @path will
    3175             :  * have been updated to point to the new dentry.  This may be negative.
    3176             :  *
    3177             :  * Returns an ERR_PTR()-encoded error code otherwise.
    3178             :  */
    3179           0 : static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
    3180             :                                   struct file *file,
    3181             :                                   int open_flag, umode_t mode)
    3182             : {
    3183           0 :         struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
    3184           0 :         struct inode *dir =  nd->path.dentry->d_inode;
    3185             :         int error;
    3186             : 
    3187           0 :         if (nd->flags & LOOKUP_DIRECTORY)
    3188           0 :                 open_flag |= O_DIRECTORY;
    3189             : 
    3190           0 :         file->f_path.dentry = DENTRY_NOT_SET;
    3191           0 :         file->f_path.mnt = nd->path.mnt;
    3192           0 :         error = dir->i_op->atomic_open(dir, dentry, file,
    3193           0 :                                        open_to_namei_flags(open_flag), mode);
    3194           0 :         d_lookup_done(dentry);
    3195           0 :         if (!error) {
    3196           0 :                 if (file->f_mode & FMODE_OPENED) {
    3197           0 :                         if (unlikely(dentry != file->f_path.dentry)) {
    3198           0 :                                 dput(dentry);
    3199           0 :                                 dentry = dget(file->f_path.dentry);
    3200             :                         }
    3201           0 :                 } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
    3202             :                         error = -EIO;
    3203             :                 } else {
    3204           0 :                         if (file->f_path.dentry) {
    3205           0 :                                 dput(dentry);
    3206           0 :                                 dentry = file->f_path.dentry;
    3207             :                         }
    3208           0 :                         if (unlikely(d_is_negative(dentry)))
    3209           0 :                                 error = -ENOENT;
    3210             :                 }
    3211             :         }
    3212           0 :         if (error) {
    3213           0 :                 dput(dentry);
    3214           0 :                 dentry = ERR_PTR(error);
    3215             :         }
    3216           0 :         return dentry;
    3217             : }
    3218             : 
    3219             : /*
    3220             :  * Look up and maybe create and open the last component.
    3221             :  *
    3222             :  * Must be called with parent locked (exclusive in O_CREAT case).
    3223             :  *
    3224             :  * Returns the dentry on success, that is, if
    3225             :  *  the file was successfully atomically created (if necessary) and opened, or
    3226             :  *  the file was not completely opened at this time, though lookups and
    3227             :  *  creations were performed.
    3228             :  * These cases are distinguished by presence of FMODE_OPENED on file->f_mode.
    3229             :  * In the latter case dentry returned in @path might be negative if O_CREAT
    3230             :  * hadn't been specified.
    3231             :  *
    3232             :  * An ERR_PTR()-encoded error code is returned on failure.
    3233             :  */
    3234           0 : static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
    3235             :                                   const struct open_flags *op,
    3236             :                                   bool got_write)
    3237             : {
    3238             :         struct user_namespace *mnt_userns;
    3239           0 :         struct dentry *dir = nd->path.dentry;
    3240           0 :         struct inode *dir_inode = dir->d_inode;
    3241           0 :         int open_flag = op->open_flag;
    3242             :         struct dentry *dentry;
    3243           0 :         int error, create_error = 0;
    3244           0 :         umode_t mode = op->mode;
    3245           0 :         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    3246             : 
    3247           0 :         if (unlikely(IS_DEADDIR(dir_inode)))
    3248             :                 return ERR_PTR(-ENOENT);
    3249             : 
    3250           0 :         file->f_mode &= ~FMODE_CREATED;
    3251           0 :         dentry = d_lookup(dir, &nd->last);
    3252             :         for (;;) {
    3253           0 :                 if (!dentry) {
    3254           0 :                         dentry = d_alloc_parallel(dir, &nd->last, &wq);
    3255           0 :                         if (IS_ERR(dentry))
    3256             :                                 return dentry;
    3257             :                 }
    3258           0 :                 if (d_in_lookup(dentry))
    3259             :                         break;
    3260             : 
    3261           0 :                 error = d_revalidate(dentry, nd->flags);
    3262           0 :                 if (likely(error > 0))
    3263             :                         break;
    3264           0 :                 if (error)
    3265             :                         goto out_dput;
    3266           0 :                 d_invalidate(dentry);
    3267           0 :                 dput(dentry);
    3268           0 :                 dentry = NULL;
    3269             :         }
    3270           0 :         if (dentry->d_inode) {
    3271             :                 /* Cached positive dentry: will open in f_op->open */
    3272             :                 return dentry;
    3273             :         }
    3274             : 
    3275             :         /*
    3276             :          * Checking write permission is tricky, because we don't know if we are
    3277             :          * going to actually need it: O_CREAT opens should work as long as the
    3278             :          * file exists.  But checking existence breaks atomicity.  The trick is
    3279             :          * to check access and if not granted clear O_CREAT from the flags.
    3280             :          *
    3281             :          * Another problem is returning the "right" error value (e.g. for an
    3282             :          * O_EXCL open we want to return EEXIST not EROFS).
    3283             :          */
    3284           0 :         if (unlikely(!got_write))
    3285           0 :                 open_flag &= ~O_TRUNC;
    3286           0 :         mnt_userns = mnt_user_ns(nd->path.mnt);
    3287           0 :         if (open_flag & O_CREAT) {
    3288           0 :                 if (open_flag & O_EXCL)
    3289           0 :                         open_flag &= ~O_TRUNC;
    3290           0 :                 if (!IS_POSIXACL(dir->d_inode))
    3291           0 :                         mode &= ~current_umask();
    3292           0 :                 if (likely(got_write))
    3293           0 :                         create_error = may_o_create(mnt_userns, &nd->path,
    3294             :                                                     dentry, mode);
    3295             :                 else
    3296             :                         create_error = -EROFS;
    3297             :         }
    3298           0 :         if (create_error)
    3299           0 :                 open_flag &= ~O_CREAT;
    3300           0 :         if (dir_inode->i_op->atomic_open) {
    3301           0 :                 dentry = atomic_open(nd, dentry, file, open_flag, mode);
    3302           0 :                 if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
    3303           0 :                         dentry = ERR_PTR(create_error);
    3304             :                 return dentry;
    3305             :         }
    3306             : 
    3307           0 :         if (d_in_lookup(dentry)) {
    3308           0 :                 struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
    3309             :                                                              nd->flags);
    3310           0 :                 d_lookup_done(dentry);
    3311           0 :                 if (unlikely(res)) {
    3312           0 :                         if (IS_ERR(res)) {
    3313           0 :                                 error = PTR_ERR(res);
    3314           0 :                                 goto out_dput;
    3315             :                         }
    3316           0 :                         dput(dentry);
    3317           0 :                         dentry = res;
    3318             :                 }
    3319             :         }
    3320             : 
    3321             :         /* Negative dentry, just create the file */
    3322           0 :         if (!dentry->d_inode && (open_flag & O_CREAT)) {
    3323           0 :                 file->f_mode |= FMODE_CREATED;
    3324           0 :                 audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
    3325           0 :                 if (!dir_inode->i_op->create) {
    3326             :                         error = -EACCES;
    3327             :                         goto out_dput;
    3328             :                 }
    3329             : 
    3330           0 :                 error = dir_inode->i_op->create(mnt_userns, dir_inode, dentry,
    3331           0 :                                                 mode, open_flag & O_EXCL);
    3332           0 :                 if (error)
    3333             :                         goto out_dput;
    3334             :         }
    3335           0 :         if (unlikely(create_error) && !dentry->d_inode) {
    3336             :                 error = create_error;
    3337             :                 goto out_dput;
    3338             :         }
    3339             :         return dentry;
    3340             : 
    3341             : out_dput:
    3342           0 :         dput(dentry);
    3343           0 :         return ERR_PTR(error);
    3344             : }
    3345             : 
    3346           0 : static const char *open_last_lookups(struct nameidata *nd,
    3347             :                    struct file *file, const struct open_flags *op)
    3348             : {
    3349           0 :         struct dentry *dir = nd->path.dentry;
    3350           0 :         int open_flag = op->open_flag;
    3351           0 :         bool got_write = false;
    3352             :         unsigned seq;
    3353             :         struct inode *inode;
    3354             :         struct dentry *dentry;
    3355             :         const char *res;
    3356             : 
    3357           0 :         nd->flags |= op->intent;
    3358             : 
    3359           0 :         if (nd->last_type != LAST_NORM) {
    3360           0 :                 if (nd->depth)
    3361           0 :                         put_link(nd);
    3362           0 :                 return handle_dots(nd, nd->last_type);
    3363             :         }
    3364             : 
    3365           0 :         if (!(open_flag & O_CREAT)) {
    3366           0 :                 if (nd->last.name[nd->last.len])
    3367           0 :                         nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
    3368             :                 /* we _can_ be in RCU mode here */
    3369           0 :                 dentry = lookup_fast(nd, &inode, &seq);
    3370           0 :                 if (IS_ERR(dentry))
    3371             :                         return ERR_CAST(dentry);
    3372           0 :                 if (likely(dentry))
    3373             :                         goto finish_lookup;
    3374             : 
    3375           0 :                 BUG_ON(nd->flags & LOOKUP_RCU);
    3376             :         } else {
    3377             :                 /* create side of things */
    3378           0 :                 if (nd->flags & LOOKUP_RCU) {
    3379           0 :                         if (!try_to_unlazy(nd))
    3380             :                                 return ERR_PTR(-ECHILD);
    3381             :                 }
    3382           0 :                 audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
    3383             :                 /* trailing slashes? */
    3384           0 :                 if (unlikely(nd->last.name[nd->last.len]))
    3385             :                         return ERR_PTR(-EISDIR);
    3386             :         }
    3387             : 
    3388           0 :         if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
    3389           0 :                 got_write = !mnt_want_write(nd->path.mnt);
    3390             :                 /*
    3391             :                  * do _not_ fail yet - we might not need that or fail with
    3392             :                  * a different error; let lookup_open() decide; we'll be
    3393             :                  * dropping this one anyway.
    3394             :                  */
    3395             :         }
    3396           0 :         if (open_flag & O_CREAT)
    3397           0 :                 inode_lock(dir->d_inode);
    3398             :         else
    3399           0 :                 inode_lock_shared(dir->d_inode);
    3400           0 :         dentry = lookup_open(nd, file, op, got_write);
    3401           0 :         if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
    3402           0 :                 fsnotify_create(dir->d_inode, dentry);
    3403           0 :         if (open_flag & O_CREAT)
    3404           0 :                 inode_unlock(dir->d_inode);
    3405             :         else
    3406           0 :                 inode_unlock_shared(dir->d_inode);
    3407             : 
    3408           0 :         if (got_write)
    3409           0 :                 mnt_drop_write(nd->path.mnt);
    3410             : 
    3411           0 :         if (IS_ERR(dentry))
    3412             :                 return ERR_CAST(dentry);
    3413             : 
    3414           0 :         if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
    3415           0 :                 dput(nd->path.dentry);
    3416           0 :                 nd->path.dentry = dentry;
    3417           0 :                 return NULL;
    3418             :         }
    3419             : 
    3420             : finish_lookup:
    3421           0 :         if (nd->depth)
    3422           0 :                 put_link(nd);
    3423           0 :         res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
    3424           0 :         if (unlikely(res))
    3425           0 :                 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
    3426             :         return res;
    3427             : }
    3428             : 
    3429             : /*
    3430             :  * Handle the last step of open()
    3431             :  */
    3432           0 : static int do_open(struct nameidata *nd,
    3433             :                    struct file *file, const struct open_flags *op)
    3434             : {
    3435             :         struct user_namespace *mnt_userns;
    3436           0 :         int open_flag = op->open_flag;
    3437             :         bool do_truncate;
    3438             :         int acc_mode;
    3439             :         int error;
    3440             : 
    3441           0 :         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
    3442           0 :                 error = complete_walk(nd);
    3443           0 :                 if (error)
    3444             :                         return error;
    3445             :         }
    3446             :         if (!(file->f_mode & FMODE_CREATED))
    3447             :                 audit_inode(nd->name, nd->path.dentry, 0);
    3448           0 :         mnt_userns = mnt_user_ns(nd->path.mnt);
    3449           0 :         if (open_flag & O_CREAT) {
    3450           0 :                 if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
    3451             :                         return -EEXIST;
    3452           0 :                 if (d_is_dir(nd->path.dentry))
    3453             :                         return -EISDIR;
    3454           0 :                 error = may_create_in_sticky(mnt_userns, nd,
    3455             :                                              d_backing_inode(nd->path.dentry));
    3456           0 :                 if (unlikely(error))
    3457             :                         return error;
    3458             :         }
    3459           0 :         if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
    3460             :                 return -ENOTDIR;
    3461             : 
    3462           0 :         do_truncate = false;
    3463           0 :         acc_mode = op->acc_mode;
    3464           0 :         if (file->f_mode & FMODE_CREATED) {
    3465             :                 /* Don't check for write permission, don't truncate */
    3466           0 :                 open_flag &= ~O_TRUNC;
    3467           0 :                 acc_mode = 0;
    3468           0 :         } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
    3469           0 :                 error = mnt_want_write(nd->path.mnt);
    3470           0 :                 if (error)
    3471             :                         return error;
    3472             :                 do_truncate = true;
    3473             :         }
    3474           0 :         error = may_open(mnt_userns, &nd->path, acc_mode, open_flag);
    3475           0 :         if (!error && !(file->f_mode & FMODE_OPENED))
    3476           0 :                 error = vfs_open(&nd->path, file);
    3477           0 :         if (!error)
    3478           0 :                 error = ima_file_check(file, op->acc_mode);
    3479           0 :         if (!error && do_truncate)
    3480           0 :                 error = handle_truncate(mnt_userns, file);
    3481           0 :         if (unlikely(error > 0)) {
    3482           0 :                 WARN_ON(1);
    3483           0 :                 error = -EINVAL;
    3484             :         }
    3485           0 :         if (do_truncate)
    3486           0 :                 mnt_drop_write(nd->path.mnt);
    3487             :         return error;
    3488             : }
    3489             : 
    3490             : /**
    3491             :  * vfs_tmpfile - create tmpfile
    3492             :  * @mnt_userns: user namespace of the mount the inode was found from
    3493             :  * @dentry:     pointer to dentry of the base directory
    3494             :  * @mode:       mode of the new tmpfile
    3495             :  * @open_flag:  flags
    3496             :  *
    3497             :  * Create a temporary file.
    3498             :  *
    3499             :  * If the inode has been found through an idmapped mount the user namespace of
    3500             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    3501             :  * care to map the inode according to @mnt_userns before checking permissions.
    3502             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3503             :  * raw inode simply pass init_user_ns.
    3504             :  */
    3505           0 : struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
    3506             :                            struct dentry *dentry, umode_t mode, int open_flag)
    3507             : {
    3508           0 :         struct dentry *child = NULL;
    3509           0 :         struct inode *dir = dentry->d_inode;
    3510             :         struct inode *inode;
    3511             :         int error;
    3512             : 
    3513             :         /* we want directory to be writable */
    3514           0 :         error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
    3515           0 :         if (error)
    3516             :                 goto out_err;
    3517           0 :         error = -EOPNOTSUPP;
    3518           0 :         if (!dir->i_op->tmpfile)
    3519             :                 goto out_err;
    3520           0 :         error = -ENOMEM;
    3521           0 :         child = d_alloc(dentry, &slash_name);
    3522           0 :         if (unlikely(!child))
    3523             :                 goto out_err;
    3524           0 :         error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
    3525           0 :         if (error)
    3526             :                 goto out_err;
    3527           0 :         error = -ENOENT;
    3528           0 :         inode = child->d_inode;
    3529           0 :         if (unlikely(!inode))
    3530             :                 goto out_err;
    3531           0 :         if (!(open_flag & O_EXCL)) {
    3532           0 :                 spin_lock(&inode->i_lock);
    3533           0 :                 inode->i_state |= I_LINKABLE;
    3534           0 :                 spin_unlock(&inode->i_lock);
    3535             :         }
    3536             :         ima_post_create_tmpfile(mnt_userns, inode);
    3537             :         return child;
    3538             : 
    3539             : out_err:
    3540           0 :         dput(child);
    3541           0 :         return ERR_PTR(error);
    3542             : }
    3543             : EXPORT_SYMBOL(vfs_tmpfile);
    3544             : 
    3545           0 : static int do_tmpfile(struct nameidata *nd, unsigned flags,
    3546             :                 const struct open_flags *op,
    3547             :                 struct file *file)
    3548             : {
    3549             :         struct user_namespace *mnt_userns;
    3550             :         struct dentry *child;
    3551             :         struct path path;
    3552           0 :         int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
    3553           0 :         if (unlikely(error))
    3554             :                 return error;
    3555           0 :         error = mnt_want_write(path.mnt);
    3556           0 :         if (unlikely(error))
    3557             :                 goto out;
    3558           0 :         mnt_userns = mnt_user_ns(path.mnt);
    3559           0 :         child = vfs_tmpfile(mnt_userns, path.dentry, op->mode, op->open_flag);
    3560           0 :         error = PTR_ERR(child);
    3561           0 :         if (IS_ERR(child))
    3562             :                 goto out2;
    3563           0 :         dput(path.dentry);
    3564           0 :         path.dentry = child;
    3565           0 :         audit_inode(nd->name, child, 0);
    3566             :         /* Don't check for other permissions, the inode was just created */
    3567           0 :         error = may_open(mnt_userns, &path, 0, op->open_flag);
    3568           0 :         if (!error)
    3569           0 :                 error = vfs_open(&path, file);
    3570             : out2:
    3571           0 :         mnt_drop_write(path.mnt);
    3572             : out:
    3573           0 :         path_put(&path);
    3574             :         return error;
    3575             : }
    3576             : 
    3577           0 : static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
    3578             : {
    3579             :         struct path path;
    3580           0 :         int error = path_lookupat(nd, flags, &path);
    3581           0 :         if (!error) {
    3582           0 :                 audit_inode(nd->name, path.dentry, 0);
    3583           0 :                 error = vfs_open(&path, file);
    3584             :                 path_put(&path);
    3585             :         }
    3586           0 :         return error;
    3587             : }
    3588             : 
    3589           0 : static struct file *path_openat(struct nameidata *nd,
    3590             :                         const struct open_flags *op, unsigned flags)
    3591             : {
    3592             :         struct file *file;
    3593             :         int error;
    3594             : 
    3595           0 :         file = alloc_empty_file(op->open_flag, current_cred());
    3596           0 :         if (IS_ERR(file))
    3597             :                 return file;
    3598             : 
    3599           0 :         if (unlikely(file->f_flags & __O_TMPFILE)) {
    3600           0 :                 error = do_tmpfile(nd, flags, op, file);
    3601           0 :         } else if (unlikely(file->f_flags & O_PATH)) {
    3602           0 :                 error = do_o_path(nd, flags, file);
    3603             :         } else {
    3604           0 :                 const char *s = path_init(nd, flags);
    3605           0 :                 while (!(error = link_path_walk(s, nd)) &&
    3606             :                        (s = open_last_lookups(nd, file, op)) != NULL)
    3607             :                         ;
    3608           0 :                 if (!error)
    3609           0 :                         error = do_open(nd, file, op);
    3610           0 :                 terminate_walk(nd);
    3611             :         }
    3612           0 :         if (likely(!error)) {
    3613           0 :                 if (likely(file->f_mode & FMODE_OPENED))
    3614             :                         return file;
    3615           0 :                 WARN_ON(1);
    3616           0 :                 error = -EINVAL;
    3617             :         }
    3618           0 :         fput(file);
    3619           0 :         if (error == -EOPENSTALE) {
    3620           0 :                 if (flags & LOOKUP_RCU)
    3621             :                         error = -ECHILD;
    3622             :                 else
    3623           0 :                         error = -ESTALE;
    3624             :         }
    3625           0 :         return ERR_PTR(error);
    3626             : }
    3627             : 
    3628           0 : struct file *do_filp_open(int dfd, struct filename *pathname,
    3629             :                 const struct open_flags *op)
    3630             : {
    3631             :         struct nameidata nd;
    3632           0 :         int flags = op->lookup_flags;
    3633             :         struct file *filp;
    3634             : 
    3635           0 :         set_nameidata(&nd, dfd, pathname, NULL);
    3636           0 :         filp = path_openat(&nd, op, flags | LOOKUP_RCU);
    3637           0 :         if (unlikely(filp == ERR_PTR(-ECHILD)))
    3638           0 :                 filp = path_openat(&nd, op, flags);
    3639           0 :         if (unlikely(filp == ERR_PTR(-ESTALE)))
    3640           0 :                 filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
    3641           0 :         restore_nameidata();
    3642           0 :         return filp;
    3643             : }
    3644             : 
    3645           0 : struct file *do_file_open_root(const struct path *root,
    3646             :                 const char *name, const struct open_flags *op)
    3647             : {
    3648             :         struct nameidata nd;
    3649             :         struct file *file;
    3650             :         struct filename *filename;
    3651           0 :         int flags = op->lookup_flags;
    3652             : 
    3653           0 :         if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN)
    3654             :                 return ERR_PTR(-ELOOP);
    3655             : 
    3656           0 :         filename = getname_kernel(name);
    3657           0 :         if (IS_ERR(filename))
    3658             :                 return ERR_CAST(filename);
    3659             : 
    3660           0 :         set_nameidata(&nd, -1, filename, root);
    3661           0 :         file = path_openat(&nd, op, flags | LOOKUP_RCU);
    3662           0 :         if (unlikely(file == ERR_PTR(-ECHILD)))
    3663           0 :                 file = path_openat(&nd, op, flags);
    3664           0 :         if (unlikely(file == ERR_PTR(-ESTALE)))
    3665           0 :                 file = path_openat(&nd, op, flags | LOOKUP_REVAL);
    3666           0 :         restore_nameidata();
    3667           0 :         putname(filename);
    3668           0 :         return file;
    3669             : }
    3670             : 
    3671           3 : static struct dentry *filename_create(int dfd, struct filename *name,
    3672             :                                       struct path *path, unsigned int lookup_flags)
    3673             : {
    3674           3 :         struct dentry *dentry = ERR_PTR(-EEXIST);
    3675             :         struct qstr last;
    3676           3 :         bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
    3677           3 :         unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
    3678           3 :         unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
    3679             :         int type;
    3680             :         int err2;
    3681             :         int error;
    3682             : 
    3683           3 :         error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
    3684           3 :         if (error)
    3685           0 :                 return ERR_PTR(error);
    3686             : 
    3687             :         /*
    3688             :          * Yucky last component or no last component at all?
    3689             :          * (foo/., foo/.., /////)
    3690             :          */
    3691           3 :         if (unlikely(type != LAST_NORM))
    3692             :                 goto out;
    3693             : 
    3694             :         /* don't fail immediately if it's r/o, at least try to report other errors */
    3695           3 :         err2 = mnt_want_write(path->mnt);
    3696             :         /*
    3697             :          * Do the final lookup.  Suppress 'create' if there is a trailing
    3698             :          * '/', and a directory wasn't requested.
    3699             :          */
    3700           3 :         if (last.name[last.len] && !want_dir)
    3701           0 :                 create_flags = 0;
    3702           6 :         inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
    3703           3 :         dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
    3704           3 :         if (IS_ERR(dentry))
    3705             :                 goto unlock;
    3706             : 
    3707           3 :         error = -EEXIST;
    3708           3 :         if (d_is_positive(dentry))
    3709             :                 goto fail;
    3710             : 
    3711             :         /*
    3712             :          * Special case - lookup gave negative, but... we had foo/bar/
    3713             :          * From the vfs_mknod() POV we just have a negative dentry -
    3714             :          * all is fine. Let's be bastards - you had / on the end, you've
    3715             :          * been asking for (non-existent) directory. -ENOENT for you.
    3716             :          */
    3717           3 :         if (unlikely(!create_flags)) {
    3718             :                 error = -ENOENT;
    3719             :                 goto fail;
    3720             :         }
    3721           3 :         if (unlikely(err2)) {
    3722             :                 error = err2;
    3723             :                 goto fail;
    3724             :         }
    3725             :         return dentry;
    3726             : fail:
    3727           0 :         dput(dentry);
    3728           0 :         dentry = ERR_PTR(error);
    3729             : unlock:
    3730           0 :         inode_unlock(path->dentry->d_inode);
    3731           0 :         if (!err2)
    3732           0 :                 mnt_drop_write(path->mnt);
    3733             : out:
    3734           0 :         path_put(path);
    3735           0 :         return dentry;
    3736             : }
    3737             : 
    3738           3 : struct dentry *kern_path_create(int dfd, const char *pathname,
    3739             :                                 struct path *path, unsigned int lookup_flags)
    3740             : {
    3741           3 :         struct filename *filename = getname_kernel(pathname);
    3742           3 :         struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
    3743             : 
    3744           3 :         putname(filename);
    3745           3 :         return res;
    3746             : }
    3747             : EXPORT_SYMBOL(kern_path_create);
    3748             : 
    3749           3 : void done_path_create(struct path *path, struct dentry *dentry)
    3750             : {
    3751           3 :         dput(dentry);
    3752           6 :         inode_unlock(path->dentry->d_inode);
    3753           3 :         mnt_drop_write(path->mnt);
    3754           3 :         path_put(path);
    3755           3 : }
    3756             : EXPORT_SYMBOL(done_path_create);
    3757             : 
    3758           0 : inline struct dentry *user_path_create(int dfd, const char __user *pathname,
    3759             :                                 struct path *path, unsigned int lookup_flags)
    3760             : {
    3761           0 :         struct filename *filename = getname(pathname);
    3762           0 :         struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
    3763             : 
    3764           0 :         putname(filename);
    3765           0 :         return res;
    3766             : }
    3767             : EXPORT_SYMBOL(user_path_create);
    3768             : 
    3769             : /**
    3770             :  * vfs_mknod - create device node or file
    3771             :  * @mnt_userns: user namespace of the mount the inode was found from
    3772             :  * @dir:        inode of the parent directory
    3773             :  * @dentry:     pointer to dentry of the device node or file to create
    3774             :  * @mode:       mode of the new device node or file
    3775             :  * @dev:        device number of device to create
    3776             :  *
    3777             :  * Create a device node or file.
    3778             :  *
    3779             :  * If the inode has been found through an idmapped mount the user namespace of
    3780             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    3781             :  * care to map the inode according to @mnt_userns before checking permissions.
    3782             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3783             :  * raw inode simply pass init_user_ns.
    3784             :  */
    3785           1 : int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
    3786             :               struct dentry *dentry, umode_t mode, dev_t dev)
    3787             : {
    3788           1 :         bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
    3789           1 :         int error = may_create(mnt_userns, dir, dentry);
    3790             : 
    3791           1 :         if (error)
    3792             :                 return error;
    3793             : 
    3794           2 :         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
    3795           1 :             !capable(CAP_MKNOD))
    3796             :                 return -EPERM;
    3797             : 
    3798           1 :         if (!dir->i_op->mknod)
    3799             :                 return -EPERM;
    3800             : 
    3801           1 :         error = devcgroup_inode_mknod(mode, dev);
    3802             :         if (error)
    3803             :                 return error;
    3804             : 
    3805           1 :         error = security_inode_mknod(dir, dentry, mode, dev);
    3806             :         if (error)
    3807             :                 return error;
    3808             : 
    3809           1 :         error = dir->i_op->mknod(mnt_userns, dir, dentry, mode, dev);
    3810           1 :         if (!error)
    3811             :                 fsnotify_create(dir, dentry);
    3812             :         return error;
    3813             : }
    3814             : EXPORT_SYMBOL(vfs_mknod);
    3815             : 
    3816             : static int may_mknod(umode_t mode)
    3817             : {
    3818           0 :         switch (mode & S_IFMT) {
    3819             :         case S_IFREG:
    3820             :         case S_IFCHR:
    3821             :         case S_IFBLK:
    3822             :         case S_IFIFO:
    3823             :         case S_IFSOCK:
    3824             :         case 0: /* zero mode translates to S_IFREG */
    3825             :                 return 0;
    3826             :         case S_IFDIR:
    3827             :                 return -EPERM;
    3828             :         default:
    3829             :                 return -EINVAL;
    3830             :         }
    3831             : }
    3832             : 
    3833           0 : static int do_mknodat(int dfd, struct filename *name, umode_t mode,
    3834             :                 unsigned int dev)
    3835             : {
    3836             :         struct user_namespace *mnt_userns;
    3837             :         struct dentry *dentry;
    3838             :         struct path path;
    3839             :         int error;
    3840           0 :         unsigned int lookup_flags = 0;
    3841             : 
    3842           0 :         error = may_mknod(mode);
    3843           0 :         if (error)
    3844             :                 goto out1;
    3845             : retry:
    3846           0 :         dentry = filename_create(dfd, name, &path, lookup_flags);
    3847           0 :         error = PTR_ERR(dentry);
    3848           0 :         if (IS_ERR(dentry))
    3849             :                 goto out1;
    3850             : 
    3851           0 :         if (!IS_POSIXACL(path.dentry->d_inode))
    3852           0 :                 mode &= ~current_umask();
    3853           0 :         error = security_path_mknod(&path, dentry, mode, dev);
    3854             :         if (error)
    3855             :                 goto out2;
    3856             : 
    3857           0 :         mnt_userns = mnt_user_ns(path.mnt);
    3858           0 :         switch (mode & S_IFMT) {
    3859             :                 case 0: case S_IFREG:
    3860           0 :                         error = vfs_create(mnt_userns, path.dentry->d_inode,
    3861             :                                            dentry, mode, true);
    3862             :                         if (!error)
    3863             :                                 ima_post_path_mknod(mnt_userns, dentry);
    3864             :                         break;
    3865             :                 case S_IFCHR: case S_IFBLK:
    3866           0 :                         error = vfs_mknod(mnt_userns, path.dentry->d_inode,
    3867             :                                           dentry, mode, new_decode_dev(dev));
    3868           0 :                         break;
    3869             :                 case S_IFIFO: case S_IFSOCK:
    3870           0 :                         error = vfs_mknod(mnt_userns, path.dentry->d_inode,
    3871             :                                           dentry, mode, 0);
    3872           0 :                         break;
    3873             :         }
    3874             : out2:
    3875           0 :         done_path_create(&path, dentry);
    3876           0 :         if (retry_estale(error, lookup_flags)) {
    3877             :                 lookup_flags |= LOOKUP_REVAL;
    3878             :                 goto retry;
    3879             :         }
    3880             : out1:
    3881           0 :         putname(name);
    3882           0 :         return error;
    3883             : }
    3884             : 
    3885           0 : SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
    3886             :                 unsigned int, dev)
    3887             : {
    3888           0 :         return do_mknodat(dfd, getname(filename), mode, dev);
    3889             : }
    3890             : 
    3891           0 : SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
    3892             : {
    3893           0 :         return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
    3894             : }
    3895             : 
    3896             : /**
    3897             :  * vfs_mkdir - create directory
    3898             :  * @mnt_userns: user namespace of the mount the inode was found from
    3899             :  * @dir:        inode of the parent directory
    3900             :  * @dentry:     dentry of the child directory
    3901             :  * @mode:       mode of the new directory
    3902             :  *
    3903             :  * Create a directory.
    3904             :  *
    3905             :  * If the inode has been found through an idmapped mount the user namespace of
    3906             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    3907             :  * care to map the inode according to @mnt_userns before checking permissions.
    3908             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3909             :  * raw inode simply pass init_user_ns.
    3910             :  */
    3911           2 : int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
    3912             :               struct dentry *dentry, umode_t mode)
    3913             : {
    3914           2 :         int error = may_create(mnt_userns, dir, dentry);
    3915           2 :         unsigned max_links = dir->i_sb->s_max_links;
    3916             : 
    3917           2 :         if (error)
    3918             :                 return error;
    3919             : 
    3920           2 :         if (!dir->i_op->mkdir)
    3921             :                 return -EPERM;
    3922             : 
    3923           2 :         mode &= (S_IRWXUGO|S_ISVTX);
    3924           2 :         error = security_inode_mkdir(dir, dentry, mode);
    3925             :         if (error)
    3926             :                 return error;
    3927             : 
    3928           2 :         if (max_links && dir->i_nlink >= max_links)
    3929             :                 return -EMLINK;
    3930             : 
    3931           2 :         error = dir->i_op->mkdir(mnt_userns, dir, dentry, mode);
    3932           2 :         if (!error)
    3933             :                 fsnotify_mkdir(dir, dentry);
    3934             :         return error;
    3935             : }
    3936             : EXPORT_SYMBOL(vfs_mkdir);
    3937             : 
    3938           0 : int do_mkdirat(int dfd, struct filename *name, umode_t mode)
    3939             : {
    3940             :         struct dentry *dentry;
    3941             :         struct path path;
    3942             :         int error;
    3943           0 :         unsigned int lookup_flags = LOOKUP_DIRECTORY;
    3944             : 
    3945             : retry:
    3946           0 :         dentry = filename_create(dfd, name, &path, lookup_flags);
    3947           0 :         error = PTR_ERR(dentry);
    3948           0 :         if (IS_ERR(dentry))
    3949             :                 goto out_putname;
    3950             : 
    3951           0 :         if (!IS_POSIXACL(path.dentry->d_inode))
    3952           0 :                 mode &= ~current_umask();
    3953           0 :         error = security_path_mkdir(&path, dentry, mode);
    3954             :         if (!error) {
    3955             :                 struct user_namespace *mnt_userns;
    3956           0 :                 mnt_userns = mnt_user_ns(path.mnt);
    3957           0 :                 error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
    3958             :                                   mode);
    3959             :         }
    3960           0 :         done_path_create(&path, dentry);
    3961           0 :         if (retry_estale(error, lookup_flags)) {
    3962             :                 lookup_flags |= LOOKUP_REVAL;
    3963             :                 goto retry;
    3964             :         }
    3965             : out_putname:
    3966           0 :         putname(name);
    3967           0 :         return error;
    3968             : }
    3969             : 
    3970           0 : SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
    3971             : {
    3972           0 :         return do_mkdirat(dfd, getname(pathname), mode);
    3973             : }
    3974             : 
    3975           0 : SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
    3976             : {
    3977           0 :         return do_mkdirat(AT_FDCWD, getname(pathname), mode);
    3978             : }
    3979             : 
    3980             : /**
    3981             :  * vfs_rmdir - remove directory
    3982             :  * @mnt_userns: user namespace of the mount the inode was found from
    3983             :  * @dir:        inode of the parent directory
    3984             :  * @dentry:     dentry of the child directory
    3985             :  *
    3986             :  * Remove a directory.
    3987             :  *
    3988             :  * If the inode has been found through an idmapped mount the user namespace of
    3989             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    3990             :  * care to map the inode according to @mnt_userns before checking permissions.
    3991             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3992             :  * raw inode simply pass init_user_ns.
    3993             :  */
    3994           0 : int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
    3995             :                      struct dentry *dentry)
    3996             : {
    3997           0 :         int error = may_delete(mnt_userns, dir, dentry, 1);
    3998             : 
    3999           0 :         if (error)
    4000             :                 return error;
    4001             : 
    4002           0 :         if (!dir->i_op->rmdir)
    4003             :                 return -EPERM;
    4004             : 
    4005           0 :         dget(dentry);
    4006           0 :         inode_lock(dentry->d_inode);
    4007             : 
    4008           0 :         error = -EBUSY;
    4009           0 :         if (is_local_mountpoint(dentry) ||
    4010           0 :             (dentry->d_inode->i_flags & S_KERNEL_FILE))
    4011             :                 goto out;
    4012             : 
    4013           0 :         error = security_inode_rmdir(dir, dentry);
    4014             :         if (error)
    4015             :                 goto out;
    4016             : 
    4017           0 :         error = dir->i_op->rmdir(dir, dentry);
    4018           0 :         if (error)
    4019             :                 goto out;
    4020             : 
    4021           0 :         shrink_dcache_parent(dentry);
    4022           0 :         dentry->d_inode->i_flags |= S_DEAD;
    4023           0 :         dont_mount(dentry);
    4024             :         detach_mounts(dentry);
    4025             : 
    4026             : out:
    4027           0 :         inode_unlock(dentry->d_inode);
    4028           0 :         dput(dentry);
    4029           0 :         if (!error)
    4030           0 :                 d_delete_notify(dir, dentry);
    4031             :         return error;
    4032             : }
    4033             : EXPORT_SYMBOL(vfs_rmdir);
    4034             : 
    4035           0 : int do_rmdir(int dfd, struct filename *name)
    4036             : {
    4037             :         struct user_namespace *mnt_userns;
    4038             :         int error;
    4039             :         struct dentry *dentry;
    4040             :         struct path path;
    4041             :         struct qstr last;
    4042             :         int type;
    4043           0 :         unsigned int lookup_flags = 0;
    4044             : retry:
    4045           0 :         error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
    4046           0 :         if (error)
    4047             :                 goto exit1;
    4048             : 
    4049           0 :         switch (type) {
    4050             :         case LAST_DOTDOT:
    4051             :                 error = -ENOTEMPTY;
    4052             :                 goto exit2;
    4053             :         case LAST_DOT:
    4054           0 :                 error = -EINVAL;
    4055           0 :                 goto exit2;
    4056             :         case LAST_ROOT:
    4057           0 :                 error = -EBUSY;
    4058           0 :                 goto exit2;
    4059             :         }
    4060             : 
    4061           0 :         error = mnt_want_write(path.mnt);
    4062           0 :         if (error)
    4063             :                 goto exit2;
    4064             : 
    4065           0 :         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
    4066           0 :         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
    4067           0 :         error = PTR_ERR(dentry);
    4068           0 :         if (IS_ERR(dentry))
    4069             :                 goto exit3;
    4070           0 :         if (!dentry->d_inode) {
    4071             :                 error = -ENOENT;
    4072             :                 goto exit4;
    4073             :         }
    4074           0 :         error = security_path_rmdir(&path, dentry);
    4075             :         if (error)
    4076             :                 goto exit4;
    4077           0 :         mnt_userns = mnt_user_ns(path.mnt);
    4078           0 :         error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
    4079             : exit4:
    4080           0 :         dput(dentry);
    4081             : exit3:
    4082           0 :         inode_unlock(path.dentry->d_inode);
    4083           0 :         mnt_drop_write(path.mnt);
    4084             : exit2:
    4085           0 :         path_put(&path);
    4086           0 :         if (retry_estale(error, lookup_flags)) {
    4087             :                 lookup_flags |= LOOKUP_REVAL;
    4088             :                 goto retry;
    4089             :         }
    4090             : exit1:
    4091           0 :         putname(name);
    4092           0 :         return error;
    4093             : }
    4094             : 
    4095           0 : SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
    4096             : {
    4097           0 :         return do_rmdir(AT_FDCWD, getname(pathname));
    4098             : }
    4099             : 
    4100             : /**
    4101             :  * vfs_unlink - unlink a filesystem object
    4102             :  * @mnt_userns: user namespace of the mount the inode was found from
    4103             :  * @dir:        parent directory
    4104             :  * @dentry:     victim
    4105             :  * @delegated_inode: returns victim inode, if the inode is delegated.
    4106             :  *
    4107             :  * The caller must hold dir->i_mutex.
    4108             :  *
    4109             :  * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
    4110             :  * return a reference to the inode in delegated_inode.  The caller
    4111             :  * should then break the delegation on that inode and retry.  Because
    4112             :  * breaking a delegation may take a long time, the caller should drop
    4113             :  * dir->i_mutex before doing so.
    4114             :  *
    4115             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4116             :  * be appropriate for callers that expect the underlying filesystem not
    4117             :  * to be NFS exported.
    4118             :  *
    4119             :  * If the inode has been found through an idmapped mount the user namespace of
    4120             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    4121             :  * care to map the inode according to @mnt_userns before checking permissions.
    4122             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4123             :  * raw inode simply pass init_user_ns.
    4124             :  */
    4125           0 : int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
    4126             :                struct dentry *dentry, struct inode **delegated_inode)
    4127             : {
    4128           0 :         struct inode *target = dentry->d_inode;
    4129           0 :         int error = may_delete(mnt_userns, dir, dentry, 0);
    4130             : 
    4131           0 :         if (error)
    4132             :                 return error;
    4133             : 
    4134           0 :         if (!dir->i_op->unlink)
    4135             :                 return -EPERM;
    4136             : 
    4137           0 :         inode_lock(target);
    4138           0 :         if (IS_SWAPFILE(target))
    4139             :                 error = -EPERM;
    4140           0 :         else if (is_local_mountpoint(dentry))
    4141             :                 error = -EBUSY;
    4142             :         else {
    4143           0 :                 error = security_inode_unlink(dir, dentry);
    4144             :                 if (!error) {
    4145           0 :                         error = try_break_deleg(target, delegated_inode);
    4146           0 :                         if (error)
    4147             :                                 goto out;
    4148           0 :                         error = dir->i_op->unlink(dir, dentry);
    4149           0 :                         if (!error) {
    4150           0 :                                 dont_mount(dentry);
    4151             :                                 detach_mounts(dentry);
    4152             :                         }
    4153             :                 }
    4154             :         }
    4155             : out:
    4156           0 :         inode_unlock(target);
    4157             : 
    4158             :         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
    4159           0 :         if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
    4160           0 :                 fsnotify_unlink(dir, dentry);
    4161           0 :         } else if (!error) {
    4162           0 :                 fsnotify_link_count(target);
    4163           0 :                 d_delete_notify(dir, dentry);
    4164             :         }
    4165             : 
    4166             :         return error;
    4167             : }
    4168             : EXPORT_SYMBOL(vfs_unlink);
    4169             : 
    4170             : /*
    4171             :  * Make sure that the actual truncation of the file will occur outside its
    4172             :  * directory's i_mutex.  Truncate can take a long time if there is a lot of
    4173             :  * writeout happening, and we don't want to prevent access to the directory
    4174             :  * while waiting on the I/O.
    4175             :  */
    4176           0 : int do_unlinkat(int dfd, struct filename *name)
    4177             : {
    4178             :         int error;
    4179             :         struct dentry *dentry;
    4180             :         struct path path;
    4181             :         struct qstr last;
    4182             :         int type;
    4183           0 :         struct inode *inode = NULL;
    4184           0 :         struct inode *delegated_inode = NULL;
    4185           0 :         unsigned int lookup_flags = 0;
    4186             : retry:
    4187           0 :         error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
    4188           0 :         if (error)
    4189             :                 goto exit1;
    4190             : 
    4191           0 :         error = -EISDIR;
    4192           0 :         if (type != LAST_NORM)
    4193             :                 goto exit2;
    4194             : 
    4195           0 :         error = mnt_want_write(path.mnt);
    4196           0 :         if (error)
    4197             :                 goto exit2;
    4198             : retry_deleg:
    4199           0 :         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
    4200           0 :         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
    4201           0 :         error = PTR_ERR(dentry);
    4202           0 :         if (!IS_ERR(dentry)) {
    4203             :                 struct user_namespace *mnt_userns;
    4204             : 
    4205             :                 /* Why not before? Because we want correct error value */
    4206           0 :                 if (last.name[last.len])
    4207             :                         goto slashes;
    4208           0 :                 inode = dentry->d_inode;
    4209           0 :                 if (d_is_negative(dentry))
    4210             :                         goto slashes;
    4211           0 :                 ihold(inode);
    4212           0 :                 error = security_path_unlink(&path, dentry);
    4213             :                 if (error)
    4214             :                         goto exit3;
    4215           0 :                 mnt_userns = mnt_user_ns(path.mnt);
    4216           0 :                 error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
    4217             :                                    &delegated_inode);
    4218             : exit3:
    4219           0 :                 dput(dentry);
    4220             :         }
    4221           0 :         inode_unlock(path.dentry->d_inode);
    4222           0 :         if (inode)
    4223           0 :                 iput(inode);    /* truncate the inode here */
    4224           0 :         inode = NULL;
    4225           0 :         if (delegated_inode) {
    4226           0 :                 error = break_deleg_wait(&delegated_inode);
    4227           0 :                 if (!error)
    4228             :                         goto retry_deleg;
    4229             :         }
    4230           0 :         mnt_drop_write(path.mnt);
    4231             : exit2:
    4232           0 :         path_put(&path);
    4233           0 :         if (retry_estale(error, lookup_flags)) {
    4234             :                 lookup_flags |= LOOKUP_REVAL;
    4235             :                 inode = NULL;
    4236             :                 goto retry;
    4237             :         }
    4238             : exit1:
    4239           0 :         putname(name);
    4240           0 :         return error;
    4241             : 
    4242             : slashes:
    4243           0 :         if (d_is_negative(dentry))
    4244             :                 error = -ENOENT;
    4245           0 :         else if (d_is_dir(dentry))
    4246             :                 error = -EISDIR;
    4247             :         else
    4248           0 :                 error = -ENOTDIR;
    4249             :         goto exit3;
    4250             : }
    4251             : 
    4252           0 : SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
    4253             : {
    4254           0 :         if ((flag & ~AT_REMOVEDIR) != 0)
    4255             :                 return -EINVAL;
    4256             : 
    4257           0 :         if (flag & AT_REMOVEDIR)
    4258           0 :                 return do_rmdir(dfd, getname(pathname));
    4259           0 :         return do_unlinkat(dfd, getname(pathname));
    4260             : }
    4261             : 
    4262           0 : SYSCALL_DEFINE1(unlink, const char __user *, pathname)
    4263             : {
    4264           0 :         return do_unlinkat(AT_FDCWD, getname(pathname));
    4265             : }
    4266             : 
    4267             : /**
    4268             :  * vfs_symlink - create symlink
    4269             :  * @mnt_userns: user namespace of the mount the inode was found from
    4270             :  * @dir:        inode of the parent directory
    4271             :  * @dentry:     dentry of the child symlink file
    4272             :  * @oldname:    name of the file to link to
    4273             :  *
    4274             :  * Create a symlink.
    4275             :  *
    4276             :  * If the inode has been found through an idmapped mount the user namespace of
    4277             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    4278             :  * care to map the inode according to @mnt_userns before checking permissions.
    4279             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4280             :  * raw inode simply pass init_user_ns.
    4281             :  */
    4282           0 : int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
    4283             :                 struct dentry *dentry, const char *oldname)
    4284             : {
    4285           0 :         int error = may_create(mnt_userns, dir, dentry);
    4286             : 
    4287           0 :         if (error)
    4288             :                 return error;
    4289             : 
    4290           0 :         if (!dir->i_op->symlink)
    4291             :                 return -EPERM;
    4292             : 
    4293           0 :         error = security_inode_symlink(dir, dentry, oldname);
    4294             :         if (error)
    4295             :                 return error;
    4296             : 
    4297           0 :         error = dir->i_op->symlink(mnt_userns, dir, dentry, oldname);
    4298           0 :         if (!error)
    4299             :                 fsnotify_create(dir, dentry);
    4300             :         return error;
    4301             : }
    4302             : EXPORT_SYMBOL(vfs_symlink);
    4303             : 
    4304           0 : int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
    4305             : {
    4306             :         int error;
    4307             :         struct dentry *dentry;
    4308             :         struct path path;
    4309           0 :         unsigned int lookup_flags = 0;
    4310             : 
    4311           0 :         if (IS_ERR(from)) {
    4312           0 :                 error = PTR_ERR(from);
    4313           0 :                 goto out_putnames;
    4314             :         }
    4315             : retry:
    4316           0 :         dentry = filename_create(newdfd, to, &path, lookup_flags);
    4317           0 :         error = PTR_ERR(dentry);
    4318           0 :         if (IS_ERR(dentry))
    4319             :                 goto out_putnames;
    4320             : 
    4321           0 :         error = security_path_symlink(&path, dentry, from->name);
    4322             :         if (!error) {
    4323             :                 struct user_namespace *mnt_userns;
    4324             : 
    4325           0 :                 mnt_userns = mnt_user_ns(path.mnt);
    4326           0 :                 error = vfs_symlink(mnt_userns, path.dentry->d_inode, dentry,
    4327             :                                     from->name);
    4328             :         }
    4329           0 :         done_path_create(&path, dentry);
    4330           0 :         if (retry_estale(error, lookup_flags)) {
    4331             :                 lookup_flags |= LOOKUP_REVAL;
    4332             :                 goto retry;
    4333             :         }
    4334             : out_putnames:
    4335           0 :         putname(to);
    4336           0 :         putname(from);
    4337           0 :         return error;
    4338             : }
    4339             : 
    4340           0 : SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
    4341             :                 int, newdfd, const char __user *, newname)
    4342             : {
    4343           0 :         return do_symlinkat(getname(oldname), newdfd, getname(newname));
    4344             : }
    4345             : 
    4346           0 : SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
    4347             : {
    4348           0 :         return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
    4349             : }
    4350             : 
    4351             : /**
    4352             :  * vfs_link - create a new link
    4353             :  * @old_dentry: object to be linked
    4354             :  * @mnt_userns: the user namespace of the mount
    4355             :  * @dir:        new parent
    4356             :  * @new_dentry: where to create the new link
    4357             :  * @delegated_inode: returns inode needing a delegation break
    4358             :  *
    4359             :  * The caller must hold dir->i_mutex
    4360             :  *
    4361             :  * If vfs_link discovers a delegation on the to-be-linked file in need
    4362             :  * of breaking, it will return -EWOULDBLOCK and return a reference to the
    4363             :  * inode in delegated_inode.  The caller should then break the delegation
    4364             :  * and retry.  Because breaking a delegation may take a long time, the
    4365             :  * caller should drop the i_mutex before doing so.
    4366             :  *
    4367             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4368             :  * be appropriate for callers that expect the underlying filesystem not
    4369             :  * to be NFS exported.
    4370             :  *
    4371             :  * If the inode has been found through an idmapped mount the user namespace of
    4372             :  * the vfsmount must be passed through @mnt_userns. This function will then take
    4373             :  * care to map the inode according to @mnt_userns before checking permissions.
    4374             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4375             :  * raw inode simply pass init_user_ns.
    4376             :  */
    4377           0 : int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
    4378             :              struct inode *dir, struct dentry *new_dentry,
    4379             :              struct inode **delegated_inode)
    4380             : {
    4381           0 :         struct inode *inode = old_dentry->d_inode;
    4382           0 :         unsigned max_links = dir->i_sb->s_max_links;
    4383             :         int error;
    4384             : 
    4385           0 :         if (!inode)
    4386             :                 return -ENOENT;
    4387             : 
    4388           0 :         error = may_create(mnt_userns, dir, new_dentry);
    4389           0 :         if (error)
    4390             :                 return error;
    4391             : 
    4392           0 :         if (dir->i_sb != inode->i_sb)
    4393             :                 return -EXDEV;
    4394             : 
    4395             :         /*
    4396             :          * A link to an append-only or immutable file cannot be created.
    4397             :          */
    4398           0 :         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
    4399             :                 return -EPERM;
    4400             :         /*
    4401             :          * Updating the link count will likely cause i_uid and i_gid to
    4402             :          * be written back improperly if their true value is unknown to
    4403             :          * the vfs.
    4404             :          */
    4405           0 :         if (HAS_UNMAPPED_ID(mnt_userns, inode))
    4406             :                 return -EPERM;
    4407           0 :         if (!dir->i_op->link)
    4408             :                 return -EPERM;
    4409           0 :         if (S_ISDIR(inode->i_mode))
    4410             :                 return -EPERM;
    4411             : 
    4412           0 :         error = security_inode_link(old_dentry, dir, new_dentry);
    4413             :         if (error)
    4414             :                 return error;
    4415             : 
    4416           0 :         inode_lock(inode);
    4417             :         /* Make sure we don't allow creating hardlink to an unlinked file */
    4418           0 :         if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
    4419             :                 error =  -ENOENT;
    4420           0 :         else if (max_links && inode->i_nlink >= max_links)
    4421             :                 error = -EMLINK;
    4422             :         else {
    4423           0 :                 error = try_break_deleg(inode, delegated_inode);
    4424           0 :                 if (!error)
    4425           0 :                         error = dir->i_op->link(old_dentry, dir, new_dentry);
    4426             :         }
    4427             : 
    4428           0 :         if (!error && (inode->i_state & I_LINKABLE)) {
    4429           0 :                 spin_lock(&inode->i_lock);
    4430           0 :                 inode->i_state &= ~I_LINKABLE;
    4431           0 :                 spin_unlock(&inode->i_lock);
    4432             :         }
    4433           0 :         inode_unlock(inode);
    4434           0 :         if (!error)
    4435           0 :                 fsnotify_link(dir, inode, new_dentry);
    4436             :         return error;
    4437             : }
    4438             : EXPORT_SYMBOL(vfs_link);
    4439             : 
    4440             : /*
    4441             :  * Hardlinks are often used in delicate situations.  We avoid
    4442             :  * security-related surprises by not following symlinks on the
    4443             :  * newname.  --KAB
    4444             :  *
    4445             :  * We don't follow them on the oldname either to be compatible
    4446             :  * with linux 2.0, and to avoid hard-linking to directories
    4447             :  * and other special files.  --ADM
    4448             :  */
    4449           0 : int do_linkat(int olddfd, struct filename *old, int newdfd, /* hard-link the object named by @old (looked up from olddfd) as @new (from newdfd) */
    4450             :               struct filename *new, int flags) /* both names are released with putname() on every exit path */
    4451             : {
    4452             :         struct user_namespace *mnt_userns;
    4453             :         struct dentry *new_dentry;
    4454             :         struct path old_path, new_path;
    4455           0 :         struct inode *delegated_inode = NULL; /* filled in by vfs_link() when a delegation must be broken first */
    4456           0 :         int how = 0; /* lookup flags for the old name; gains LOOKUP_REVAL on an ESTALE retry */
    4457             :         int error;
    4458             : 
    4459           0 :         if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) { /* any other flag bit is rejected */
    4460             :                 error = -EINVAL;
    4461             :                 goto out_putnames;
    4462             :         }
    4463             :         /*
    4464             :          * To use null names we require CAP_DAC_READ_SEARCH.
    4465             :          * This ensures that not everyone will be able to create
    4466             :          * a hardlink using the passed file descriptor.
    4467             :          */
    4468           0 :         if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
    4469             :                 error = -ENOENT;
    4470             :                 goto out_putnames;
    4471             :         }
    4472             : 
    4473           0 :         if (flags & AT_SYMLINK_FOLLOW)
    4474           0 :                 how |= LOOKUP_FOLLOW;
    4475             : retry:
    4476           0 :         error = filename_lookup(olddfd, old, how, &old_path, NULL);
    4477           0 :         if (error)
    4478             :                 goto out_putnames;
    4479             : 
    4480           0 :         new_dentry = filename_create(newdfd, new, &new_path,
    4481             :                                         (how & LOOKUP_REVAL)); /* only LOOKUP_REVAL is carried over to the create lookup */
    4482           0 :         error = PTR_ERR(new_dentry);
    4483           0 :         if (IS_ERR(new_dentry))
    4484             :                 goto out_putpath;
    4485             : 
    4486           0 :         error = -EXDEV; /* source and destination must be on the same mount */
    4487           0 :         if (old_path.mnt != new_path.mnt)
    4488             :                 goto out_dput;
    4489           0 :         mnt_userns = mnt_user_ns(new_path.mnt);
    4490           0 :         error = may_linkat(mnt_userns, &old_path);
    4491           0 :         if (unlikely(error))
    4492             :                 goto out_dput;
    4493           0 :         error = security_path_link(old_path.dentry, &new_path, new_dentry);
    4494             :         if (error)
    4495             :                 goto out_dput;
    4496           0 :         error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
    4497             :                          new_dentry, &delegated_inode);
    4498             : out_dput:
    4499           0 :         done_path_create(&new_path, new_dentry); /* undoes filename_create() before any waiting or retrying */
    4500           0 :         if (delegated_inode) {
    4501           0 :                 error = break_deleg_wait(&delegated_inode); /* wait for the delegation break, then redo the whole lookup */
    4502           0 :                 if (!error) {
    4503             :                         path_put(&old_path);
    4504             :                         goto retry;
    4505             :                 }
    4506             :         }
    4507           0 :         if (retry_estale(error, how)) { /* retry once more with LOOKUP_REVAL added to the lookup flags */
    4508           0 :                 path_put(&old_path);
    4509           0 :                 how |= LOOKUP_REVAL;
    4510           0 :                 goto retry;
    4511             :         }
    4512             : out_putpath:
    4513             :         path_put(&old_path);
    4514             : out_putnames:
    4515           0 :         putname(old);
    4516           0 :         putname(new);
    4517             : 
    4518           0 :         return error;
    4519             : }
    4520             : 
    4521           0 : SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, /* linkat entry point: wraps the user strings and defers to do_linkat() */
    4522             :                 int, newdfd, const char __user *, newname, int, flags)
    4523             : {
    4524           0 :         return do_linkat(olddfd, getname_uflags(oldname, flags), /* old name is fetched with the caller's flags; presumably for AT_EMPTY_PATH - confirm */
    4525             :                 newdfd, getname(newname), flags);
    4526             : }
    4527             : 
    4528           0 : SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) /* link entry point */
    4529             : {
    4530           0 :         return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); /* both names use AT_FDCWD, no flags */
    4531             : }
    4532             : 
    4533             : /**
    4534             :  * vfs_rename - rename a filesystem object
    4535             :  * @rd:         pointer to &struct renamedata info
    4536             :  *
    4537             :  * The caller must hold multiple mutexes--see lock_rename().
    4538             :  *
    4539             :  * If vfs_rename discovers a delegation in need of breaking at either
    4540             :  * the source or destination, it will return -EWOULDBLOCK and return a
    4541             :  * reference to the inode in delegated_inode.  The caller should then
    4542             :  * break the delegation and retry.  Because breaking a delegation may
    4543             :  * take a long time, the caller should drop all locks before doing
    4544             :  * so.
    4545             :  *
    4546             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4547             :  * be appropriate for callers that expect the underlying filesystem not
    4548             :  * to be NFS exported.
    4549             :  *
    4550             :  * The worst of all namespace operations - renaming directory. "Perverted"
    4551             :  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
    4552             :  * Problems:
    4553             :  *
    4554             :  *      a) we can get into loop creation.
    4555             :  *      b) race potential - two innocent renames can create a loop together.
    4556             :  *         That's where 4.4 screws up. Current fix: serialization on
    4557             :  *         sb->s_vfs_rename_mutex. We might be more accurate, but that's another
    4558             :  *         story.
    4559             :  *      c) we have to lock _four_ objects - parents and victim (if it exists),
    4560             :  *         and source (if it is not a directory).
    4561             :  *         And that - after we got ->i_mutex on parents (until then we don't know
    4562             :  *         whether the target exists).  Solution: try to be smart with locking
    4563             :  *         order for inodes.  We rely on the fact that tree topology may change
    4564             :  *         only under ->s_vfs_rename_mutex _and_ that parent of the object we
    4565             :  *         move will be locked.  Thus we can rank directories by the tree
    4566             :  *         (ancestors first) and rank all non-directories after them.
    4567             :  *         That works since everybody except rename does "lock parent, lookup,
    4568             :  *         lock child" and rename is under ->s_vfs_rename_mutex.
    4569             :  *         HOWEVER, it relies on the assumption that any object with ->lookup()
    4570             :  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
    4571             :  *         we'd better make sure that there's no link(2) for them.
    4572             :  *      d) conversion from fhandle to dentry may come in the wrong moment - when
    4573             :  *         we are removing the target. Solution: we will have to grab ->i_mutex
    4574             :  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
    4575             :  *         ->i_mutex on parents, which works but leads to some truly excessive
    4576             :  *         locking].
    4577             :  */
    4578           0 : int vfs_rename(struct renamedata *rd) /* returns 0 on success or a negative errno; all inputs come from @rd */
    4579             : {
    4580             :         int error;
    4581           0 :         struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
    4582           0 :         struct dentry *old_dentry = rd->old_dentry;
    4583           0 :         struct dentry *new_dentry = rd->new_dentry;
    4584           0 :         struct inode **delegated_inode = rd->delegated_inode;
    4585           0 :         unsigned int flags = rd->flags;
    4586           0 :         bool is_dir = d_is_dir(old_dentry);
    4587           0 :         struct inode *source = old_dentry->d_inode;
    4588           0 :         struct inode *target = new_dentry->d_inode; /* NULL when the destination name does not exist yet */
    4589           0 :         bool new_is_dir = false; /* only computed when a target exists */
    4590           0 :         unsigned max_links = new_dir->i_sb->s_max_links; /* 0 disables the -EMLINK checks below */
    4591             :         struct name_snapshot old_name;
    4592             : 
    4593           0 :         if (source == target) /* both names already refer to the same inode: nothing to do */
    4594             :                 return 0;
    4595             : 
    4596           0 :         error = may_delete(rd->old_mnt_userns, old_dir, old_dentry, is_dir);
    4597           0 :         if (error)
    4598             :                 return error;
    4599             : 
    4600           0 :         if (!target) {
    4601           0 :                 error = may_create(rd->new_mnt_userns, new_dir, new_dentry);
    4602             :         } else {
    4603           0 :                 new_is_dir = d_is_dir(new_dentry);
    4604             : 
    4605           0 :                 if (!(flags & RENAME_EXCHANGE)) /* plain rename: the victim is checked against the source's directory-ness */
    4606           0 :                         error = may_delete(rd->new_mnt_userns, new_dir,
    4607             :                                            new_dentry, is_dir);
    4608             :                 else /* exchange: the target is checked against its own type */
    4609           0 :                         error = may_delete(rd->new_mnt_userns, new_dir,
    4610             :                                            new_dentry, new_is_dir);
    4611             :         }
    4612           0 :         if (error)
    4613             :                 return error;
    4614             : 
    4615           0 :         if (!old_dir->i_op->rename) /* the directory's inode operations provide no ->rename */
    4616             :                 return -EPERM;
    4617             : 
    4618             :         /*
    4619             :          * If we are going to change the parent - check write permissions,
    4620             :          * we'll need to flip '..'.
    4621             :          */
    4622           0 :         if (new_dir != old_dir) {
    4623           0 :                 if (is_dir) {
    4624           0 :                         error = inode_permission(rd->old_mnt_userns, source,
    4625             :                                                  MAY_WRITE);
    4626           0 :                         if (error)
    4627             :                                 return error;
    4628             :                 }
    4629           0 :                 if ((flags & RENAME_EXCHANGE) && new_is_dir) { /* on exchange the target directory changes parent too */
    4630           0 :                         error = inode_permission(rd->new_mnt_userns, target,
    4631             :                                                  MAY_WRITE);
    4632           0 :                         if (error)
    4633             :                                 return error;
    4634             :                 }
    4635             :         }
    4636             : 
    4637           0 :         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
    4638             :                                       flags);
    4639             :         if (error)
    4640             :                 return error;
    4641             : 
    4642           0 :         take_dentry_name_snapshot(&old_name, old_dentry); /* keep the pre-rename name for fsnotify_move() below */
    4643           0 :         dget(new_dentry); /* paired with the dput() after the out: label */
    4644           0 :         if (!is_dir || (flags & RENAME_EXCHANGE)) /* same condition is mirrored at out: to pick the matching unlock */
    4645           0 :                 lock_two_nondirectories(source, target);
    4646           0 :         else if (target)
    4647             :                 inode_lock(target);
    4648             : 
    4649           0 :         error = -EPERM; /* swap files may be neither source nor target */
    4650           0 :         if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
    4651             :                 goto out;
    4652             : 
    4653           0 :         error = -EBUSY;
    4654           0 :         if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
    4655             :                 goto out;
    4656             : 
    4657           0 :         if (max_links && new_dir != old_dir) {
    4658           0 :                 error = -EMLINK;
    4659           0 :                 if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) /* a directory arrives in new_dir without replacing one */
    4660             :                         goto out;
    4661           0 :                 if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
    4662           0 :                     old_dir->i_nlink >= max_links) /* exchange moves a directory into old_dir in place of a non-directory */
    4663             :                         goto out;
    4664             :         }
    4665           0 :         if (!is_dir) { /* delegation breaks are only attempted on non-directories */
    4666           0 :                 error = try_break_deleg(source, delegated_inode);
    4667           0 :                 if (error)
    4668             :                         goto out;
    4669             :         }
    4670           0 :         if (target && !new_is_dir) {
    4671           0 :                 error = try_break_deleg(target, delegated_inode);
    4672           0 :                 if (error)
    4673             :                         goto out;
    4674             :         }
    4675           0 :         error = old_dir->i_op->rename(rd->new_mnt_userns, old_dir, old_dentry,
    4676             :                                       new_dir, new_dentry, flags);
    4677           0 :         if (error)
    4678             :                 goto out;
    4679             : 
    4680           0 :         if (!(flags & RENAME_EXCHANGE) && target) { /* an existing target was replaced by the rename */
    4681           0 :                 if (is_dir) {
    4682           0 :                         shrink_dcache_parent(new_dentry);
    4683           0 :                         target->i_flags |= S_DEAD;
    4684             :                 }
    4685           0 :                 dont_mount(new_dentry);
    4686             :                 detach_mounts(new_dentry);
    4687             :         }
    4688           0 :         if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { /* skipped for filesystems flagged FS_RENAME_DOES_D_MOVE */
    4689           0 :                 if (!(flags & RENAME_EXCHANGE))
    4690           0 :                         d_move(old_dentry, new_dentry);
    4691             :                 else
    4692           0 :                         d_exchange(old_dentry, new_dentry);
    4693             :         }
    4694             : out:
    4695           0 :         if (!is_dir || (flags & RENAME_EXCHANGE))
    4696           0 :                 unlock_two_nondirectories(source, target);
    4697           0 :         else if (target)
    4698             :                 inode_unlock(target);
    4699           0 :         dput(new_dentry);
    4700           0 :         if (!error) { /* notifications are sent only on success, after the inode locks are dropped */
    4701           0 :                 fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
    4702           0 :                               !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
    4703           0 :                 if (flags & RENAME_EXCHANGE) { /* exchange reports a second move, in the opposite direction */
    4704           0 :                         fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
    4705             :                                       new_is_dir, NULL, new_dentry);
    4706             :                 }
    4707             :         }
    4708           0 :         release_dentry_name_snapshot(&old_name);
    4709             : 
    4710           0 :         return error;
    4711             : }
    4712             : EXPORT_SYMBOL(vfs_rename);
    4713             : 
    4714           0 : int do_renameat2(int olddfd, struct filename *from, int newdfd, /* rename @from (relative to olddfd) to @to (relative to newdfd) */
    4715             :                  struct filename *to, unsigned int flags) /* both names are released with putname() on every exit path */
    4716             : {
    4717             :         struct renamedata rd;
    4718             :         struct dentry *old_dentry, *new_dentry;
    4719             :         struct dentry *trap; /* value returned by lock_rename(); compared against both dentries below */
    4720             :         struct path old_path, new_path;
    4721             :         struct qstr old_last, new_last;
    4722             :         int old_type, new_type;
    4723           0 :         struct inode *delegated_inode = NULL;
    4724           0 :         unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
    4725           0 :         bool should_retry = false;
    4726           0 :         int error = -EINVAL; /* result of the two flag sanity checks below */
    4727             : 
    4728           0 :         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
    4729             :                 goto put_names;
    4730             : 
    4731           0 :         if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && /* EXCHANGE cannot be combined with NOREPLACE or WHITEOUT */
    4732           0 :             (flags & RENAME_EXCHANGE))
    4733             :                 goto put_names;
    4734             : 
    4735           0 :         if (flags & RENAME_EXCHANGE) /* exchange looks the target up without LOOKUP_RENAME_TARGET */
    4736           0 :                 target_flags = 0;
    4737             : 
    4738             : retry:
    4739           0 :         error = filename_parentat(olddfd, from, lookup_flags, &old_path,
    4740             :                                   &old_last, &old_type);
    4741           0 :         if (error)
    4742             :                 goto put_names;
    4743             : 
    4744           0 :         error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
    4745             :                                   &new_type);
    4746           0 :         if (error)
    4747             :                 goto exit1;
    4748             : 
    4749           0 :         error = -EXDEV; /* both parents must be on the same mount */
    4750           0 :         if (old_path.mnt != new_path.mnt)
    4751             :                 goto exit2;
    4752             : 
    4753           0 :         error = -EBUSY; /* the last component of the old name must be LAST_NORM */
    4754           0 :         if (old_type != LAST_NORM)
    4755             :                 goto exit2;
    4756             : 
    4757           0 :         if (flags & RENAME_NOREPLACE) /* a non-LAST_NORM new name yields -EEXIST with NOREPLACE, -EBUSY otherwise */
    4758           0 :                 error = -EEXIST;
    4759           0 :         if (new_type != LAST_NORM)
    4760             :                 goto exit2;
    4761             : 
    4762           0 :         error = mnt_want_write(old_path.mnt); /* paired with mnt_drop_write() after the exit3 label */
    4763           0 :         if (error)
    4764             :                 goto exit2;
    4765             : 
    4766             : retry_deleg:
    4767           0 :         trap = lock_rename(new_path.dentry, old_path.dentry);
    4768             : 
    4769           0 :         old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
    4770           0 :         error = PTR_ERR(old_dentry);
    4771           0 :         if (IS_ERR(old_dentry))
    4772             :                 goto exit3;
    4773             :         /* source must exist */
    4774           0 :         error = -ENOENT;
    4775           0 :         if (d_is_negative(old_dentry))
    4776             :                 goto exit4;
    4777           0 :         new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
    4778           0 :         error = PTR_ERR(new_dentry);
    4779           0 :         if (IS_ERR(new_dentry))
    4780             :                 goto exit4;
    4781           0 :         error = -EEXIST;
    4782           0 :         if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
    4783             :                 goto exit5;
    4784           0 :         if (flags & RENAME_EXCHANGE) {
    4785           0 :                 error = -ENOENT; /* exchange needs an existing target */
    4786           0 :                 if (d_is_negative(new_dentry))
    4787             :                         goto exit5;
    4788             : 
    4789           0 :                 if (!d_is_dir(new_dentry)) {
    4790           0 :                         error = -ENOTDIR;
    4791           0 :                         if (new_last.name[new_last.len]) /* a character follows the last component, i.e. trailing slashes */
    4792             :                                 goto exit5;
    4793             :                 }
    4794             :         }
    4795             :         /* unless the source is a directory trailing slashes give -ENOTDIR */
    4796           0 :         if (!d_is_dir(old_dentry)) {
    4797           0 :                 error = -ENOTDIR;
    4798           0 :                 if (old_last.name[old_last.len])
    4799             :                         goto exit5;
    4800           0 :                 if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
    4801             :                         goto exit5;
    4802             :         }
    4803             :         /* source should not be ancestor of target */
    4804           0 :         error = -EINVAL;
    4805           0 :         if (old_dentry == trap)
    4806             :                 goto exit5;
    4807             :         /* target should not be an ancestor of source */
    4808           0 :         if (!(flags & RENAME_EXCHANGE)) /* -ENOTEMPTY for a plain rename; exchange keeps -EINVAL */
    4809           0 :                 error = -ENOTEMPTY;
    4810           0 :         if (new_dentry == trap)
    4811             :                 goto exit5;
    4812             : 
    4813           0 :         error = security_path_rename(&old_path, old_dentry,
    4814             :                                      &new_path, new_dentry, flags);
    4815             :         if (error)
    4816             :                 goto exit5;
    4817             : 
    4818           0 :         rd.old_dir         = old_path.dentry->d_inode;
    4819           0 :         rd.old_dentry      = old_dentry;
    4820           0 :         rd.old_mnt_userns  = mnt_user_ns(old_path.mnt);
    4821           0 :         rd.new_dir         = new_path.dentry->d_inode;
    4822           0 :         rd.new_dentry      = new_dentry;
    4823           0 :         rd.new_mnt_userns  = mnt_user_ns(new_path.mnt);
    4824           0 :         rd.delegated_inode = &delegated_inode;
    4825           0 :         rd.flags           = flags;
    4826           0 :         error = vfs_rename(&rd);
    4827             : exit5:
    4828           0 :         dput(new_dentry);
    4829             : exit4:
    4830           0 :         dput(old_dentry);
    4831             : exit3:
    4832           0 :         unlock_rename(new_path.dentry, old_path.dentry);
    4833           0 :         if (delegated_inode) { /* set through rd.delegated_inode by vfs_rename() */
    4834           0 :                 error = break_deleg_wait(&delegated_inode);
    4835           0 :                 if (!error) /* locks are already dropped; retry from lock_rename() while keeping the mount write access */
    4836             :                         goto retry_deleg;
    4837             :         }
    4838           0 :         mnt_drop_write(old_path.mnt);
    4839             : exit2:
    4840           0 :         if (retry_estale(error, lookup_flags)) /* decision is recorded now, acted on after both paths are put */
    4841           0 :                 should_retry = true;
    4842             :         path_put(&new_path);
    4843             : exit1:
    4844           0 :         path_put(&old_path);
    4845           0 :         if (should_retry) {
    4846             :                 should_retry = false;
    4847             :                 lookup_flags |= LOOKUP_REVAL;
    4848             :                 goto retry;
    4849             :         }
    4850             : put_names:
    4851           0 :         putname(from);
    4852           0 :         putname(to);
    4853           0 :         return error;
    4854             : }
    4855             : 
    4856           0 : SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, /* renameat2 entry point: passes the caller's flags to do_renameat2() */
    4857             :                 int, newdfd, const char __user *, newname, unsigned int, flags)
    4858             : {
    4859           0 :         return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
    4860             :                                 flags);
    4861             : }
    4862             : 
    4863           0 : SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, /* renameat entry point: do_renameat2() with flags fixed to 0 */
    4864             :                 int, newdfd, const char __user *, newname)
    4865             : {
    4866           0 :         return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
    4867             :                                 0);
    4868             : }
    4869             : 
    4870           0 : SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) /* rename entry point */
    4871             : {
    4872           0 :         return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, /* both names use AT_FDCWD, flags are 0 */
    4873             :                                 getname(newname), 0);
    4874             : }
    4875             : 
    4876           0 : int readlink_copy(char __user *buffer, int buflen, const char *link) /* copy the link text to user space; returns bytes copied or a negative errno */
    4877             : {
    4878           0 :         int len = PTR_ERR(link); /* only meaningful if @link is an error pointer; returned as-is in that case */
    4879           0 :         if (IS_ERR(link))
    4880             :                 goto out;
    4881             : 
    4882           0 :         len = strlen(link); /* the terminating NUL is not counted, so it is not copied */
    4883           0 :         if (len > (unsigned) buflen) /* compared as unsigned, so a negative buflen never triggers the clamp */
    4884           0 :                 len = buflen;
    4885           0 :         if (copy_to_user(buffer, link, len))
    4886           0 :                 len = -EFAULT;
    4887             : out:
    4888           0 :         return len;
    4889             : }
    4890             : 
    4891             : /**
    4892             :  * vfs_readlink - copy symlink body into userspace buffer
    4893             :  * @dentry: dentry on which to get symbolic link
    4894             :  * @buffer: user memory pointer
    4895             :  * @buflen: size of buffer
    4896             :  *
    4897             :  * Does not touch atime.  That's up to the caller if necessary
    4898             :  *
    4899             :  * Does not call security hook.
    4900             :  */
    4901           0 : int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) /* returns readlink_copy()'s result, the fs ->readlink() result, or a negative errno */
    4902             : {
    4903           0 :         struct inode *inode = d_inode(dentry);
    4904           0 :         DEFINE_DELAYED_CALL(done);
    4905             :         const char *link;
    4906             :         int res;
    4907             : 
    4908           0 :         if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { /* flag not cached on this inode yet */
    4909           0 :                 if (unlikely(inode->i_op->readlink)) /* a filesystem-provided ->readlink() takes over completely */
    4910           0 :                         return inode->i_op->readlink(dentry, buffer, buflen);
    4911             : 
    4912           0 :                 if (!d_is_symlink(dentry))
    4913             :                         return -EINVAL;
    4914             : 
    4915           0 :                 spin_lock(&inode->i_lock); /* i_opflags is updated under i_lock */
    4916           0 :                 inode->i_opflags |= IOP_DEFAULT_READLINK;
    4917           0 :                 spin_unlock(&inode->i_lock);
    4918             :         }
    4919             : 
    4920           0 :         link = READ_ONCE(inode->i_link); /* use i_link when it is set, otherwise ask ->get_link() */
    4921           0 :         if (!link) {
    4922           0 :                 link = inode->i_op->get_link(dentry, inode, &done);
    4923           0 :                 if (IS_ERR(link))
    4924           0 :                         return PTR_ERR(link);
    4925             :         }
    4926           0 :         res = readlink_copy(buffer, buflen, link);
    4927             :         do_delayed_call(&done); /* runs whatever cleanup ->get_link() registered in @done, if any */
    4928             :         return res;
    4929             : }
    4930             : EXPORT_SYMBOL(vfs_readlink);
    4931             : 
    4932             : /**
    4933             :  * vfs_get_link - get symlink body
    4934             :  * @dentry: dentry on which to get symbolic link
    4935             :  * @done: caller needs to free returned data with this
    4936             :  *
    4937             :  * Calls security hook and i_op->get_link() on the supplied inode.
    4938             :  *
    4939             :  * It does not touch atime.  That's up to the caller if necessary.
    4940             :  *
    4941             :  * Does not work on "special" symlinks like /proc/$$/fd/N
    4942             :  */
    4943           0 : const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) /* returns the link body or an ERR_PTR() */
    4944             : {
    4945           0 :         const char *res = ERR_PTR(-EINVAL); /* result when @dentry is not a symlink */
    4946           0 :         struct inode *inode = d_inode(dentry);
    4947             : 
    4948           0 :         if (d_is_symlink(dentry)) {
    4949           0 :                 res = ERR_PTR(security_inode_readlink(dentry)); /* a hook result of 0 becomes a NULL pointer here */
    4950             :                 if (!res) /* hook allowed the read; any nonzero hook result is returned as the error pointer */
    4951           0 :                         res = inode->i_op->get_link(dentry, inode, done);
    4952             :         }
    4953           0 :         return res;
    4954             : }
    4955             : EXPORT_SYMBOL(vfs_get_link);
    4956             : 
    4957             : /* get the link contents into pagecache */
    4958           0 : const char *page_get_link(struct dentry *dentry, struct inode *inode, /* returns the address of page 0 of the inode's mapping, or an ERR_PTR() */
    4959             :                           struct delayed_call *callback) /* @callback is armed with page_put_link() for the returned page */
    4960             : {
    4961             :         char *kaddr;
    4962             :         struct page *page;
    4963           0 :         struct address_space *mapping = inode->i_mapping;
    4964             : 
    4965           0 :         if (!dentry) { /* no dentry: only an already cached, up-to-date page is used (presumably RCU-walk - confirm) */
    4966           0 :                 page = find_get_page(mapping, 0);
    4967           0 :                 if (!page)
    4968             :                         return ERR_PTR(-ECHILD);
    4969           0 :                 if (!PageUptodate(page)) {
    4970           0 :                         put_page(page);
    4971           0 :                         return ERR_PTR(-ECHILD);
    4972             :                 }
    4973             :         } else {
    4974           0 :                 page = read_mapping_page(mapping, 0, NULL);
    4975           0 :                 if (IS_ERR(page))
    4976             :                         return (char*)page; /* the error pointer is passed through unchanged */
    4977             :         }
    4978           0 :         set_delayed_call(callback, page_put_link, page); /* the page reference is dropped when the caller runs the delayed call */
    4979           0 :         BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); /* presumably because page_address() is used without mapping the page - confirm */
    4980           0 :         kaddr = page_address(page);
    4981           0 :         nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); /* presumably NUL-terminates at i_size, capped at PAGE_SIZE - 1 - confirm */
    4982           0 :         return kaddr;
    4983             : }
    4984             : 
    4985             : EXPORT_SYMBOL(page_get_link);
    4986             : 
    4987           0 : void page_put_link(void *arg) /* delayed-call callback registered by page_get_link(); @arg is the page */
    4988             : {
    4989           0 :         put_page(arg); /* drops the page reference */
    4990           0 : }
    4991             : EXPORT_SYMBOL(page_put_link);
    4992             : 
    4993           0 : int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) /* copy the link body obtained via page_get_link() to user space */
    4994             : {
    4995           0 :         DEFINE_DELAYED_CALL(done);
    4996           0 :         int res = readlink_copy(buffer, buflen, /* readlink_copy() itself handles an error pointer from page_get_link() */
    4997             :                                 page_get_link(dentry, d_inode(dentry),
    4998             :                                               &done));
    4999           0 :         do_delayed_call(&done); /* runs the page_put_link() callback if page_get_link() armed it */
    5000           0 :         return res;
    5001             : }
    5002             : EXPORT_SYMBOL(page_readlink);
    5003             : 
    5004             : /*
    5005             :  * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
    5006             :  */
    5007           0 : int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
    5008             : {
    5009           0 :         struct address_space *mapping = inode->i_mapping;
    5010             :         struct page *page;
    5011             :         void *fsdata;
    5012             :         int err;
    5013           0 :         unsigned int flags = 0;
    5014           0 :         if (nofs)
    5015           0 :                 flags |= AOP_FLAG_NOFS;
    5016             : 
    5017             : retry:
    5018           0 :         err = pagecache_write_begin(NULL, mapping, 0, len-1,
    5019             :                                 flags, &page, &fsdata);
    5020           0 :         if (err)
    5021             :                 goto fail;
    5022             : 
    5023           0 :         memcpy(page_address(page), symname, len-1);
    5024             : 
    5025           0 :         err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
    5026             :                                                         page, fsdata);
    5027           0 :         if (err < 0)
    5028             :                 goto fail;
    5029           0 :         if (err < len-1)
    5030             :                 goto retry;
    5031             : 
    5032           0 :         mark_inode_dirty(inode);
    5033           0 :         return 0;
    5034             : fail:
    5035             :         return err;
    5036             : }
    5037             : EXPORT_SYMBOL(__page_symlink);
    5038             : 
    5039           0 : int page_symlink(struct inode *inode, const char *symname, int len)
    5040             : {
    5041           0 :         return __page_symlink(inode, symname, len,
    5042           0 :                         !mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
    5043             : }
    5044             : EXPORT_SYMBOL(page_symlink);
    5045             : 
    5046             : const struct inode_operations page_symlink_inode_operations = {
    5047             :         .get_link       = page_get_link,
    5048             : };
    5049             : EXPORT_SYMBOL(page_symlink_inode_operations);

Generated by: LCOV version 1.14