LCOV - code coverage report
Current view: top level - fs/proc - base.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 13 939 1.4 %
Date: 2022-12-09 01:23:36 Functions: 3 86 3.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/proc/base.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992 Linus Torvalds
       6             :  *
       7             :  *  proc base directory handling functions
       8             :  *
       9             :  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
      10             :  *  Instead of using magical inumbers to determine the kind of object
      11             :  *  we allocate and fill in-core inodes upon lookup. They don't even
      12             :  *  go into icache. We cache the reference to task_struct upon lookup too.
      13             :  *  Eventually it should become a filesystem in its own right. We don't use the
      14             :  *  rest of procfs anymore.
      15             :  *
      16             :  *
      17             :  *  Changelog:
      18             :  *  17-Jan-2005
      19             :  *  Allan Bezerra
      20             :  *  Bruna Moreira <bruna.moreira@indt.org.br>
      21             :  *  Edjard Mota <edjard.mota@indt.org.br>
      22             :  *  Ilias Biris <ilias.biris@indt.org.br>
      23             :  *  Mauricio Lin <mauricio.lin@indt.org.br>
      24             :  *
      25             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      26             :  *
      27             :  *  A new process specific entry (smaps) included in /proc. It shows the
      28             :  *  size of rss for each memory area. The maps entry lacks information
      29             :  *  about physical memory size (rss) for each mapped file, i.e.,
      30             :  *  rss information for executables and library files.
      31             :  *  This additional information is useful for any tools that need to know
      32             :  *  about physical memory consumption for a process specific library.
      33             :  *
      34             :  *  Changelog:
      35             :  *  21-Feb-2005
      36             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      37             :  *  Pud inclusion in the page table walking.
      38             :  *
      39             :  *  ChangeLog:
      40             :  *  10-Mar-2005
      41             :  *  10LE Instituto Nokia de Tecnologia - INdT:
      42             :  *  A better way to walk through the page table, as suggested by Hugh Dickins.
      43             :  *
      44             :  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
      45             :  *  Smaps information related to shared, private, clean and dirty pages.
      46             :  *
      47             :  *  Paul Mundt <paul.mundt@nokia.com>:
      48             :  *  Overall revision about smaps.
      49             :  */
      50             : 
      51             : #include <linux/uaccess.h>
      52             : 
      53             : #include <linux/errno.h>
      54             : #include <linux/time.h>
      55             : #include <linux/proc_fs.h>
      56             : #include <linux/stat.h>
      57             : #include <linux/task_io_accounting_ops.h>
      58             : #include <linux/init.h>
      59             : #include <linux/capability.h>
      60             : #include <linux/file.h>
      61             : #include <linux/fdtable.h>
      62             : #include <linux/generic-radix-tree.h>
      63             : #include <linux/string.h>
      64             : #include <linux/seq_file.h>
      65             : #include <linux/namei.h>
      66             : #include <linux/mnt_namespace.h>
      67             : #include <linux/mm.h>
      68             : #include <linux/swap.h>
      69             : #include <linux/rcupdate.h>
      70             : #include <linux/kallsyms.h>
      71             : #include <linux/stacktrace.h>
      72             : #include <linux/resource.h>
      73             : #include <linux/module.h>
      74             : #include <linux/mount.h>
      75             : #include <linux/security.h>
      76             : #include <linux/ptrace.h>
      77             : #include <linux/printk.h>
      78             : #include <linux/cache.h>
      79             : #include <linux/cgroup.h>
      80             : #include <linux/cpuset.h>
      81             : #include <linux/audit.h>
      82             : #include <linux/poll.h>
      83             : #include <linux/nsproxy.h>
      84             : #include <linux/oom.h>
      85             : #include <linux/elf.h>
      86             : #include <linux/pid_namespace.h>
      87             : #include <linux/user_namespace.h>
      88             : #include <linux/fs_struct.h>
      89             : #include <linux/slab.h>
      90             : #include <linux/sched/autogroup.h>
      91             : #include <linux/sched/mm.h>
      92             : #include <linux/sched/coredump.h>
      93             : #include <linux/sched/debug.h>
      94             : #include <linux/sched/stat.h>
      95             : #include <linux/posix-timers.h>
      96             : #include <linux/time_namespace.h>
      97             : #include <linux/resctrl.h>
      98             : #include <linux/cn_proc.h>
      99             : #include <trace/events/oom.h>
     100             : #include "internal.h"
     101             : #include "fd.h"
     102             : 
     103             : #include "../../lib/kstrtox.h"
     104             : 
     105             : /* NOTE:
     106             :  *      Implementing inode permission operations in /proc is almost
     107             :  *      certainly an error.  Permission checks need to happen during
     108             :  *      each system call not at open time.  The reason is that most of
     109             :  *      what we wish to check for permissions in /proc varies at runtime.
     110             :  *
     111             :  *      The classic example of a problem is opening file descriptors
     112             :  *      in /proc for a task before it execs a suid executable.
     113             :  */
     114             : 
     115             : static u8 nlink_tid __ro_after_init;
     116             : static u8 nlink_tgid __ro_after_init;
     117             : 
     118             : struct pid_entry {
     119             :         const char *name;
     120             :         unsigned int len;
     121             :         umode_t mode;
     122             :         const struct inode_operations *iop;
     123             :         const struct file_operations *fop;
     124             :         union proc_op op;
     125             : };
     126             : 
     127             : #define NOD(NAME, MODE, IOP, FOP, OP) {                 \
     128             :         .name = (NAME),                                 \
     129             :         .len  = sizeof(NAME) - 1,                       \
     130             :         .mode = MODE,                                   \
     131             :         .iop  = IOP,                                    \
     132             :         .fop  = FOP,                                    \
     133             :         .op   = OP,                                     \
     134             : }
     135             : 
     136             : #define DIR(NAME, MODE, iops, fops)     \
     137             :         NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
     138             : #define LNK(NAME, get_link)                                     \
     139             :         NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
     140             :                 &proc_pid_link_inode_operations, NULL,              \
     141             :                 { .proc_get_link = get_link } )
     142             : #define REG(NAME, MODE, fops)                           \
     143             :         NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
     144             : #define ONE(NAME, MODE, show)                           \
     145             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     146             :                 NULL, &proc_single_file_operations, \
     147             :                 { .proc_show = show } )
     148             : #define ATTR(LSM, NAME, MODE)                           \
     149             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     150             :                 NULL, &proc_pid_attr_operations,    \
     151             :                 { .lsm = LSM })
     152             : 
     153             : /*
     154             :  * Count the number of hardlinks for the pid_entry table: one per directory
     155             :  * entry, on top of the initial count of 2 (presumably the . and .. links).
     156             :  */
     157           2 : static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
     158             :         unsigned int n)
     159             : {
     160             :         unsigned int i;
     161             :         unsigned int count;
     162             : 
     163           2 :         count = 2;
     164          65 :         for (i = 0; i < n; ++i) {
     165          63 :                 if (S_ISDIR(entries[i].mode))
     166           8 :                         ++count;
     167             :         }
     168             : 
     169           2 :         return count;
     170             : }
     171             : 
     172             : static int get_task_root(struct task_struct *task, struct path *root)
     173             : {
     174           0 :         int result = -ENOENT;
     175             : 
     176           0 :         task_lock(task);
     177           0 :         if (task->fs) {
     178           0 :                 get_fs_root(task->fs, root);
     179           0 :                 result = 0;
     180             :         }
     181           0 :         task_unlock(task);
     182             :         return result;
     183             : }
     184             : 
     185           0 : static int proc_cwd_link(struct dentry *dentry, struct path *path)
     186             : {
     187           0 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     188           0 :         int result = -ENOENT;
     189             : 
     190           0 :         if (task) {
     191             :                 task_lock(task);
     192           0 :                 if (task->fs) {
     193           0 :                         get_fs_pwd(task->fs, path);
     194           0 :                         result = 0;
     195             :                 }
     196             :                 task_unlock(task);
     197           0 :                 put_task_struct(task);
     198             :         }
     199           0 :         return result;
     200             : }
     201             : 
     202           0 : static int proc_root_link(struct dentry *dentry, struct path *path)
     203             : {
     204           0 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     205           0 :         int result = -ENOENT;
     206             : 
     207           0 :         if (task) {
     208           0 :                 result = get_task_root(task, path);
     209           0 :                 put_task_struct(task);
     210             :         }
     211           0 :         return result;
     212             : }
     213             : 
     214             : /*
     215             :  * If the user used setproctitle(), we just get the string from
     216             :  * user space at arg_start, and limit it to a maximum of one page.
     217             :  */
     218           0 : static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
     219             :                                 size_t count, unsigned long pos,
     220             :                                 unsigned long arg_start)
     221             : {
     222             :         char *page;
     223             :         int ret, got;
     224             : 
     225           0 :         if (pos >= PAGE_SIZE)
     226             :                 return 0;
     227             : 
     228           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     229           0 :         if (!page)
     230             :                 return -ENOMEM;
     231             : 
     232           0 :         ret = 0;
     233           0 :         got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
     234           0 :         if (got > 0) {
     235           0 :                 int len = strnlen(page, got);
     236             : 
     237             :                 /* Include the NUL character if it was found */
     238           0 :                 if (len < got)
     239           0 :                         len++;
     240             : 
     241           0 :                 if (len > pos) {
     242           0 :                         len -= pos;
     243           0 :                         if (len > count)
     244           0 :                                 len = count;
     245           0 :                         len -= copy_to_user(buf, page+pos, len);
     246           0 :                         if (!len)
     247           0 :                                 len = -EFAULT;
     248             :                         ret = len;
     249             :                 }
     250             :         }
     251           0 :         free_page((unsigned long)page);
     252           0 :         return ret;
     253             : }
     254             : 
     255           0 : static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
     256             :                               size_t count, loff_t *ppos)
     257             : {
     258             :         unsigned long arg_start, arg_end, env_start, env_end;
     259             :         unsigned long pos, len;
     260             :         char *page, c;
     261             : 
     262             :         /* Check if process spawned far enough to have cmdline. */
     263           0 :         if (!mm->env_end)
     264             :                 return 0;
     265             : 
     266           0 :         spin_lock(&mm->arg_lock);
     267           0 :         arg_start = mm->arg_start;
     268           0 :         arg_end = mm->arg_end;
     269           0 :         env_start = mm->env_start;
     270           0 :         env_end = mm->env_end;
     271           0 :         spin_unlock(&mm->arg_lock);
     272             : 
     273           0 :         if (arg_start >= arg_end)
     274             :                 return 0;
     275             : 
     276             :         /*
     277             :          * We allow setproctitle() to overwrite the argument
     278             :          * strings, and overflow past the original end. But
     279             :          * only when it overflows into the environment area.
     280             :          */
     281           0 :         if (env_start != arg_end || env_end < env_start)
     282           0 :                 env_start = env_end = arg_end;
     283           0 :         len = env_end - arg_start;
     284             : 
     285             :         /* We're not going to care if "*ppos" has high bits set */
     286           0 :         pos = *ppos;
     287           0 :         if (pos >= len)
     288             :                 return 0;
     289           0 :         if (count > len - pos)
     290           0 :                 count = len - pos;
     291           0 :         if (!count)
     292             :                 return 0;
     293             : 
     294             :         /*
     295             :          * Magical special case: if the argv[] end byte is not
     296             :          * zero, the user has overwritten it with setproctitle(3).
     297             :          *
     298             :          * Possible future enhancement: do this only once when
     299             :          * pos is 0, and set a flag in the 'struct file'.
     300             :          */
     301           0 :         if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
     302           0 :                 return get_mm_proctitle(mm, buf, count, pos, arg_start);
     303             : 
     304             :         /*
     305             :          * For the non-setproctitle() case we limit things strictly
     306             :          * to the [arg_start, arg_end[ range.
     307             :          */
     308           0 :         pos += arg_start;
     309           0 :         if (pos < arg_start || pos >= arg_end)
     310             :                 return 0;
     311           0 :         if (count > arg_end - pos)
     312           0 :                 count = arg_end - pos;
     313             : 
     314           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     315           0 :         if (!page)
     316             :                 return -ENOMEM;
     317             : 
     318             :         len = 0;
     319           0 :         while (count) {
     320             :                 int got;
     321           0 :                 size_t size = min_t(size_t, PAGE_SIZE, count);
     322             : 
     323           0 :                 got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
     324           0 :                 if (got <= 0)
     325             :                         break;
     326           0 :                 got -= copy_to_user(buf, page, got);
     327           0 :                 if (unlikely(!got)) {
     328           0 :                         if (!len)
     329           0 :                                 len = -EFAULT;
     330             :                         break;
     331             :                 }
     332           0 :                 pos += got;
     333           0 :                 buf += got;
     334           0 :                 len += got;
     335           0 :                 count -= got;
     336             :         }
     337             : 
     338           0 :         free_page((unsigned long)page);
     339           0 :         return len;
     340             : }
     341             : 
     342           0 : static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
     343             :                                 size_t count, loff_t *pos)
     344             : {
     345             :         struct mm_struct *mm;
     346             :         ssize_t ret;
     347             : 
     348           0 :         mm = get_task_mm(tsk);
     349           0 :         if (!mm)
     350             :                 return 0;
     351             : 
     352           0 :         ret = get_mm_cmdline(mm, buf, count, pos);
     353           0 :         mmput(mm);
     354           0 :         return ret;
     355             : }
     356             : 
     357           0 : static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
     358             :                                      size_t count, loff_t *pos)
     359             : {
     360             :         struct task_struct *tsk;
     361             :         ssize_t ret;
     362             : 
     363           0 :         BUG_ON(*pos < 0);
     364             : 
     365           0 :         tsk = get_proc_task(file_inode(file));
     366           0 :         if (!tsk)
     367             :                 return -ESRCH;
     368           0 :         ret = get_task_cmdline(tsk, buf, count, pos);
     369           0 :         put_task_struct(tsk);
     370           0 :         if (ret > 0)
     371           0 :                 *pos += ret;
     372             :         return ret;
     373             : }
     374             : 
     375             : static const struct file_operations proc_pid_cmdline_ops = {
     376             :         .read   = proc_pid_cmdline_read,
     377             :         .llseek = generic_file_llseek,
     378             : };
     379             : 
     380             : #ifdef CONFIG_KALLSYMS
     381             : /*
     382             :  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
     383             :  * Prints the resolved symbol.  If that fails or access is denied, prints 0.
     384             :  */
     385           0 : static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
     386             :                           struct pid *pid, struct task_struct *task)
     387             : {
     388             :         unsigned long wchan;
     389             :         char symname[KSYM_NAME_LEN];
     390             : 
     391           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
     392             :                 goto print0;
     393             : 
     394           0 :         wchan = get_wchan(task);
     395           0 :         if (wchan && !lookup_symbol_name(wchan, symname)) {
     396           0 :                 seq_puts(m, symname);
     397           0 :                 return 0;
     398             :         }
     399             : 
     400             : print0:
     401           0 :         seq_putc(m, '0');
     402           0 :         return 0;
     403             : }
     404             : #endif /* CONFIG_KALLSYMS */
     405             : 
     406           0 : static int lock_trace(struct task_struct *task)
     407             : {
     408           0 :         int err = down_read_killable(&task->signal->exec_update_lock);
     409           0 :         if (err)
     410             :                 return err;
     411           0 :         if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
     412           0 :                 up_read(&task->signal->exec_update_lock);
     413           0 :                 return -EPERM;
     414             :         }
     415             :         return 0;
     416             : }
     417             : 
     418             : static void unlock_trace(struct task_struct *task)
     419             : {
     420           0 :         up_read(&task->signal->exec_update_lock);
     421             : }
     422             : 
     423             : #ifdef CONFIG_STACKTRACE
     424             : 
     425             : #define MAX_STACK_TRACE_DEPTH   64
     426             : 
     427           0 : static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
     428             :                           struct pid *pid, struct task_struct *task)
     429             : {
     430             :         unsigned long *entries;
     431             :         int err;
     432             : 
     433             :         /*
     434             :          * The ability to racily run the kernel stack unwinder on a running task
     435             :          * and then observe the unwinder output is scary; while it is useful for
     436             :          * debugging kernel issues, it can also allow an attacker to leak kernel
     437             :          * stack contents.
     438             :          * Doing this in a manner that is at least safe from races would require
     439             :          * some work to ensure that the remote task can not be scheduled; and
     440             :          * even then, this would still expose the unwinder as local attack
     441             :          * surface.
     442             :          * Therefore, this interface is restricted to root.
     443             :          */
     444           0 :         if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
     445             :                 return -EACCES;
     446             : 
     447           0 :         entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
     448             :                                 GFP_KERNEL);
     449           0 :         if (!entries)
     450             :                 return -ENOMEM;
     451             : 
     452           0 :         err = lock_trace(task);
     453           0 :         if (!err) {
     454             :                 unsigned int i, nr_entries;
     455             : 
     456           0 :                 nr_entries = stack_trace_save_tsk(task, entries,
     457             :                                                   MAX_STACK_TRACE_DEPTH, 0);
     458             : 
     459           0 :                 for (i = 0; i < nr_entries; i++) {
     460           0 :                         seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
     461             :                 }
     462             : 
     463           0 :                 unlock_trace(task);
     464             :         }
     465           0 :         kfree(entries);
     466             : 
     467           0 :         return err;
     468             : }
     469             : #endif
     470             : 
     471             : #ifdef CONFIG_SCHED_INFO
     472             : /*
     473             :  * Provides /proc/PID/schedstat
     474             :  */
     475             : static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
     476             :                               struct pid *pid, struct task_struct *task)
     477             : {
     478             :         if (unlikely(!sched_info_on()))
     479             :                 seq_puts(m, "0 0 0\n");
     480             :         else
     481             :                 seq_printf(m, "%llu %llu %lu\n",
     482             :                    (unsigned long long)task->se.sum_exec_runtime,
     483             :                    (unsigned long long)task->sched_info.run_delay,
     484             :                    task->sched_info.pcount);
     485             : 
     486             :         return 0;
     487             : }
     488             : #endif
     489             : 
     490             : #ifdef CONFIG_LATENCYTOP
     491             : static int lstats_show_proc(struct seq_file *m, void *v)
     492             : {
     493             :         int i;
     494             :         struct inode *inode = m->private;
     495             :         struct task_struct *task = get_proc_task(inode);
     496             : 
     497             :         if (!task)
     498             :                 return -ESRCH;
     499             :         seq_puts(m, "Latency Top version : v0.1\n");
     500             :         for (i = 0; i < LT_SAVECOUNT; i++) {
     501             :                 struct latency_record *lr = &task->latency_record[i];
     502             :                 if (lr->backtrace[0]) {
     503             :                         int q;
     504             :                         seq_printf(m, "%i %li %li",
     505             :                                    lr->count, lr->time, lr->max);
     506             :                         for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
     507             :                                 unsigned long bt = lr->backtrace[q];
     508             : 
     509             :                                 if (!bt)
     510             :                                         break;
     511             :                                 seq_printf(m, " %ps", (void *)bt);
     512             :                         }
     513             :                         seq_putc(m, '\n');
     514             :                 }
     515             : 
     516             :         }
     517             :         put_task_struct(task);
     518             :         return 0;
     519             : }
     520             : 
     521             : static int lstats_open(struct inode *inode, struct file *file)
     522             : {
     523             :         return single_open(file, lstats_show_proc, inode);
     524             : }
     525             : 
     526             : static ssize_t lstats_write(struct file *file, const char __user *buf,
     527             :                             size_t count, loff_t *offs)
     528             : {
     529             :         struct task_struct *task = get_proc_task(file_inode(file));
     530             : 
     531             :         if (!task)
     532             :                 return -ESRCH;
     533             :         clear_tsk_latency_tracing(task);
     534             :         put_task_struct(task);
     535             : 
     536             :         return count;
     537             : }
     538             : 
     539             : static const struct file_operations proc_lstats_operations = {
     540             :         .open           = lstats_open,
     541             :         .read           = seq_read,
     542             :         .write          = lstats_write,
     543             :         .llseek         = seq_lseek,
     544             :         .release        = single_release,
     545             : };
     546             : 
     547             : #endif
     548             : 
     549           0 : static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
     550             :                           struct pid *pid, struct task_struct *task)
     551             : {
     552           0 :         unsigned long totalpages = totalram_pages() + total_swap_pages;
     553           0 :         unsigned long points = 0;
     554             :         long badness;
     555             : 
     556           0 :         badness = oom_badness(task, totalpages);
     557             :         /*
     558             :          * Special case OOM_SCORE_ADJ_MIN; for all others, scale the
     559             :          * badness value into the [0, 2000] range, which we have been
     560             :          * exporting for a long time, so userspace might depend on it.
     561             :          */
     562           0 :         if (badness != LONG_MIN)
     563           0 :                 points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
     564             : 
     565           0 :         seq_printf(m, "%lu\n", points);
     566             : 
     567           0 :         return 0;
     568             : }
     569             : 
     570             : struct limit_names {
     571             :         const char *name;
     572             :         const char *unit;
     573             : };
     574             : 
     575             : static const struct limit_names lnames[RLIM_NLIMITS] = {
     576             :         [RLIMIT_CPU] = {"Max cpu time", "seconds"},
     577             :         [RLIMIT_FSIZE] = {"Max file size", "bytes"},
     578             :         [RLIMIT_DATA] = {"Max data size", "bytes"},
     579             :         [RLIMIT_STACK] = {"Max stack size", "bytes"},
     580             :         [RLIMIT_CORE] = {"Max core file size", "bytes"},
     581             :         [RLIMIT_RSS] = {"Max resident set", "bytes"},
     582             :         [RLIMIT_NPROC] = {"Max processes", "processes"},
     583             :         [RLIMIT_NOFILE] = {"Max open files", "files"},
     584             :         [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
     585             :         [RLIMIT_AS] = {"Max address space", "bytes"},
     586             :         [RLIMIT_LOCKS] = {"Max file locks", "locks"},
     587             :         [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
     588             :         [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
     589             :         [RLIMIT_NICE] = {"Max nice priority", NULL},
     590             :         [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
     591             :         [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
     592             : };
     593             : 
     594             : /* Display limits for a process */
     595           0 : static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
     596             :                            struct pid *pid, struct task_struct *task)
     597             : {
     598             :         unsigned int i;
     599             :         unsigned long flags;
     600             : 
     601             :         struct rlimit rlim[RLIM_NLIMITS];
     602             : 
     603           0 :         if (!lock_task_sighand(task, &flags))
     604             :                 return 0;
     605           0 :         memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
     606           0 :         unlock_task_sighand(task, &flags);
     607             : 
     608             :         /*
     609             :          * print the file header
     610             :          */
     611           0 :         seq_puts(m, "Limit                     "
     612             :                 "Soft Limit           "
     613             :                 "Hard Limit           "
     614             :                 "Units     \n");
     615             : 
     616           0 :         for (i = 0; i < RLIM_NLIMITS; i++) {
     617           0 :                 if (rlim[i].rlim_cur == RLIM_INFINITY)
     618           0 :                         seq_printf(m, "%-25s %-20s ",
     619             :                                    lnames[i].name, "unlimited");
     620             :                 else
     621           0 :                         seq_printf(m, "%-25s %-20lu ",
     622             :                                    lnames[i].name, rlim[i].rlim_cur);
     623             : 
     624           0 :                 if (rlim[i].rlim_max == RLIM_INFINITY)
     625           0 :                         seq_printf(m, "%-20s ", "unlimited");
     626             :                 else
     627           0 :                         seq_printf(m, "%-20lu ", rlim[i].rlim_max);
     628             : 
     629           0 :                 if (lnames[i].unit)
     630           0 :                         seq_printf(m, "%-10s\n", lnames[i].unit);
     631             :                 else
     632           0 :                         seq_putc(m, '\n');
     633             :         }
     634             : 
     635             :         return 0;
     636             : }
     637             : 
     638             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
     639             : static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
     640             :                             struct pid *pid, struct task_struct *task)
     641             : {
     642             :         struct syscall_info info;
     643             :         u64 *args = &info.data.args[0];
     644             :         int res;
     645             : 
     646             :         res = lock_trace(task);
     647             :         if (res)
     648             :                 return res;
     649             : 
     650             :         if (task_current_syscall(task, &info))
     651             :                 seq_puts(m, "running\n");
     652             :         else if (info.data.nr < 0)
     653             :                 seq_printf(m, "%d 0x%llx 0x%llx\n",
     654             :                            info.data.nr, info.sp, info.data.instruction_pointer);
     655             :         else
     656             :                 seq_printf(m,
     657             :                        "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
     658             :                        info.data.nr,
     659             :                        args[0], args[1], args[2], args[3], args[4], args[5],
     660             :                        info.sp, info.data.instruction_pointer);
     661             :         unlock_trace(task);
     662             : 
     663             :         return 0;
     664             : }
     665             : #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
     666             : 
     667             : /************************************************************************/
     668             : /*                       Here the fs part begins                        */
     669             : /************************************************************************/
     670             : 
     671             : /* permission checks */
     672           0 : static bool proc_fd_access_allowed(struct inode *inode)
     673             : {
     674             :         struct task_struct *task;
     675           0 :         bool allowed = false;
     676             :         /* Allow access to a task's file descriptors if it is us or we
     677             :          * may use ptrace attach to the process and find out that
     678             :          * information.
     679             :          */
     680           0 :         task = get_proc_task(inode);
     681           0 :         if (task) {
     682           0 :                 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     683           0 :                 put_task_struct(task);
     684             :         }
     685           0 :         return allowed;
     686             : }
     687             : 
     688           0 : int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
     689             :                  struct iattr *attr)
     690             : {
     691             :         int error;
     692           0 :         struct inode *inode = d_inode(dentry);
     693             : 
     694           0 :         if (attr->ia_valid & ATTR_MODE)
     695             :                 return -EPERM;
     696             : 
     697           0 :         error = setattr_prepare(&init_user_ns, dentry, attr);
     698           0 :         if (error)
     699             :                 return error;
     700             : 
     701           0 :         setattr_copy(&init_user_ns, inode, attr);
     702           0 :         mark_inode_dirty(inode);
     703           0 :         return 0;
     704             : }
     705             : 
     706             : /*
     707             :  * May current process learn task's sched/cmdline info (for hide_pid_min=1)
     708             :  * or euid/egid (for hide_pid_min=2)?
     709             :  */
     710           0 : static bool has_pid_permissions(struct proc_fs_info *fs_info,
     711             :                                  struct task_struct *task,
     712             :                                  enum proc_hidepid hide_pid_min)
     713             : {
     714             :         /*
     715             :          * If the 'hidepid' mount option is set, force a ptrace check;
     716             :          * we indicate that we are using a filesystem syscall
     717             :          * by passing PTRACE_MODE_READ_FSCREDS
     718             :          */
     719           0 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
     720           0 :                 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     721             : 
     722           0 :         if (fs_info->hide_pid < hide_pid_min)
     723             :                 return true;
     724           0 :         if (in_group_p(fs_info->pid_gid))
     725             :                 return true;
     726           0 :         return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     727             : }
     728             : 
     729             : 
     730           0 : static int proc_pid_permission(struct user_namespace *mnt_userns,
     731             :                                struct inode *inode, int mask)
     732             : {
     733           0 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
     734             :         struct task_struct *task;
     735             :         bool has_perms;
     736             : 
     737           0 :         task = get_proc_task(inode);
     738           0 :         if (!task)
     739             :                 return -ESRCH;
     740           0 :         has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
     741           0 :         put_task_struct(task);
     742             : 
     743           0 :         if (!has_perms) {
     744           0 :                 if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
     745             :                         /*
     746             :                          * Let's make getdents(), stat(), and open()
     747             :                          * consistent with each other.  If a process
     748             :                          * may not stat() a file, it shouldn't be seen
     749             :                          * in procfs at all.
     750             :                          */
     751             :                         return -ENOENT;
     752             :                 }
     753             : 
     754           0 :                 return -EPERM;
     755             :         }
     756           0 :         return generic_permission(&init_user_ns, inode, mask);
     757             : }
     758             : 
     759             : 
     760             : 
     761             : static const struct inode_operations proc_def_inode_operations = {
     762             :         .setattr        = proc_setattr,
     763             : };
     764             : 
     765           0 : static int proc_single_show(struct seq_file *m, void *v)
     766             : {
     767           0 :         struct inode *inode = m->private;
     768           0 :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
     769           0 :         struct pid *pid = proc_pid(inode);
     770             :         struct task_struct *task;
     771             :         int ret;
     772             : 
     773           0 :         task = get_pid_task(pid, PIDTYPE_PID);
     774           0 :         if (!task)
     775             :                 return -ESRCH;
     776             : 
     777           0 :         ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
     778             : 
     779           0 :         put_task_struct(task);
     780           0 :         return ret;
     781             : }
     782             : 
     783           0 : static int proc_single_open(struct inode *inode, struct file *filp)
     784             : {
     785           0 :         return single_open(filp, proc_single_show, inode);
     786             : }
     787             : 
     788             : static const struct file_operations proc_single_file_operations = {
     789             :         .open           = proc_single_open,
     790             :         .read           = seq_read,
     791             :         .llseek         = seq_lseek,
     792             :         .release        = single_release,
     793             : };
     794             : 
     795             : 
     796           0 : struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
     797             : {
     798           0 :         struct task_struct *task = get_proc_task(inode);
     799           0 :         struct mm_struct *mm = ERR_PTR(-ESRCH);
     800             : 
     801           0 :         if (task) {
     802           0 :                 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
     803           0 :                 put_task_struct(task);
     804             : 
     805           0 :                 if (!IS_ERR_OR_NULL(mm)) {
     806             :                         /* ensure this mm_struct can't be freed */
     807           0 :                         mmgrab(mm);
     808             :                         /* but do not pin its memory */
     809           0 :                         mmput(mm);
     810             :                 }
     811             :         }
     812             : 
     813           0 :         return mm;
     814             : }
     815             : 
     816             : static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
     817             : {
     818           0 :         struct mm_struct *mm = proc_mem_open(inode, mode);
     819             : 
     820           0 :         if (IS_ERR(mm))
     821           0 :                 return PTR_ERR(mm);
     822             : 
     823           0 :         file->private_data = mm;
     824             :         return 0;
     825             : }
     826             : 
     827           0 : static int mem_open(struct inode *inode, struct file *file)
     828             : {
     829           0 :         int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
     830             : 
     831             :         /* OK to pass negative loff_t, we can catch out-of-range */
     832           0 :         file->f_mode |= FMODE_UNSIGNED_OFFSET;
     833             : 
     834           0 :         return ret;
     835             : }
     836             : 
     837           0 : static ssize_t mem_rw(struct file *file, char __user *buf,
     838             :                         size_t count, loff_t *ppos, int write)
     839             : {
     840           0 :         struct mm_struct *mm = file->private_data;
     841           0 :         unsigned long addr = *ppos;
     842             :         ssize_t copied;
     843             :         char *page;
     844             :         unsigned int flags;
     845             : 
     846           0 :         if (!mm)
     847             :                 return 0;
     848             : 
     849           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     850           0 :         if (!page)
     851             :                 return -ENOMEM;
     852             : 
     853           0 :         copied = 0;
     854           0 :         if (!mmget_not_zero(mm))
     855             :                 goto free;
     856             : 
     857           0 :         flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
     858             : 
     859           0 :         while (count > 0) {
     860           0 :                 size_t this_len = min_t(size_t, count, PAGE_SIZE);
     861             : 
     862           0 :                 if (write && copy_from_user(page, buf, this_len)) {
     863             :                         copied = -EFAULT;
     864             :                         break;
     865             :                 }
     866             : 
     867           0 :                 this_len = access_remote_vm(mm, addr, page, this_len, flags);
     868           0 :                 if (!this_len) {
     869           0 :                         if (!copied)
     870           0 :                                 copied = -EIO;
     871             :                         break;
     872             :                 }
     873             : 
     874           0 :                 if (!write && copy_to_user(buf, page, this_len)) {
     875             :                         copied = -EFAULT;
     876             :                         break;
     877             :                 }
     878             : 
     879           0 :                 buf += this_len;
     880           0 :                 addr += this_len;
     881           0 :                 copied += this_len;
     882           0 :                 count -= this_len;
     883             :         }
     884           0 :         *ppos = addr;
     885             : 
     886           0 :         mmput(mm);
     887             : free:
     888           0 :         free_page((unsigned long) page);
     889             :         return copied;
     890             : }
     891             : 
     892           0 : static ssize_t mem_read(struct file *file, char __user *buf,
     893             :                         size_t count, loff_t *ppos)
     894             : {
     895           0 :         return mem_rw(file, buf, count, ppos, 0);
     896             : }
     897             : 
     898           0 : static ssize_t mem_write(struct file *file, const char __user *buf,
     899             :                          size_t count, loff_t *ppos)
     900             : {
     901           0 :         return mem_rw(file, (char __user*)buf, count, ppos, 1);
     902             : }
     903             : 
     904           0 : loff_t mem_lseek(struct file *file, loff_t offset, int orig)
     905             : {
     906           0 :         switch (orig) {
     907             :         case 0:
     908           0 :                 file->f_pos = offset;
     909           0 :                 break;
     910             :         case 1:
     911           0 :                 file->f_pos += offset;
     912           0 :                 break;
     913             :         default:
     914             :                 return -EINVAL;
     915             :         }
     916             :         force_successful_syscall_return();
     917           0 :         return file->f_pos;
     918             : }
     919             : 
     920           0 : static int mem_release(struct inode *inode, struct file *file)
     921             : {
     922           0 :         struct mm_struct *mm = file->private_data;
     923           0 :         if (mm)
     924             :                 mmdrop(mm);
     925           0 :         return 0;
     926             : }
     927             : 
     928             : static const struct file_operations proc_mem_operations = {
     929             :         .llseek         = mem_lseek,
     930             :         .read           = mem_read,
     931             :         .write          = mem_write,
     932             :         .open           = mem_open,
     933             :         .release        = mem_release,
     934             : };
     935             : 
     936           0 : static int environ_open(struct inode *inode, struct file *file)
     937             : {
     938           0 :         return __mem_open(inode, file, PTRACE_MODE_READ);
     939             : }
     940             : 
     941           0 : static ssize_t environ_read(struct file *file, char __user *buf,
     942             :                         size_t count, loff_t *ppos)
     943             : {
     944             :         char *page;
     945           0 :         unsigned long src = *ppos;
     946           0 :         int ret = 0;
     947           0 :         struct mm_struct *mm = file->private_data;
     948             :         unsigned long env_start, env_end;
     949             : 
     950             :         /* Ensure the process spawned far enough to have an environment. */
     951           0 :         if (!mm || !mm->env_end)
     952             :                 return 0;
     953             : 
     954           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     955           0 :         if (!page)
     956             :                 return -ENOMEM;
     957             : 
     958           0 :         ret = 0;
     959           0 :         if (!mmget_not_zero(mm))
     960             :                 goto free;
     961             : 
     962           0 :         spin_lock(&mm->arg_lock);
     963           0 :         env_start = mm->env_start;
     964           0 :         env_end = mm->env_end;
     965           0 :         spin_unlock(&mm->arg_lock);
     966             : 
     967           0 :         while (count > 0) {
     968             :                 size_t this_len, max_len;
     969             :                 int retval;
     970             : 
     971           0 :                 if (src >= (env_end - env_start))
     972             :                         break;
     973             : 
     974           0 :                 this_len = env_end - (env_start + src);
     975             : 
     976           0 :                 max_len = min_t(size_t, PAGE_SIZE, count);
     977           0 :                 this_len = min(max_len, this_len);
     978             : 
     979           0 :                 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
     980             : 
     981           0 :                 if (retval <= 0) {
     982             :                         ret = retval;
     983             :                         break;
     984             :                 }
     985             : 
     986           0 :                 if (copy_to_user(buf, page, retval)) {
     987             :                         ret = -EFAULT;
     988             :                         break;
     989             :                 }
     990             : 
     991           0 :                 ret += retval;
     992           0 :                 src += retval;
     993           0 :                 buf += retval;
     994           0 :                 count -= retval;
     995             :         }
     996           0 :         *ppos = src;
     997           0 :         mmput(mm);
     998             : 
     999             : free:
    1000           0 :         free_page((unsigned long) page);
    1001           0 :         return ret;
    1002             : }
    1003             : 
    1004             : static const struct file_operations proc_environ_operations = {
    1005             :         .open           = environ_open,
    1006             :         .read           = environ_read,
    1007             :         .llseek         = generic_file_llseek,
    1008             :         .release        = mem_release,
    1009             : };
    1010             : 
    1011           0 : static int auxv_open(struct inode *inode, struct file *file)
    1012             : {
    1013           0 :         return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
    1014             : }
    1015             : 
    1016           0 : static ssize_t auxv_read(struct file *file, char __user *buf,
    1017             :                         size_t count, loff_t *ppos)
    1018             : {
    1019           0 :         struct mm_struct *mm = file->private_data;
    1020           0 :         unsigned int nwords = 0;
    1021             : 
    1022           0 :         if (!mm)
    1023             :                 return 0;
    1024             :         do {
    1025           0 :                 nwords += 2;
    1026           0 :         } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
    1027           0 :         return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
    1028             :                                        nwords * sizeof(mm->saved_auxv[0]));
    1029             : }
    1030             : 
    1031             : static const struct file_operations proc_auxv_operations = {
    1032             :         .open           = auxv_open,
    1033             :         .read           = auxv_read,
    1034             :         .llseek         = generic_file_llseek,
    1035             :         .release        = mem_release,
    1036             : };
    1037             : 
    1038           0 : static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
    1039             :                             loff_t *ppos)
    1040             : {
    1041           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    1042             :         char buffer[PROC_NUMBUF];
    1043           0 :         int oom_adj = OOM_ADJUST_MIN;
    1044             :         size_t len;
    1045             : 
    1046           0 :         if (!task)
    1047             :                 return -ESRCH;
    1048           0 :         if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
    1049             :                 oom_adj = OOM_ADJUST_MAX;
    1050             :         else
    1051           0 :                 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
    1052             :                           OOM_SCORE_ADJ_MAX;
    1053           0 :         put_task_struct(task);
    1054           0 :         if (oom_adj > OOM_ADJUST_MAX)
    1055           0 :                 oom_adj = OOM_ADJUST_MAX;
    1056           0 :         len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
    1057           0 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1058             : }
    1059             : 
    1060           0 : static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
    1061             : {
    1062           0 :         struct mm_struct *mm = NULL;
    1063             :         struct task_struct *task;
    1064           0 :         int err = 0;
    1065             : 
    1066           0 :         task = get_proc_task(file_inode(file));
    1067           0 :         if (!task)
    1068             :                 return -ESRCH;
    1069             : 
    1070           0 :         mutex_lock(&oom_adj_mutex);
    1071           0 :         if (legacy) {
    1072           0 :                 if (oom_adj < task->signal->oom_score_adj &&
    1073           0 :                                 !capable(CAP_SYS_RESOURCE)) {
    1074             :                         err = -EACCES;
    1075             :                         goto err_unlock;
    1076             :                 }
    1077             :                 /*
    1078             :                  * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
    1079             :                  * /proc/pid/oom_score_adj instead.
    1080             :                  */
    1081           0 :                 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
    1082             :                           current->comm, task_pid_nr(current), task_pid_nr(task),
    1083             :                           task_pid_nr(task));
    1084             :         } else {
    1085           0 :                 if ((short)oom_adj < task->signal->oom_score_adj_min &&
    1086           0 :                                 !capable(CAP_SYS_RESOURCE)) {
    1087             :                         err = -EACCES;
    1088             :                         goto err_unlock;
    1089             :                 }
    1090             :         }
    1091             : 
    1092             :         /*
    1093             :          * Make sure we will check other processes sharing the mm if this is
    1094             :          * not vfork, which wants its own oom_score_adj.
    1095             :          * Pin the mm so it doesn't go away and get reused after task_unlock.
    1096             :          */
    1097           0 :         if (!task->vfork_done) {
    1098           0 :                 struct task_struct *p = find_lock_task_mm(task);
    1099             : 
    1100           0 :                 if (p) {
    1101           0 :                         if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
    1102           0 :                                 mm = p->mm;
    1103             :                                 mmgrab(mm);
    1104             :                         }
    1105           0 :                         task_unlock(p);
    1106             :                 }
    1107             :         }
    1108             : 
    1109           0 :         task->signal->oom_score_adj = oom_adj;
    1110           0 :         if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1111           0 :                 task->signal->oom_score_adj_min = (short)oom_adj;
    1112             :         trace_oom_score_adj_update(task);
    1113             : 
    1114           0 :         if (mm) {
    1115             :                 struct task_struct *p;
    1116             : 
    1117             :                 rcu_read_lock();
    1118           0 :                 for_each_process(p) {
    1119           0 :                         if (same_thread_group(task, p))
    1120           0 :                                 continue;
    1121             : 
    1122             :                         /* do not touch kernel threads or the global init */
    1123           0 :                         if (p->flags & PF_KTHREAD || is_global_init(p))
    1124           0 :                                 continue;
    1125             : 
    1126           0 :                         task_lock(p);
    1127           0 :                         if (!p->vfork_done && process_shares_mm(p, mm)) {
    1128           0 :                                 p->signal->oom_score_adj = oom_adj;
    1129           0 :                                 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1130           0 :                                         p->signal->oom_score_adj_min = (short)oom_adj;
    1131             :                         }
    1132           0 :                         task_unlock(p);
    1133             :                 }
    1134           0 :                 rcu_read_unlock();
    1135             :                 mmdrop(mm);
    1136             :         }
    1137             : err_unlock:
    1138           0 :         mutex_unlock(&oom_adj_mutex);
    1139           0 :         put_task_struct(task);
    1140           0 :         return err;
    1141             : }
    1142             : 
    1143             : /*
    1144             :  * /proc/pid/oom_adj exists solely for backwards compatibility with previous
    1145             :  * kernels.  The effective policy is defined by oom_score_adj, which has a
    1146             :  * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
    1147             :  * Values written to oom_adj are simply mapped linearly to oom_score_adj.
    1148             :  * Processes that become oom disabled via oom_adj will still be oom disabled
    1149             :  * with this implementation.
    1150             :  *
    1151             :  * oom_adj cannot be removed since existing userspace binaries use it.
                     :  *
                     :  * Only the first sizeof(buffer) - 1 bytes of the write are parsed.  Returns
                     :  * the (possibly clamped) count on success; otherwise -EFAULT, the
                     :  * kstrtoint() error, -EINVAL for a value outside
                     :  * [OOM_ADJUST_MIN, OOM_ADJUST_MAX] that is not OOM_DISABLE, or a negative
                     :  * result from __set_oom_adj().
    1152             :  */
    1153           0 : static ssize_t oom_adj_write(struct file *file, const char __user *buf,
    1154             :                              size_t count, loff_t *ppos)
    1155             : {
    1156             :         char buffer[PROC_NUMBUF];
    1157             :         int oom_adj;
    1158             :         int err;
    1159             : 
    1160           0 :         memset(buffer, 0, sizeof(buffer));      /* zero-fill: the copied text stays NUL-terminated */
    1161           0 :         if (count > sizeof(buffer) - 1)
    1162           0 :                 count = sizeof(buffer) - 1;
    1163           0 :         if (copy_from_user(buffer, buf, count)) {
    1164             :                 err = -EFAULT;
    1165             :                 goto out;
    1166             :         }
    1167             : 
    1168           0 :         err = kstrtoint(strstrip(buffer), 0, &oom_adj);
    1169           0 :         if (err)
    1170             :                 goto out;
    1171           0 :         if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
    1172             :              oom_adj != OOM_DISABLE) {
    1173             :                 err = -EINVAL;
    1174             :                 goto out;
    1175             :         }
    1176             : 
    1177             :         /*
    1178             :          * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
    1179             :          * value is always attainable.
    1180             :          */
    1181           0 :         if (oom_adj == OOM_ADJUST_MAX)
    1182           0 :                 oom_adj = OOM_SCORE_ADJ_MAX;
    1183             :         else
    1184           0 :                 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;  /* OOM_DISABLE itself scales to -OOM_SCORE_ADJ_MAX */
    1185             : 
    1186           0 :         err = __set_oom_adj(file, oom_adj, true);       /* NOTE(review): 'true' presumably selects the legacy oom_adj path -- confirm in __set_oom_adj() */
    1187             : out:
    1188           0 :         return err < 0 ? err : count;
    1189             : }
    1190             : /* File operations pairing oom_adj_read() with oom_adj_write(); seeking goes through generic_file_llseek(). */
    1191             : static const struct file_operations proc_oom_adj_operations = {
    1192             :         .read           = oom_adj_read,
    1193             :         .write          = oom_adj_write,
    1194             :         .llseek         = generic_file_llseek,
    1195             : };
    1196             : /*
                     :  * .read handler of proc_oom_score_adj_operations.  Formats the task's
                     :  * signal->oom_score_adj as "%hd\n" into a stack buffer and hands it to
                     :  * simple_read_from_buffer().  Returns -ESRCH when get_proc_task() yields
                     :  * no task.
                     :  */
    1197           0 : static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
    1198             :                                         size_t count, loff_t *ppos)
    1199             : {
    1200           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    1201             :         char buffer[PROC_NUMBUF];
    1202           0 :         short oom_score_adj = OOM_SCORE_ADJ_MIN;       /* always overwritten below before it is printed */
    1203             :         size_t len;
    1204             : 
    1205           0 :         if (!task)
    1206             :                 return -ESRCH;
    1207           0 :         oom_score_adj = task->signal->oom_score_adj;
    1208           0 :         put_task_struct(task);  /* value is copied; the task is not touched again */
    1209           0 :         len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
    1210           0 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1211             : }
    1212             : /*
                     :  * .write handler of proc_oom_score_adj_operations.  Parses at most
                     :  * sizeof(buffer) - 1 bytes as an integer (base auto-detected by
                     :  * kstrtoint(..., 0, ...)), requires it to lie in
                     :  * [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX] and passes it to __set_oom_adj().
                     :  * Returns the (possibly clamped) count, or -EFAULT, the kstrtoint() error,
                     :  * -EINVAL, or a negative __set_oom_adj() result.
                     :  */
    1213           0 : static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
    1214             :                                         size_t count, loff_t *ppos)
    1215             : {
    1216             :         char buffer[PROC_NUMBUF];
    1217             :         int oom_score_adj;
    1218             :         int err;
    1219             : 
    1220           0 :         memset(buffer, 0, sizeof(buffer));      /* zero-fill: the copied text stays NUL-terminated */
    1221           0 :         if (count > sizeof(buffer) - 1)
    1222           0 :                 count = sizeof(buffer) - 1;
    1223           0 :         if (copy_from_user(buffer, buf, count)) {
    1224             :                 err = -EFAULT;
    1225             :                 goto out;
    1226             :         }
    1227             : 
    1228           0 :         err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
    1229           0 :         if (err)
    1230             :                 goto out;
    1231           0 :         if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
    1232             :                         oom_score_adj > OOM_SCORE_ADJ_MAX) {
    1233             :                 err = -EINVAL;
    1234             :                 goto out;
    1235             :         }
    1236             : 
    1237           0 :         err = __set_oom_adj(file, oom_score_adj, false);        /* value is passed through unscaled, unlike oom_adj_write() */
    1238             : out:
    1239           0 :         return err < 0 ? err : count;
    1240             : }
    1241             : /* File operations for oom_score_adj; note .llseek is default_llseek here, while proc_oom_adj_operations uses generic_file_llseek. */
    1242             : static const struct file_operations proc_oom_score_adj_operations = {
    1243             :         .read           = oom_score_adj_read,
    1244             :         .write          = oom_score_adj_write,
    1245             :         .llseek         = default_llseek,
    1246             : };
    1247             : 
    1248             : #ifdef CONFIG_AUDIT
    1249             : #define TMPBUFLEN 11
    1250             : static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
    1251             :                                   size_t count, loff_t *ppos)
    1252             : {       /* .read handler of proc_loginuid_operations: prints the task's audit loginuid as "%u" */
    1253             :         struct inode * inode = file_inode(file);
    1254             :         struct task_struct *task = get_proc_task(inode);
    1255             :         ssize_t length;
    1256             :         char tmpbuf[TMPBUFLEN];
    1257             : 
    1258             :         if (!task)
    1259             :                 return -ESRCH;
    1260             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",     /* no trailing newline is emitted */
    1261             :                            from_kuid(file->f_cred->user_ns,     /* uid is translated into the opener's user namespace */
    1262             :                                      audit_get_loginuid(task)));
    1263             :         put_task_struct(task);
    1264             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1265             : }
    1266             : /*
                     :  * .write handler of proc_loginuid_operations.  Only the task the inode
                     :  * refers to may write (anything else, and any kthread, gets -EPERM), and
                     :  * only at offset 0.  The input is parsed as a base-10 u32;
                     :  * AUDIT_UID_UNSET is turned into INVALID_UID, any other value is mapped
                     :  * with make_kuid() in the opener's user namespace and rejected with
                     :  * -EINVAL if that mapping is not valid.  Returns count on success or the
                     :  * negative result of the parse / audit_set_loginuid().
                     :  */
    1267             : static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
    1268             :                                    size_t count, loff_t *ppos)
    1269             : {
    1270             :         struct inode * inode = file_inode(file);
    1271             :         uid_t loginuid;
    1272             :         kuid_t kloginuid;
    1273             :         int rv;
    1274             : 
    1275             :         /* Don't let kthreads write their own loginuid */
    1276             :         if (current->flags & PF_KTHREAD)
    1277             :                 return -EPERM;
    1278             : 
    1279             :         rcu_read_lock();        /* held only around the pid_task() lookup and pointer comparison */
    1280             :         if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
    1281             :                 rcu_read_unlock();
    1282             :                 return -EPERM;
    1283             :         }
    1284             :         rcu_read_unlock();
    1285             : 
    1286             :         if (*ppos != 0) {
    1287             :                 /* No partial writes. */
    1288             :                 return -EINVAL;
    1289             :         }
    1290             : 
    1291             :         rv = kstrtou32_from_user(buf, count, 10, &loginuid);
    1292             :         if (rv < 0)
    1293             :                 return rv;
    1294             : 
    1295             :         /* is userspace trying to explicitly UNSET the loginuid? */
    1296             :         if (loginuid == AUDIT_UID_UNSET) {
    1297             :                 kloginuid = INVALID_UID;
    1298             :         } else {
    1299             :                 kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
    1300             :                 if (!uid_valid(kloginuid))
    1301             :                         return -EINVAL;
    1302             :         }
    1303             : 
    1304             :         rv = audit_set_loginuid(kloginuid);     /* takes no task argument; the check above limited the writer to current */
    1305             :         if (rv < 0)
    1306             :                 return rv;
    1307             :         return count;
    1308             : }
    1309             : /* File operations pairing proc_loginuid_read() with proc_loginuid_write(). */
    1310             : static const struct file_operations proc_loginuid_operations = {
    1311             :         .read           = proc_loginuid_read,
    1312             :         .write          = proc_loginuid_write,
    1313             :         .llseek         = generic_file_llseek,
    1314             : };
    1315             : /*
                     :  * .read handler of proc_sessionid_operations.  Prints
                     :  * audit_get_sessionid(task) as "%u" (no trailing newline) and copies it
                     :  * out via simple_read_from_buffer().  Returns -ESRCH when get_proc_task()
                     :  * yields no task.
                     :  */
    1316             : static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
    1317             :                                   size_t count, loff_t *ppos)
    1318             : {
    1319             :         struct inode * inode = file_inode(file);
    1320             :         struct task_struct *task = get_proc_task(inode);
    1321             :         ssize_t length;
    1322             :         char tmpbuf[TMPBUFLEN];
    1323             : 
    1324             :         if (!task)
    1325             :                 return -ESRCH;
    1326             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
    1327             :                                 audit_get_sessionid(task));
    1328             :         put_task_struct(task);
    1329             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1330             : }
    1331             : /* File operations for the session id file: no .write handler is installed. */
    1332             : static const struct file_operations proc_sessionid_operations = {
    1333             :         .read           = proc_sessionid_read,
    1334             :         .llseek         = generic_file_llseek,
    1335             : };
    1336             : #endif
    1337             : 
    1338             : #ifdef CONFIG_FAULT_INJECTION
    1339             : static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
    1340             :                                       size_t count, loff_t *ppos)
    1341             : {       /* .read handler of proc_fault_inject_operations: prints task->make_it_fail as "%i\n" */
    1342             :         struct task_struct *task = get_proc_task(file_inode(file));
    1343             :         char buffer[PROC_NUMBUF];
    1344             :         size_t len;
    1345             :         int make_it_fail;
    1346             : 
    1347             :         if (!task)
    1348             :                 return -ESRCH;
    1349             :         make_it_fail = task->make_it_fail;      /* snapshot the flag so the task can be released before formatting */
    1350             :         put_task_struct(task);
    1351             : 
    1352             :         len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
    1353             : 
    1354             :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1355             : }
    1356             : /*
                     :  * .write handler of proc_fault_inject_operations.  The caller needs
                     :  * CAP_SYS_RESOURCE (-EPERM otherwise).  At most sizeof(buffer) - 1 bytes
                     :  * are parsed as an integer that must be 0 or 1 (-EINVAL otherwise); the
                     :  * value is stored in task->make_it_fail.  Returns the (possibly clamped)
                     :  * count, or -EFAULT / the kstrtoint() error / -ESRCH.
                     :  */
    1357             : static ssize_t proc_fault_inject_write(struct file * file,
    1358             :                         const char __user * buf, size_t count, loff_t *ppos)
    1359             : {
    1360             :         struct task_struct *task;
    1361             :         char buffer[PROC_NUMBUF];
    1362             :         int make_it_fail;
    1363             :         int rv;
    1364             : 
    1365             :         if (!capable(CAP_SYS_RESOURCE))
    1366             :                 return -EPERM;
    1367             :         memset(buffer, 0, sizeof(buffer));      /* zero-fill: the copied text stays NUL-terminated */
    1368             :         if (count > sizeof(buffer) - 1)
    1369             :                 count = sizeof(buffer) - 1;
    1370             :         if (copy_from_user(buffer, buf, count))
    1371             :                 return -EFAULT;
    1372             :         rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
    1373             :         if (rv < 0)
    1374             :                 return rv;
    1375             :         if (make_it_fail < 0 || make_it_fail > 1)
    1376             :                 return -EINVAL;
    1377             : 
    1378             :         task = get_proc_task(file_inode(file)); /* looked up only after the input has been validated */
    1379             :         if (!task)
    1380             :                 return -ESRCH;
    1381             :         task->make_it_fail = make_it_fail;
    1382             :         put_task_struct(task);
    1383             : 
    1384             :         return count;
    1385             : }
    1386             : /* File operations pairing proc_fault_inject_read() with proc_fault_inject_write(). */
    1387             : static const struct file_operations proc_fault_inject_operations = {
    1388             :         .read           = proc_fault_inject_read,
    1389             :         .write          = proc_fault_inject_write,
    1390             :         .llseek         = generic_file_llseek,
    1391             : };
    1392             : /*
                     :  * .write handler of proc_fail_nth_operations.  Parses the whole user
                     :  * buffer as an unsigned int (base auto-detected) and stores it in
                     :  * task->fail_nth.  Returns count, the kstrtouint_from_user() error, or
                     :  * -ESRCH when get_proc_task() yields no task.  Unlike
                     :  * proc_fault_inject_write() there is no capable() check in this function.
                     :  */
    1393             : static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
    1394             :                                    size_t count, loff_t *ppos)
    1395             : {
    1396             :         struct task_struct *task;
    1397             :         int err;
    1398             :         unsigned int n;
    1399             : 
    1400             :         err = kstrtouint_from_user(buf, count, 0, &n);
    1401             :         if (err)
    1402             :                 return err;
    1403             : 
    1404             :         task = get_proc_task(file_inode(file));
    1405             :         if (!task)
    1406             :                 return -ESRCH;
    1407             :         task->fail_nth = n;
    1408             :         put_task_struct(task);
    1409             : 
    1410             :         return count;
    1411             : }
    1412             : /*
                     :  * .read handler of proc_fail_nth_operations.  Prints task->fail_nth as
                     :  * "%u\n" and copies it out via simple_read_from_buffer().  Returns -ESRCH
                     :  * when get_proc_task() yields no task.
                     :  */
    1413             : static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
    1414             :                                   size_t count, loff_t *ppos)
    1415             : {
    1416             :         struct task_struct *task;
    1417             :         char numbuf[PROC_NUMBUF];
    1418             :         ssize_t len;
    1419             : 
    1420             :         task = get_proc_task(file_inode(file));
    1421             :         if (!task)
    1422             :                 return -ESRCH;
    1423             :         len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); /* formatted while the task is still held */
    1424             :         put_task_struct(task);
    1425             :         return simple_read_from_buffer(buf, count, ppos, numbuf, len);
    1426             : }
    1427             : /* File operations for fail_nth; no .llseek member is set in this table. */
    1428             : static const struct file_operations proc_fail_nth_operations = {
    1429             :         .read           = proc_fail_nth_read,
    1430             :         .write          = proc_fail_nth_write,
    1431             : };
    1432             : #endif
    1433             : 
    1434             : 
    1435             : #ifdef CONFIG_SCHED_DEBUG
    1436             : /*
    1437             :  * Print out various scheduling related per-task fields:
                     :  *
                     :  * seq_file show callback registered by sched_open().  m->private is the
                     :  * inode handed to single_open(); the task and the pid namespace derived
                     :  * from inode->i_sb are passed to proc_sched_show_task().  Returns 0, or
                     :  * -ESRCH when get_proc_task() yields no task.
    1438             :  */
    1439           0 : static int sched_show(struct seq_file *m, void *v)
    1440             : {
    1441           0 :         struct inode *inode = m->private;
    1442           0 :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
    1443             :         struct task_struct *p;
    1444             : 
    1445           0 :         p = get_proc_task(inode);
    1446           0 :         if (!p)
    1447             :                 return -ESRCH;
    1448           0 :         proc_sched_show_task(p, ns, m);
    1449             : 
    1450           0 :         put_task_struct(p);
    1451             : 
    1452           0 :         return 0;
    1453             : }
    1454             : /*
                     :  * .write handler of proc_pid_sched_operations.  The written bytes are
                     :  * never read: any write simply calls proc_sched_set_task() on the task
                     :  * and reports the full count as consumed.
                     :  * NOTE(review): proc_sched_set_task() presumably resets the task's
                     :  * scheduler statistics -- confirm at its definition.
                     :  */
    1455             : static ssize_t
    1456           0 : sched_write(struct file *file, const char __user *buf,
    1457             :             size_t count, loff_t *offset)
    1458             : {
    1459           0 :         struct inode *inode = file_inode(file);
    1460             :         struct task_struct *p;
    1461             : 
    1462           0 :         p = get_proc_task(inode);
    1463           0 :         if (!p)
    1464             :                 return -ESRCH;
    1465           0 :         proc_sched_set_task(p);
    1466             : 
    1467           0 :         put_task_struct(p);
    1468             : 
    1469           0 :         return count;
    1470             : }
    1471             : /* .open handler: binds sched_show() to the file, passing the inode as the seq_file private data. */
    1472           0 : static int sched_open(struct inode *inode, struct file *filp)
    1473             : {
    1474           0 :         return single_open(filp, sched_show, inode);
    1475             : }
    1476             : /* seq_file-backed file operations: sched_open()/single_release() bracket seq_read()/seq_lseek(); writes go to sched_write(). */
    1477             : static const struct file_operations proc_pid_sched_operations = {
    1478             :         .open           = sched_open,
    1479             :         .read           = seq_read,
    1480             :         .write          = sched_write,
    1481             :         .llseek         = seq_lseek,
    1482             :         .release        = single_release,
    1483             : };
    1484             : 
    1485             : #endif
    1486             : 
    1487             : #ifdef CONFIG_SCHED_AUTOGROUP
    1488             : /*
    1489             :  * Print out autogroup related information:
                     :  *
                     :  * seq_file show callback registered by sched_autogroup_open(), which
                     :  * stores the inode in m->private.  Delegates the output to
                     :  * proc_sched_autogroup_show_task().  Returns 0, or -ESRCH when
                     :  * get_proc_task() yields no task.
    1490             :  */
    1491             : static int sched_autogroup_show(struct seq_file *m, void *v)
    1492             : {
    1493             :         struct inode *inode = m->private;
    1494             :         struct task_struct *p;
    1495             : 
    1496             :         p = get_proc_task(inode);
    1497             :         if (!p)
    1498             :                 return -ESRCH;
    1499             :         proc_sched_autogroup_show_task(p, m);
    1500             : 
    1501             :         put_task_struct(p);
    1502             : 
    1503             :         return 0;
    1504             : }
    1505             : /*
                     :  * .write handler of proc_pid_sched_autogroup_operations.  Parses at most
                     :  * sizeof(buffer) - 1 bytes as an integer nice value and passes it to
                     :  * proc_sched_autogroup_set_nice().  Returns the (possibly clamped) count
                     :  * on success; -EFAULT, the kstrtoint() error, -ESRCH, or whatever non-zero
                     :  * value proc_sched_autogroup_set_nice() returned.  No range check on the
                     :  * nice value is done here.
                     :  */
    1506             : static ssize_t
    1507             : sched_autogroup_write(struct file *file, const char __user *buf,
    1508             :             size_t count, loff_t *offset)
    1509             : {
    1510             :         struct inode *inode = file_inode(file);
    1511             :         struct task_struct *p;
    1512             :         char buffer[PROC_NUMBUF];
    1513             :         int nice;
    1514             :         int err;
    1515             : 
    1516             :         memset(buffer, 0, sizeof(buffer));      /* zero-fill: the copied text stays NUL-terminated */
    1517             :         if (count > sizeof(buffer) - 1)
    1518             :                 count = sizeof(buffer) - 1;
    1519             :         if (copy_from_user(buffer, buf, count))
    1520             :                 return -EFAULT;
    1521             : 
    1522             :         err = kstrtoint(strstrip(buffer), 0, &nice);
    1523             :         if (err < 0)
    1524             :                 return err;
    1525             : 
    1526             :         p = get_proc_task(inode);
    1527             :         if (!p)
    1528             :                 return -ESRCH;
    1529             : 
    1530             :         err = proc_sched_autogroup_set_nice(p, nice);
    1531             :         if (err)
    1532             :                 count = err;    /* the error is returned through count so the put below is shared by both paths */
    1533             : 
    1534             :         put_task_struct(p);
    1535             : 
    1536             :         return count;
    1537             : }
    1538             : /*
                     :  * .open handler: sets up sched_autogroup_show() with single_open().
                     :  * single_open() is given NULL as private data and, on success, the inode
                     :  * is stored in the seq_file's ->private afterwards; the end state matches
                     :  * what sched_open() gets by passing the inode directly.
                     :  */
    1539             : static int sched_autogroup_open(struct inode *inode, struct file *filp)
    1540             : {
    1541             :         int ret;
    1542             : 
    1543             :         ret = single_open(filp, sched_autogroup_show, NULL);
    1544             :         if (!ret) {
    1545             :                 struct seq_file *m = filp->private_data;
    1546             : 
    1547             :                 m->private = inode;
    1548             :         }
    1549             :         return ret;
    1550             : }
    1551             : /* seq_file-backed file operations for the autogroup file; writes go to sched_autogroup_write(). */
    1552             : static const struct file_operations proc_pid_sched_autogroup_operations = {
    1553             :         .open           = sched_autogroup_open,
    1554             :         .read           = seq_read,
    1555             :         .write          = sched_autogroup_write,
    1556             :         .llseek         = seq_lseek,
    1557             :         .release        = single_release,
    1558             : };
    1559             : 
    1560             : #endif /* CONFIG_SCHED_AUTOGROUP */
    1561             : 
    1562             : #ifdef CONFIG_TIME_NS
    1563             : static int timens_offsets_show(struct seq_file *m, void *v)
    1564             : {       /* seq_file show callback registered by timens_offsets_open(); output is produced by proc_timens_show_offsets() */
    1565             :         struct task_struct *p;
    1566             : 
    1567             :         p = get_proc_task(file_inode(m->file)); /* inode is taken from m->file here, not from m->private */
    1568             :         if (!p)
    1569             :                 return -ESRCH;
    1570             :         proc_timens_show_offsets(p, m);
    1571             : 
    1572             :         put_task_struct(p);
    1573             : 
    1574             :         return 0;
    1575             : }
    1576             : /*
                     :  * .write handler of proc_timens_offsets_operations.
                     :  *
                     :  * The write must start at offset 0 and be shorter than PAGE_SIZE.  The
                     :  * data is split on '\n'; each line must scan as "<clock> <sec> <nsec>"
                     :  * with nsec < NSEC_PER_SEC, where <clock> is "monotonic" / "boottime" or
                     :  * the stringified CLOCK_MONOTONIC / CLOCK_BOOTTIME constant.  At most
                     :  * ARRAY_SIZE(offsets) lines are consumed; if more input follows, the
                     :  * returned count is shortened to the bytes actually parsed.  The parsed
                     :  * entries are applied with proc_timens_set_offset().
                     :  *
                     :  * Returns the consumed byte count, or -EINVAL for malformed input, the
                     :  * memdup_user_nul() error, -ESRCH when the task is gone, or the non-zero
                     :  * result of proc_timens_set_offset().
                     :  */
    1577             : static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
    1578             :                                     size_t count, loff_t *ppos)
    1579             : {
    1580             :         struct inode *inode = file_inode(file);
    1581             :         struct proc_timens_offset offsets[2];
    1582             :         char *kbuf = NULL, *pos, *next_line;
    1583             :         struct task_struct *p;
    1584             :         int ret, noffsets;
    1585             : 
    1586             :         /* Only allow < page size writes at the beginning of the file */
    1587             :         if ((*ppos != 0) || (count >= PAGE_SIZE))
    1588             :                 return -EINVAL;
    1589             : 
    1590             :         /* Slurp in the user data */
    1591             :         kbuf = memdup_user_nul(buf, count);
    1592             :         if (IS_ERR(kbuf))
    1593             :                 return PTR_ERR(kbuf);
    1594             : 
    1595             :         /* Parse the user data */
    1596             :         ret = -EINVAL;  /* default result for every parse failure below */
    1597             :         noffsets = 0;
    1598             :         for (pos = kbuf; pos; pos = next_line) {
    1599             :                 struct proc_timens_offset *off = &offsets[noffsets];
    1600             :                 char clock[10];
    1601             :                 int err;
    1602             : 
    1603             :                 /* Find the end of line and ensure we don't look past it */
    1604             :                 next_line = strchr(pos, '\n');
    1605             :                 if (next_line) {
    1606             :                         *next_line = '\0';
    1607             :                         next_line++;
    1608             :                         if (*next_line == '\0')
    1609             :                                 next_line = NULL;       /* a trailing newline does not start another line */
    1610             :                 }
    1611             : 
    1612             :                 err = sscanf(pos, "%9s %lld %lu", clock,
    1613             :                                 &off->val.tv_sec, &off->val.tv_nsec);
    1614             :                 if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
    1615             :                         goto out;
    1616             : 
    1617             :                 clock[sizeof(clock) - 1] = 0;   /* defensive: the %9s width already bounds the token to this array */
    1618             :                 if (strcmp(clock, "monotonic") == 0 ||
    1619             :                     strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
    1620             :                         off->clockid = CLOCK_MONOTONIC;
    1621             :                 else if (strcmp(clock, "boottime") == 0 ||
    1622             :                          strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
    1623             :                         off->clockid = CLOCK_BOOTTIME;
    1624             :                 else
    1625             :                         goto out;
    1626             : 
    1627             :                 noffsets++;
    1628             :                 if (noffsets == ARRAY_SIZE(offsets)) {
    1629             :                         if (next_line)
    1630             :                                 count = next_line - kbuf;       /* report only the bytes consumed so far */
    1631             :                         break;
    1632             :                 }
    1633             :         }
    1634             : 
    1635             :         ret = -ESRCH;
    1636             :         p = get_proc_task(inode);
    1637             :         if (!p)
    1638             :                 goto out;
    1639             :         ret = proc_timens_set_offset(file, p, offsets, noffsets);
    1640             :         put_task_struct(p);
    1641             :         if (ret)
    1642             :                 goto out;
    1643             : 
    1644             :         ret = count;
    1645             : out:
    1646             :         kfree(kbuf);    /* single exit point frees the duplicated user buffer */
    1647             :         return ret;
    1648             : }
    1649             : /* .open handler: binds timens_offsets_show() to the file via single_open(), with the inode as private data. */
    1650             : static int timens_offsets_open(struct inode *inode, struct file *filp)
    1651             : {
    1652             :         return single_open(filp, timens_offsets_show, inode);
    1653             : }
    1654             : /* seq_file-backed file operations for the timens offsets file; writes go to timens_offsets_write(). */
    1655             : static const struct file_operations proc_timens_offsets_operations = {
    1656             :         .open           = timens_offsets_open,
    1657             :         .read           = seq_read,
    1658             :         .write          = timens_offsets_write,
    1659             :         .llseek         = seq_lseek,
    1660             :         .release        = single_release,
    1661             : };
    1662             : #endif /* CONFIG_TIME_NS */
    1663             : /*
                     :  * .write handler of proc_pid_set_comm_operations.  Copies at most
                     :  * TASK_COMM_LEN - 1 bytes of user data into a zeroed buffer and, if the
                     :  * writer is in the same thread group as the target task, installs it
                     :  * with set_task_comm() and calls proc_comm_connector().  Longer input is
                     :  * silently truncated, yet the full original count is still returned.
                     :  * Returns -EFAULT, -ESRCH, or -EINVAL when the writer belongs to a
                     :  * different thread group.
                     :  */
    1664           0 : static ssize_t comm_write(struct file *file, const char __user *buf,
    1665             :                                 size_t count, loff_t *offset)
    1666             : {
    1667           0 :         struct inode *inode = file_inode(file);
    1668             :         struct task_struct *p;
    1669             :         char buffer[TASK_COMM_LEN];
    1670           0 :         const size_t maxlen = sizeof(buffer) - 1;
    1671             : 
    1672           0 :         memset(buffer, 0, sizeof(buffer));
    1673           0 :         if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))       /* count itself is left unclamped */
    1674             :                 return -EFAULT;
    1675             : 
    1676           0 :         p = get_proc_task(inode);
    1677           0 :         if (!p)
    1678             :                 return -ESRCH;
    1679             : 
    1680           0 :         if (same_thread_group(current, p)) {
    1681             :                 set_task_comm(p, buffer);
    1682             :                 proc_comm_connector(p);
    1683             :         }
    1684             :         else
    1685             :                 count = -EINVAL;        /* stored in the size_t and handed back through the ssize_t return */
    1686             : 
    1687           0 :         put_task_struct(p);
    1688             : 
    1689           0 :         return count;
    1690             : }
    1691             : /*
                     :  * seq_file show callback registered by comm_open().  Emits the task name
                     :  * through proc_task_name() followed by a newline.  Returns 0, or -ESRCH
                     :  * when get_proc_task() yields no task.
                     :  */
    1692           0 : static int comm_show(struct seq_file *m, void *v)
    1693             : {
    1694           0 :         struct inode *inode = m->private;
    1695             :         struct task_struct *p;
    1696             : 
    1697           0 :         p = get_proc_task(inode);
    1698           0 :         if (!p)
    1699             :                 return -ESRCH;
    1700             : 
    1701           0 :         proc_task_name(m, p, false);    /* NOTE(review): 'false' presumably disables escaping of the name -- confirm in proc_task_name() */
    1702           0 :         seq_putc(m, '\n');
    1703             : 
    1704           0 :         put_task_struct(p);
    1705             : 
    1706           0 :         return 0;
    1707             : }
    1708             : /* .open handler: binds comm_show() to the file via single_open(), with the inode as private data. */
    1709           0 : static int comm_open(struct inode *inode, struct file *filp)
    1710             : {
    1711           0 :         return single_open(filp, comm_show, inode);
    1712             : }
    1713             : /* seq_file-backed file operations for the comm file; writes go to comm_write(). */
    1714             : static const struct file_operations proc_pid_set_comm_operations = {
    1715             :         .open           = comm_open,
    1716             :         .read           = seq_read,
    1717             :         .write          = comm_write,
    1718             :         .llseek         = seq_lseek,
    1719             :         .release        = single_release,
    1720             : };
    1721             : /*
                     :  * Fills *exe_path with the f_path of the file returned by
                     :  * get_task_exe_file() for the task behind @dentry.  The path is pinned
                     :  * with path_get() before the file itself is released with fput().
                     :  * Returns 0 on success, -ENOENT when there is no task or no exe file.
                     :  */
    1722           0 : static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
    1723             : {
    1724             :         struct task_struct *task;
    1725             :         struct file *exe_file;
    1726             : 
    1727           0 :         task = get_proc_task(d_inode(dentry));
    1728           0 :         if (!task)
    1729             :                 return -ENOENT;
    1730           0 :         exe_file = get_task_exe_file(task);
    1731           0 :         put_task_struct(task);  /* only exe_file is used from here on */
    1732           0 :         if (exe_file) {
    1733           0 :                 *exe_path = exe_file->f_path;
    1734           0 :                 path_get(&exe_file->f_path);
    1735           0 :                 fput(exe_file);
    1736           0 :                 return 0;
    1737             :         } else
    1738             :                 return -ENOENT;
    1739             : }
    1740             : /*
                     :  * .get_link handler of proc_pid_link_inode_operations.  After the
                     :  * proc_fd_access_allowed() check it resolves the target through the
                     :  * inode's op.proc_get_link callback and jumps to it with nd_jump_link().
                     :  * The result is always ERR_PTR(error): -EACCES when access is denied,
                     :  * the callback's or nd_jump_link()'s error otherwise, and ERR_PTR(0) on
                     :  * success (no path string is ever returned).
                     :  */
    1741           0 : static const char *proc_pid_get_link(struct dentry *dentry,
    1742             :                                      struct inode *inode,
    1743             :                                      struct delayed_call *done)
    1744             : {
    1745             :         struct path path;
    1746           0 :         int error = -EACCES;
    1747             : 
    1748           0 :         if (!dentry)    /* NOTE(review): a NULL dentry presumably signals an RCU-mode walk that must be retried -- confirm against the get_link contract */
    1749             :                 return ERR_PTR(-ECHILD);
    1750             : 
    1751             :         /* Are we allowed to snoop on the task's file descriptors? */
    1752           0 :         if (!proc_fd_access_allowed(inode))
    1753             :                 goto out;
    1754             : 
    1755           0 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1756           0 :         if (error)
    1757             :                 goto out;
    1758             : 
    1759           0 :         error = nd_jump_link(&path);    /* no path_put() here, unlike proc_pid_readlink(); presumably nd_jump_link() takes over the reference -- confirm */
    1760             : out:
    1761           0 :         return ERR_PTR(error);
    1762             : }
    1763             : /*
                     :  * Renders @path with d_path() into a temporary PATH_MAX buffer and copies
                     :  * up to @buflen bytes of the result to the user @buffer (no terminating
                     :  * NUL is copied).  Returns the number of bytes copied, -ENOMEM if the
                     :  * temporary buffer cannot be allocated, the d_path() error, or -EFAULT.
                     :  */
    1764           0 : static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
    1765             : {
    1766           0 :         char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
    1767             :         char *pathname;
    1768             :         int len;
    1769             : 
    1770           0 :         if (!tmp)
    1771             :                 return -ENOMEM;
    1772             : 
    1773           0 :         pathname = d_path(path, tmp, PATH_MAX);
    1774           0 :         len = PTR_ERR(pathname);        /* pre-load the error code; only returned if IS_ERR() below is true */
    1775           0 :         if (IS_ERR(pathname))
    1776             :                 goto out;
    1777           0 :         len = tmp + PATH_MAX - 1 - pathname;    /* assumes d_path() builds the string at the tail of tmp, making this its length without the NUL -- TODO confirm */
    1778             : 
    1779           0 :         if (len > buflen)
    1780           0 :                 len = buflen;   /* silently truncate to the caller's buffer */
    1781           0 :         if (copy_to_user(buffer, pathname, len))
    1782           0 :                 len = -EFAULT;
    1783             :  out:
    1784           0 :         kfree(tmp);
    1785           0 :         return len;
    1786             : }
    1787             : /*
                     :  * .readlink handler of proc_pid_link_inode_operations.  After the
                     :  * proc_fd_access_allowed() check it resolves the target through the
                     :  * inode's op.proc_get_link callback, writes its textual form to the user
                     :  * buffer with do_proc_readlink() and drops the path with path_put().
                     :  * Returns do_proc_readlink()'s result, -EACCES when access is denied, or
                     :  * the callback's error.
                     :  */
    1788           0 : static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
    1789             : {
    1790           0 :         int error = -EACCES;
    1791           0 :         struct inode *inode = d_inode(dentry);
    1792             :         struct path path;
    1793             : 
    1794             :         /* Are we allowed to snoop on the task's file descriptors? */
    1795           0 :         if (!proc_fd_access_allowed(inode))
    1796             :                 goto out;
    1797             : 
    1798           0 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1799           0 :         if (error)
    1800             :                 goto out;
    1801             : 
    1802           0 :         error = do_proc_readlink(&path, buffer, buflen);
    1803           0 :         path_put(&path);        /* released whether or not the copy to user space succeeded */
    1804             : out:
    1805           0 :         return error;
    1806             : }
    1807             : 
    1808             : const struct inode_operations proc_pid_link_inode_operations = {
    1809             :         .readlink       = proc_pid_readlink,
    1810             :         .get_link       = proc_pid_get_link,
    1811             :         .setattr        = proc_setattr,
    1812             : };
    1813             : 
    1814             : 
    1815             : /* building an inode */
    1816             : 
    1817           0 : void task_dump_owner(struct task_struct *task, umode_t mode,
    1818             :                      kuid_t *ruid, kgid_t *rgid)
    1819             : {
    1820             :         /* Depending on the state of dumpable compute who should own a
    1821             :          * proc file for a task.
    1822             :          */
    1823             :         const struct cred *cred;
    1824             :         kuid_t uid;
    1825             :         kgid_t gid;
    1826             : 
    1827           0 :         if (unlikely(task->flags & PF_KTHREAD)) {
    1828           0 :                 *ruid = GLOBAL_ROOT_UID;
    1829           0 :                 *rgid = GLOBAL_ROOT_GID;
    1830           0 :                 return;
    1831             :         }
    1832             : 
    1833             :         /* Default to the tasks effective ownership */
    1834             :         rcu_read_lock();
    1835           0 :         cred = __task_cred(task);
    1836           0 :         uid = cred->euid;
    1837           0 :         gid = cred->egid;
    1838             :         rcu_read_unlock();
    1839             : 
    1840             :         /*
    1841             :          * Before the /proc/pid/status file was created the only way to read
    1842             :          * the effective uid of a /process was to stat /proc/pid.  Reading
    1843             :          * /proc/pid/status is slow enough that procps and other packages
    1844             :          * kept stating /proc/pid.  To keep the rules in /proc simple I have
    1845             :          * made this apply to all per process world readable and executable
    1846             :          * directories.
    1847             :          */
    1848           0 :         if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
    1849             :                 struct mm_struct *mm;
    1850           0 :                 task_lock(task);
    1851           0 :                 mm = task->mm;
    1852             :                 /* Make non-dumpable tasks owned by some root */
    1853           0 :                 if (mm) {
    1854           0 :                         if (get_dumpable(mm) != SUID_DUMP_USER) {
    1855           0 :                                 struct user_namespace *user_ns = mm->user_ns;
    1856             : 
    1857           0 :                                 uid = make_kuid(user_ns, 0);
    1858           0 :                                 if (!uid_valid(uid))
    1859             :                                         uid = GLOBAL_ROOT_UID;
    1860             : 
    1861           0 :                                 gid = make_kgid(user_ns, 0);
    1862           0 :                                 if (!gid_valid(gid))
    1863             :                                         gid = GLOBAL_ROOT_GID;
    1864             :                         }
    1865             :                 } else {
    1866             :                         uid = GLOBAL_ROOT_UID;
    1867             :                         gid = GLOBAL_ROOT_GID;
    1868             :                 }
    1869           0 :                 task_unlock(task);
    1870             :         }
    1871           0 :         *ruid = uid;
    1872           0 :         *rgid = gid;
    1873             : }
    1874             : 
    1875           0 : void proc_pid_evict_inode(struct proc_inode *ei)
    1876             : {
    1877           0 :         struct pid *pid = ei->pid;
    1878             : 
    1879           0 :         if (S_ISDIR(ei->vfs_inode.i_mode)) {
    1880           0 :                 spin_lock(&pid->lock);
    1881           0 :                 hlist_del_init_rcu(&ei->sibling_inodes);
    1882           0 :                 spin_unlock(&pid->lock);
    1883             :         }
    1884             : 
    1885           0 :         put_pid(pid);
    1886           0 : }
    1887             : 
    1888           0 : struct inode *proc_pid_make_inode(struct super_block * sb,
    1889             :                                   struct task_struct *task, umode_t mode)
    1890             : {
    1891             :         struct inode * inode;
    1892             :         struct proc_inode *ei;
    1893             :         struct pid *pid;
    1894             : 
    1895             :         /* We need a new inode */
    1896             : 
    1897           0 :         inode = new_inode(sb);
    1898           0 :         if (!inode)
    1899             :                 goto out;
    1900             : 
    1901             :         /* Common stuff */
    1902           0 :         ei = PROC_I(inode);
    1903           0 :         inode->i_mode = mode;
    1904           0 :         inode->i_ino = get_next_ino();
    1905           0 :         inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
    1906           0 :         inode->i_op = &proc_def_inode_operations;
    1907             : 
    1908             :         /*
    1909             :          * grab the reference to task.
    1910             :          */
    1911           0 :         pid = get_task_pid(task, PIDTYPE_PID);
    1912           0 :         if (!pid)
    1913             :                 goto out_unlock;
    1914             : 
    1915             :         /* Let the pid remember us for quick removal */
    1916           0 :         ei->pid = pid;
    1917           0 :         if (S_ISDIR(mode)) {
    1918           0 :                 spin_lock(&pid->lock);
    1919           0 :                 hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
    1920           0 :                 spin_unlock(&pid->lock);
    1921             :         }
    1922             : 
    1923           0 :         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    1924             :         security_task_to_inode(task, inode);
    1925             : 
    1926             : out:
    1927             :         return inode;
    1928             : 
    1929             : out_unlock:
    1930           0 :         iput(inode);
    1931           0 :         return NULL;
    1932             : }
    1933             : 
    1934           0 : int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
    1935             :                 struct kstat *stat, u32 request_mask, unsigned int query_flags)
    1936             : {
    1937           0 :         struct inode *inode = d_inode(path->dentry);
    1938           0 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
    1939             :         struct task_struct *task;
    1940             : 
    1941           0 :         generic_fillattr(&init_user_ns, inode, stat);
    1942             : 
    1943           0 :         stat->uid = GLOBAL_ROOT_UID;
    1944           0 :         stat->gid = GLOBAL_ROOT_GID;
    1945             :         rcu_read_lock();
    1946           0 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    1947           0 :         if (task) {
    1948           0 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
    1949             :                         rcu_read_unlock();
    1950             :                         /*
    1951             :                          * This doesn't prevent learning whether PID exists,
    1952             :                          * it only makes getattr() consistent with readdir().
    1953             :                          */
    1954           0 :                         return -ENOENT;
    1955             :                 }
    1956           0 :                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
    1957             :         }
    1958             :         rcu_read_unlock();
    1959           0 :         return 0;
    1960             : }
    1961             : 
    1962             : /* dentry stuff */
    1963             : 
    1964             : /*
    1965             :  * Set <pid>/... inode ownership (can change due to setuid(), etc.)
    1966             :  */
    1967           0 : void pid_update_inode(struct task_struct *task, struct inode *inode)
    1968             : {
    1969           0 :         task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
    1970             : 
    1971           0 :         inode->i_mode &= ~(S_ISUID | S_ISGID);
    1972           0 :         security_task_to_inode(task, inode);
    1973           0 : }
    1974             : 
    1975             : /*
    1976             :  * Rewrite the inode's ownerships here because the owning task may have
    1977             :  * performed a setuid(), etc.
    1978             :  *
    1979             :  */
    1980           0 : static int pid_revalidate(struct dentry *dentry, unsigned int flags)
    1981             : {
    1982             :         struct inode *inode;
    1983             :         struct task_struct *task;
    1984           0 :         int ret = 0;
    1985             : 
    1986             :         rcu_read_lock();
    1987           0 :         inode = d_inode_rcu(dentry);
    1988           0 :         if (!inode)
    1989             :                 goto out;
    1990           0 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    1991             : 
    1992           0 :         if (task) {
    1993           0 :                 pid_update_inode(task, inode);
    1994           0 :                 ret = 1;
    1995             :         }
    1996             : out:
    1997             :         rcu_read_unlock();
    1998           0 :         return ret;
    1999             : }
    2000             : 
    2001             : static inline bool proc_inode_is_dead(struct inode *inode)
    2002             : {
    2003           0 :         return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
    2004             : }
    2005             : 
    2006           0 : int pid_delete_dentry(const struct dentry *dentry)
    2007             : {
    2008             :         /* Is the task we represent dead?
    2009             :          * If so, then don't put the dentry on the lru list,
    2010             :          * kill it immediately.
    2011             :          */
    2012           0 :         return proc_inode_is_dead(d_inode(dentry));
    2013             : }
    2014             : 
    2015             : const struct dentry_operations pid_dentry_operations =
    2016             : {
    2017             :         .d_revalidate   = pid_revalidate,
    2018             :         .d_delete       = pid_delete_dentry,
    2019             : };
    2020             : 
    2021             : /* Lookups */
    2022             : 
    2023             : /*
    2024             :  * Fill a directory entry.
    2025             :  *
    2026             :  * If possible create the dcache entry and derive our inode number and
    2027             :  * file type from dcache entry.
    2028             :  *
    2029             :  * Since all of the proc inode numbers are dynamically generated, the inode
    2030             :  * numbers do not exist until the inode is cache.  This means creating
    2031             :  * the dcache entry in readdir is necessary to keep the inode numbers
    2032             :  * reported by readdir in sync with the inode numbers reported
    2033             :  * by stat.
    2034             :  */
    2035           0 : bool proc_fill_cache(struct file *file, struct dir_context *ctx,
    2036             :         const char *name, unsigned int len,
    2037             :         instantiate_t instantiate, struct task_struct *task, const void *ptr)
    2038             : {
    2039           0 :         struct dentry *child, *dir = file->f_path.dentry;
    2040           0 :         struct qstr qname = QSTR_INIT(name, len);
    2041             :         struct inode *inode;
    2042           0 :         unsigned type = DT_UNKNOWN;
    2043           0 :         ino_t ino = 1;
    2044             : 
    2045           0 :         child = d_hash_and_lookup(dir, &qname);
    2046           0 :         if (!child) {
    2047           0 :                 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    2048           0 :                 child = d_alloc_parallel(dir, &qname, &wq);
    2049           0 :                 if (IS_ERR(child))
    2050             :                         goto end_instantiate;
    2051           0 :                 if (d_in_lookup(child)) {
    2052             :                         struct dentry *res;
    2053           0 :                         res = instantiate(child, task, ptr);
    2054           0 :                         d_lookup_done(child);
    2055           0 :                         if (unlikely(res)) {
    2056           0 :                                 dput(child);
    2057           0 :                                 child = res;
    2058           0 :                                 if (IS_ERR(child))
    2059             :                                         goto end_instantiate;
    2060             :                         }
    2061             :                 }
    2062             :         }
    2063           0 :         inode = d_inode(child);
    2064           0 :         ino = inode->i_ino;
    2065           0 :         type = inode->i_mode >> 12;
    2066           0 :         dput(child);
    2067             : end_instantiate:
    2068           0 :         return dir_emit(ctx, name, len, ino, type);
    2069             : }
    2070             : 
    2071             : /*
    2072             :  * dname_to_vma_addr - maps a dentry name into two unsigned longs
    2073             :  * which represent vma start and end addresses.
    2074             :  */
    2075           0 : static int dname_to_vma_addr(struct dentry *dentry,
    2076             :                              unsigned long *start, unsigned long *end)
    2077             : {
    2078           0 :         const char *str = dentry->d_name.name;
    2079             :         unsigned long long sval, eval;
    2080             :         unsigned int len;
    2081             : 
    2082           0 :         if (str[0] == '0' && str[1] != '-')
    2083             :                 return -EINVAL;
    2084           0 :         len = _parse_integer(str, 16, &sval);
    2085           0 :         if (len & KSTRTOX_OVERFLOW)
    2086             :                 return -EINVAL;
    2087             :         if (sval != (unsigned long)sval)
    2088             :                 return -EINVAL;
    2089           0 :         str += len;
    2090             : 
    2091           0 :         if (*str != '-')
    2092             :                 return -EINVAL;
    2093           0 :         str++;
    2094             : 
    2095           0 :         if (str[0] == '0' && str[1])
    2096             :                 return -EINVAL;
    2097           0 :         len = _parse_integer(str, 16, &eval);
    2098           0 :         if (len & KSTRTOX_OVERFLOW)
    2099             :                 return -EINVAL;
    2100             :         if (eval != (unsigned long)eval)
    2101             :                 return -EINVAL;
    2102           0 :         str += len;
    2103             : 
    2104           0 :         if (*str != '\0')
    2105             :                 return -EINVAL;
    2106             : 
    2107           0 :         *start = sval;
    2108           0 :         *end = eval;
    2109             : 
    2110             :         return 0;
    2111             : }
    2112             : 
    2113           0 : static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
    2114             : {
    2115             :         unsigned long vm_start, vm_end;
    2116           0 :         bool exact_vma_exists = false;
    2117           0 :         struct mm_struct *mm = NULL;
    2118             :         struct task_struct *task;
    2119             :         struct inode *inode;
    2120           0 :         int status = 0;
    2121             : 
    2122           0 :         if (flags & LOOKUP_RCU)
    2123             :                 return -ECHILD;
    2124             : 
    2125           0 :         inode = d_inode(dentry);
    2126           0 :         task = get_proc_task(inode);
    2127           0 :         if (!task)
    2128             :                 goto out_notask;
    2129             : 
    2130           0 :         mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
    2131           0 :         if (IS_ERR_OR_NULL(mm))
    2132             :                 goto out;
    2133             : 
    2134           0 :         if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
    2135           0 :                 status = mmap_read_lock_killable(mm);
    2136           0 :                 if (!status) {
    2137           0 :                         exact_vma_exists = !!find_exact_vma(mm, vm_start,
    2138             :                                                             vm_end);
    2139             :                         mmap_read_unlock(mm);
    2140             :                 }
    2141             :         }
    2142             : 
    2143           0 :         mmput(mm);
    2144             : 
    2145           0 :         if (exact_vma_exists) {
    2146           0 :                 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    2147             : 
    2148             :                 security_task_to_inode(task, inode);
    2149             :                 status = 1;
    2150             :         }
    2151             : 
    2152             : out:
    2153           0 :         put_task_struct(task);
    2154             : 
    2155             : out_notask:
    2156             :         return status;
    2157             : }
    2158             : 
    2159             : static const struct dentry_operations tid_map_files_dentry_operations = {
    2160             :         .d_revalidate   = map_files_d_revalidate,
    2161             :         .d_delete       = pid_delete_dentry,
    2162             : };
    2163             : 
    2164           0 : static int map_files_get_link(struct dentry *dentry, struct path *path)
    2165             : {
    2166             :         unsigned long vm_start, vm_end;
    2167             :         struct vm_area_struct *vma;
    2168             :         struct task_struct *task;
    2169             :         struct mm_struct *mm;
    2170             :         int rc;
    2171             : 
    2172           0 :         rc = -ENOENT;
    2173           0 :         task = get_proc_task(d_inode(dentry));
    2174           0 :         if (!task)
    2175             :                 goto out;
    2176             : 
    2177           0 :         mm = get_task_mm(task);
    2178           0 :         put_task_struct(task);
    2179           0 :         if (!mm)
    2180             :                 goto out;
    2181             : 
    2182           0 :         rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
    2183           0 :         if (rc)
    2184             :                 goto out_mmput;
    2185             : 
    2186           0 :         rc = mmap_read_lock_killable(mm);
    2187           0 :         if (rc)
    2188             :                 goto out_mmput;
    2189             : 
    2190           0 :         rc = -ENOENT;
    2191           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2192           0 :         if (vma && vma->vm_file) {
    2193           0 :                 *path = vma->vm_file->f_path;
    2194           0 :                 path_get(path);
    2195           0 :                 rc = 0;
    2196             :         }
    2197             :         mmap_read_unlock(mm);
    2198             : 
    2199             : out_mmput:
    2200           0 :         mmput(mm);
    2201             : out:
    2202           0 :         return rc;
    2203             : }
    2204             : 
    2205             : struct map_files_info {
    2206             :         unsigned long   start;
    2207             :         unsigned long   end;
    2208             :         fmode_t         mode;
    2209             : };
    2210             : 
    2211             : /*
    2212             :  * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
    2213             :  * to concerns about how the symlinks may be used to bypass permissions on
    2214             :  * ancestor directories in the path to the file in question.
    2215             :  */
    2216             : static const char *
    2217           0 : proc_map_files_get_link(struct dentry *dentry,
    2218             :                         struct inode *inode,
    2219             :                         struct delayed_call *done)
    2220             : {
    2221           0 :         if (!checkpoint_restore_ns_capable(&init_user_ns))
    2222             :                 return ERR_PTR(-EPERM);
    2223             : 
    2224           0 :         return proc_pid_get_link(dentry, inode, done);
    2225             : }
    2226             : 
    2227             : /*
    2228             :  * Identical to proc_pid_link_inode_operations except for get_link()
    2229             :  */
    2230             : static const struct inode_operations proc_map_files_link_inode_operations = {
    2231             :         .readlink       = proc_pid_readlink,
    2232             :         .get_link       = proc_map_files_get_link,
    2233             :         .setattr        = proc_setattr,
    2234             : };
    2235             : 
    2236             : static struct dentry *
    2237           0 : proc_map_files_instantiate(struct dentry *dentry,
    2238             :                            struct task_struct *task, const void *ptr)
    2239             : {
    2240           0 :         fmode_t mode = (fmode_t)(unsigned long)ptr;
    2241             :         struct proc_inode *ei;
    2242             :         struct inode *inode;
    2243             : 
    2244           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
    2245           0 :                                     ((mode & FMODE_READ ) ? S_IRUSR : 0) |
    2246           0 :                                     ((mode & FMODE_WRITE) ? S_IWUSR : 0));
    2247           0 :         if (!inode)
    2248             :                 return ERR_PTR(-ENOENT);
    2249             : 
    2250           0 :         ei = PROC_I(inode);
    2251           0 :         ei->op.proc_get_link = map_files_get_link;
    2252             : 
    2253           0 :         inode->i_op = &proc_map_files_link_inode_operations;
    2254           0 :         inode->i_size = 64;
    2255             : 
    2256           0 :         d_set_d_op(dentry, &tid_map_files_dentry_operations);
    2257           0 :         return d_splice_alias(inode, dentry);
    2258             : }
    2259             : 
    2260           0 : static struct dentry *proc_map_files_lookup(struct inode *dir,
    2261             :                 struct dentry *dentry, unsigned int flags)
    2262             : {
    2263             :         unsigned long vm_start, vm_end;
    2264             :         struct vm_area_struct *vma;
    2265             :         struct task_struct *task;
    2266             :         struct dentry *result;
    2267             :         struct mm_struct *mm;
    2268             : 
    2269           0 :         result = ERR_PTR(-ENOENT);
    2270           0 :         task = get_proc_task(dir);
    2271           0 :         if (!task)
    2272             :                 goto out;
    2273             : 
    2274           0 :         result = ERR_PTR(-EACCES);
    2275           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2276             :                 goto out_put_task;
    2277             : 
    2278           0 :         result = ERR_PTR(-ENOENT);
    2279           0 :         if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
    2280             :                 goto out_put_task;
    2281             : 
    2282           0 :         mm = get_task_mm(task);
    2283           0 :         if (!mm)
    2284             :                 goto out_put_task;
    2285             : 
    2286           0 :         result = ERR_PTR(-EINTR);
    2287           0 :         if (mmap_read_lock_killable(mm))
    2288             :                 goto out_put_mm;
    2289             : 
    2290           0 :         result = ERR_PTR(-ENOENT);
    2291           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2292           0 :         if (!vma)
    2293             :                 goto out_no_vma;
    2294             : 
    2295           0 :         if (vma->vm_file)
    2296           0 :                 result = proc_map_files_instantiate(dentry, task,
    2297           0 :                                 (void *)(unsigned long)vma->vm_file->f_mode);
    2298             : 
    2299             : out_no_vma:
    2300             :         mmap_read_unlock(mm);
    2301             : out_put_mm:
    2302           0 :         mmput(mm);
    2303             : out_put_task:
    2304           0 :         put_task_struct(task);
    2305             : out:
    2306           0 :         return result;
    2307             : }
    2308             : 
    2309             : static const struct inode_operations proc_map_files_inode_operations = {
    2310             :         .lookup         = proc_map_files_lookup,
    2311             :         .permission     = proc_fd_permission,
    2312             :         .setattr        = proc_setattr,
    2313             : };
    2314             : 
    2315             : static int
    2316           0 : proc_map_files_readdir(struct file *file, struct dir_context *ctx)
    2317             : {
    2318             :         struct vm_area_struct *vma;
    2319             :         struct task_struct *task;
    2320             :         struct mm_struct *mm;
    2321             :         unsigned long nr_files, pos, i;
    2322             :         GENRADIX(struct map_files_info) fa;
    2323             :         struct map_files_info *p;
    2324             :         int ret;
    2325             : 
    2326           0 :         genradix_init(&fa);
    2327             : 
    2328           0 :         ret = -ENOENT;
    2329           0 :         task = get_proc_task(file_inode(file));
    2330           0 :         if (!task)
    2331             :                 goto out;
    2332             : 
    2333           0 :         ret = -EACCES;
    2334           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2335             :                 goto out_put_task;
    2336             : 
    2337           0 :         ret = 0;
    2338           0 :         if (!dir_emit_dots(file, ctx))
    2339             :                 goto out_put_task;
    2340             : 
    2341           0 :         mm = get_task_mm(task);
    2342           0 :         if (!mm)
    2343             :                 goto out_put_task;
    2344             : 
    2345           0 :         ret = mmap_read_lock_killable(mm);
    2346           0 :         if (ret) {
    2347           0 :                 mmput(mm);
    2348           0 :                 goto out_put_task;
    2349             :         }
    2350             : 
    2351           0 :         nr_files = 0;
    2352             : 
    2353             :         /*
    2354             :          * We need two passes here:
    2355             :          *
    2356             :          *  1) Collect vmas of mapped files with mmap_lock taken
    2357             :          *  2) Release mmap_lock and instantiate entries
    2358             :          *
    2359             :          * otherwise we get lockdep complained, since filldir()
    2360             :          * routine might require mmap_lock taken in might_fault().
    2361             :          */
    2362             : 
    2363           0 :         for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
    2364           0 :                 if (!vma->vm_file)
    2365           0 :                         continue;
    2366           0 :                 if (++pos <= ctx->pos)
    2367           0 :                         continue;
    2368             : 
    2369           0 :                 p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
    2370           0 :                 if (!p) {
    2371           0 :                         ret = -ENOMEM;
    2372           0 :                         mmap_read_unlock(mm);
    2373           0 :                         mmput(mm);
    2374           0 :                         goto out_put_task;
    2375             :                 }
    2376             : 
    2377           0 :                 p->start = vma->vm_start;
    2378           0 :                 p->end = vma->vm_end;
    2379           0 :                 p->mode = vma->vm_file->f_mode;
    2380             :         }
    2381           0 :         mmap_read_unlock(mm);
    2382           0 :         mmput(mm);
    2383             : 
    2384           0 :         for (i = 0; i < nr_files; i++) {
    2385             :                 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
    2386             :                 unsigned int len;
    2387             : 
    2388           0 :                 p = genradix_ptr(&fa, i);
    2389           0 :                 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
    2390           0 :                 if (!proc_fill_cache(file, ctx,
    2391             :                                       buf, len,
    2392             :                                       proc_map_files_instantiate,
    2393             :                                       task,
    2394           0 :                                       (void *)(unsigned long)p->mode))
    2395             :                         break;
    2396           0 :                 ctx->pos++;
    2397             :         }
    2398             : 
    2399             : out_put_task:
    2400           0 :         put_task_struct(task);
    2401             : out:
    2402           0 :         genradix_free(&fa);
    2403           0 :         return ret;
    2404             : }
    2405             : 
    2406             : static const struct file_operations proc_map_files_operations = {
    2407             :         .read           = generic_read_dir,
    2408             :         .iterate_shared = proc_map_files_readdir,
    2409             :         .llseek         = generic_file_llseek,
    2410             : };
    2411             : 
    2412             : #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
    2413             : struct timers_private {
    2414             :         struct pid *pid;
    2415             :         struct task_struct *task;
    2416             :         struct sighand_struct *sighand;
    2417             :         struct pid_namespace *ns;
    2418             :         unsigned long flags;
    2419             : };
    2420             : 
    2421             : static void *timers_start(struct seq_file *m, loff_t *pos)
    2422             : {
    2423             :         struct timers_private *tp = m->private;
    2424             : 
    2425             :         tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
    2426             :         if (!tp->task)
    2427             :                 return ERR_PTR(-ESRCH);
    2428             : 
    2429             :         tp->sighand = lock_task_sighand(tp->task, &tp->flags);
    2430             :         if (!tp->sighand)
    2431             :                 return ERR_PTR(-ESRCH);
    2432             : 
    2433             :         return seq_list_start(&tp->task->signal->posix_timers, *pos);
    2434             : }
    2435             : 
    2436             : static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
    2437             : {
    2438             :         struct timers_private *tp = m->private;
    2439             :         return seq_list_next(v, &tp->task->signal->posix_timers, pos);
    2440             : }
    2441             : 
    2442             : static void timers_stop(struct seq_file *m, void *v)
    2443             : {
    2444             :         struct timers_private *tp = m->private;
    2445             : 
    2446             :         if (tp->sighand) {
    2447             :                 unlock_task_sighand(tp->task, &tp->flags);
    2448             :                 tp->sighand = NULL;
    2449             :         }
    2450             : 
    2451             :         if (tp->task) {
    2452             :                 put_task_struct(tp->task);
    2453             :                 tp->task = NULL;
    2454             :         }
    2455             : }
    2456             : 
    2457             : static int show_timer(struct seq_file *m, void *v)
    2458             : {
    2459             :         struct k_itimer *timer;
    2460             :         struct timers_private *tp = m->private;
    2461             :         int notify;
    2462             :         static const char * const nstr[] = {
    2463             :                 [SIGEV_SIGNAL] = "signal",
    2464             :                 [SIGEV_NONE] = "none",
    2465             :                 [SIGEV_THREAD] = "thread",
    2466             :         };
    2467             : 
    2468             :         timer = list_entry((struct list_head *)v, struct k_itimer, list);
    2469             :         notify = timer->it_sigev_notify;
    2470             : 
    2471             :         seq_printf(m, "ID: %d\n", timer->it_id);
    2472             :         seq_printf(m, "signal: %d/%px\n",
    2473             :                    timer->sigq->info.si_signo,
    2474             :                    timer->sigq->info.si_value.sival_ptr);
    2475             :         seq_printf(m, "notify: %s/%s.%d\n",
    2476             :                    nstr[notify & ~SIGEV_THREAD_ID],
    2477             :                    (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
    2478             :                    pid_nr_ns(timer->it_pid, tp->ns));
    2479             :         seq_printf(m, "ClockID: %d\n", timer->it_clock);
    2480             : 
    2481             :         return 0;
    2482             : }
    2483             : 
    2484             : static const struct seq_operations proc_timers_seq_ops = {
    2485             :         .start  = timers_start,
    2486             :         .next   = timers_next,
    2487             :         .stop   = timers_stop,
    2488             :         .show   = show_timer,
    2489             : };
    2490             : 
    2491             : static int proc_timers_open(struct inode *inode, struct file *file)
    2492             : {
    2493             :         struct timers_private *tp;
    2494             : 
    2495             :         tp = __seq_open_private(file, &proc_timers_seq_ops,
    2496             :                         sizeof(struct timers_private));
    2497             :         if (!tp)
    2498             :                 return -ENOMEM;
    2499             : 
    2500             :         tp->pid = proc_pid(inode);
    2501             :         tp->ns = proc_pid_ns(inode->i_sb);
    2502             :         return 0;
    2503             : }
    2504             : 
    2505             : static const struct file_operations proc_timers_operations = {
    2506             :         .open           = proc_timers_open,
    2507             :         .read           = seq_read,
    2508             :         .llseek         = seq_lseek,
    2509             :         .release        = seq_release_private,
    2510             : };
    2511             : #endif
    2512             : 
    2513           0 : static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
    2514             :                                         size_t count, loff_t *offset)
    2515             : {
    2516           0 :         struct inode *inode = file_inode(file);
    2517             :         struct task_struct *p;
    2518             :         u64 slack_ns;
    2519             :         int err;
    2520             : 
    2521           0 :         err = kstrtoull_from_user(buf, count, 10, &slack_ns);
    2522           0 :         if (err < 0)
    2523           0 :                 return err;
    2524             : 
    2525           0 :         p = get_proc_task(inode);
    2526           0 :         if (!p)
    2527             :                 return -ESRCH;
    2528             : 
    2529           0 :         if (p != current) {
    2530             :                 rcu_read_lock();
    2531           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2532             :                         rcu_read_unlock();
    2533           0 :                         count = -EPERM;
    2534           0 :                         goto out;
    2535             :                 }
    2536           0 :                 rcu_read_unlock();
    2537             : 
    2538           0 :                 err = security_task_setscheduler(p);
    2539           0 :                 if (err) {
    2540           0 :                         count = err;
    2541           0 :                         goto out;
    2542             :                 }
    2543             :         }
    2544             : 
    2545             :         task_lock(p);
    2546           0 :         if (slack_ns == 0)
    2547           0 :                 p->timer_slack_ns = p->default_timer_slack_ns;
    2548             :         else
    2549           0 :                 p->timer_slack_ns = slack_ns;
    2550             :         task_unlock(p);
    2551             : 
    2552             : out:
    2553           0 :         put_task_struct(p);
    2554             : 
    2555           0 :         return count;
    2556             : }
    2557             : 
    2558           0 : static int timerslack_ns_show(struct seq_file *m, void *v)
    2559             : {
    2560           0 :         struct inode *inode = m->private;
    2561             :         struct task_struct *p;
    2562           0 :         int err = 0;
    2563             : 
    2564           0 :         p = get_proc_task(inode);
    2565           0 :         if (!p)
    2566             :                 return -ESRCH;
    2567             : 
    2568           0 :         if (p != current) {
    2569             :                 rcu_read_lock();
    2570           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2571             :                         rcu_read_unlock();
    2572           0 :                         err = -EPERM;
    2573           0 :                         goto out;
    2574             :                 }
    2575             :                 rcu_read_unlock();
    2576             : 
    2577           0 :                 err = security_task_getscheduler(p);
    2578             :                 if (err)
    2579             :                         goto out;
    2580             :         }
    2581             : 
    2582             :         task_lock(p);
    2583           0 :         seq_printf(m, "%llu\n", p->timer_slack_ns);
    2584             :         task_unlock(p);
    2585             : 
    2586             : out:
    2587           0 :         put_task_struct(p);
    2588             : 
    2589           0 :         return err;
    2590             : }
    2591             : 
    2592           0 : static int timerslack_ns_open(struct inode *inode, struct file *filp)
    2593             : {
    2594           0 :         return single_open(filp, timerslack_ns_show, inode);
    2595             : }
    2596             : 
    2597             : static const struct file_operations proc_pid_set_timerslack_ns_operations = {
    2598             :         .open           = timerslack_ns_open,
    2599             :         .read           = seq_read,
    2600             :         .write          = timerslack_ns_write,
    2601             :         .llseek         = seq_lseek,
    2602             :         .release        = single_release,
    2603             : };
    2604             : 
    2605           0 : static struct dentry *proc_pident_instantiate(struct dentry *dentry,
    2606             :         struct task_struct *task, const void *ptr)
    2607             : {
    2608           0 :         const struct pid_entry *p = ptr;
    2609             :         struct inode *inode;
    2610             :         struct proc_inode *ei;
    2611             : 
    2612           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
    2613           0 :         if (!inode)
    2614             :                 return ERR_PTR(-ENOENT);
    2615             : 
    2616           0 :         ei = PROC_I(inode);
    2617           0 :         if (S_ISDIR(inode->i_mode))
    2618           0 :                 set_nlink(inode, 2);    /* Use getattr to fix if necessary */
    2619           0 :         if (p->iop)
    2620           0 :                 inode->i_op = p->iop;
    2621           0 :         if (p->fop)
    2622           0 :                 inode->i_fop = p->fop;
    2623           0 :         ei->op = p->op;
    2624           0 :         pid_update_inode(task, inode);
    2625           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    2626           0 :         return d_splice_alias(inode, dentry);
    2627             : }
    2628             : 
    2629           0 : static struct dentry *proc_pident_lookup(struct inode *dir, 
    2630             :                                          struct dentry *dentry,
    2631             :                                          const struct pid_entry *p,
    2632             :                                          const struct pid_entry *end)
    2633             : {
    2634           0 :         struct task_struct *task = get_proc_task(dir);
    2635           0 :         struct dentry *res = ERR_PTR(-ENOENT);
    2636             : 
    2637           0 :         if (!task)
    2638             :                 goto out_no_task;
    2639             : 
    2640             :         /*
    2641             :          * Yes, it does not scale. And it should not. Don't add
    2642             :          * new entries into /proc/<tgid>/ without very good reasons.
    2643             :          */
    2644           0 :         for (; p < end; p++) {
    2645           0 :                 if (p->len != dentry->d_name.len)
    2646           0 :                         continue;
    2647           0 :                 if (!memcmp(dentry->d_name.name, p->name, p->len)) {
    2648           0 :                         res = proc_pident_instantiate(dentry, task, p);
    2649           0 :                         break;
    2650             :                 }
    2651             :         }
    2652           0 :         put_task_struct(task);
    2653             : out_no_task:
    2654           0 :         return res;
    2655             : }
    2656             : 
    2657           0 : static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
    2658             :                 const struct pid_entry *ents, unsigned int nents)
    2659             : {
    2660           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    2661             :         const struct pid_entry *p;
    2662             : 
    2663           0 :         if (!task)
    2664             :                 return -ENOENT;
    2665             : 
    2666           0 :         if (!dir_emit_dots(file, ctx))
    2667             :                 goto out;
    2668             : 
    2669           0 :         if (ctx->pos >= nents + 2)
    2670             :                 goto out;
    2671             : 
    2672           0 :         for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
    2673           0 :                 if (!proc_fill_cache(file, ctx, p->name, p->len,
    2674             :                                 proc_pident_instantiate, task, p))
    2675             :                         break;
    2676           0 :                 ctx->pos++;
    2677             :         }
    2678             : out:
    2679           0 :         put_task_struct(task);
    2680           0 :         return 0;
    2681             : }
    2682             : 
    2683             : #ifdef CONFIG_SECURITY
    2684             : static int proc_pid_attr_open(struct inode *inode, struct file *file)
    2685             : {
    2686             :         file->private_data = NULL;
    2687             :         __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
    2688             :         return 0;
    2689             : }
    2690             : 
    2691             : static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
    2692             :                                   size_t count, loff_t *ppos)
    2693             : {
    2694             :         struct inode * inode = file_inode(file);
    2695             :         char *p = NULL;
    2696             :         ssize_t length;
    2697             :         struct task_struct *task = get_proc_task(inode);
    2698             : 
    2699             :         if (!task)
    2700             :                 return -ESRCH;
    2701             : 
    2702             :         length = security_getprocattr(task, PROC_I(inode)->op.lsm,
    2703             :                                       (char*)file->f_path.dentry->d_name.name,
    2704             :                                       &p);
    2705             :         put_task_struct(task);
    2706             :         if (length > 0)
    2707             :                 length = simple_read_from_buffer(buf, count, ppos, p, length);
    2708             :         kfree(p);
    2709             :         return length;
    2710             : }
    2711             : 
    2712             : static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
    2713             :                                    size_t count, loff_t *ppos)
    2714             : {
    2715             :         struct inode * inode = file_inode(file);
    2716             :         struct task_struct *task;
    2717             :         void *page;
    2718             :         int rv;
    2719             : 
    2720             :         /* A task may only write when it was the opener. */
    2721             :         if (file->private_data != current->mm)
    2722             :                 return -EPERM;
    2723             : 
    2724             :         rcu_read_lock();
    2725             :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    2726             :         if (!task) {
    2727             :                 rcu_read_unlock();
    2728             :                 return -ESRCH;
    2729             :         }
    2730             :         /* A task may only write its own attributes. */
    2731             :         if (current != task) {
    2732             :                 rcu_read_unlock();
    2733             :                 return -EACCES;
    2734             :         }
    2735             :         /* Prevent changes to overridden credentials. */
    2736             :         if (current_cred() != current_real_cred()) {
    2737             :                 rcu_read_unlock();
    2738             :                 return -EBUSY;
    2739             :         }
    2740             :         rcu_read_unlock();
    2741             : 
    2742             :         if (count > PAGE_SIZE)
    2743             :                 count = PAGE_SIZE;
    2744             : 
    2745             :         /* No partial writes. */
    2746             :         if (*ppos != 0)
    2747             :                 return -EINVAL;
    2748             : 
    2749             :         page = memdup_user(buf, count);
    2750             :         if (IS_ERR(page)) {
    2751             :                 rv = PTR_ERR(page);
    2752             :                 goto out;
    2753             :         }
    2754             : 
    2755             :         /* Guard against adverse ptrace interaction */
    2756             :         rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
    2757             :         if (rv < 0)
    2758             :                 goto out_free;
    2759             : 
    2760             :         rv = security_setprocattr(PROC_I(inode)->op.lsm,
    2761             :                                   file->f_path.dentry->d_name.name, page,
    2762             :                                   count);
    2763             :         mutex_unlock(&current->signal->cred_guard_mutex);
    2764             : out_free:
    2765             :         kfree(page);
    2766             : out:
    2767             :         return rv;
    2768             : }
    2769             : 
    2770             : static const struct file_operations proc_pid_attr_operations = {
    2771             :         .open           = proc_pid_attr_open,
    2772             :         .read           = proc_pid_attr_read,
    2773             :         .write          = proc_pid_attr_write,
    2774             :         .llseek         = generic_file_llseek,
    2775             :         .release        = mem_release,
    2776             : };
    2777             : 
    2778             : #define LSM_DIR_OPS(LSM) \
    2779             : static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
    2780             :                              struct dir_context *ctx) \
    2781             : { \
    2782             :         return proc_pident_readdir(filp, ctx, \
    2783             :                                    LSM##_attr_dir_stuff, \
    2784             :                                    ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2785             : } \
    2786             : \
    2787             : static const struct file_operations proc_##LSM##_attr_dir_ops = { \
    2788             :         .read           = generic_read_dir, \
    2789             :         .iterate        = proc_##LSM##_attr_dir_iterate, \
    2790             :         .llseek         = default_llseek, \
    2791             : }; \
    2792             : \
    2793             : static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
    2794             :                                 struct dentry *dentry, unsigned int flags) \
    2795             : { \
    2796             :         return proc_pident_lookup(dir, dentry, \
    2797             :                                   LSM##_attr_dir_stuff, \
    2798             :                                   LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2799             : } \
    2800             : \
    2801             : static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
    2802             :         .lookup         = proc_##LSM##_attr_dir_lookup, \
    2803             :         .getattr        = pid_getattr, \
    2804             :         .setattr        = proc_setattr, \
    2805             : }
    2806             : 
    2807             : #ifdef CONFIG_SECURITY_SMACK
    2808             : static const struct pid_entry smack_attr_dir_stuff[] = {
    2809             :         ATTR("smack", "current",    0666),
    2810             : };
    2811             : LSM_DIR_OPS(smack);
    2812             : #endif
    2813             : 
    2814             : #ifdef CONFIG_SECURITY_APPARMOR
    2815             : static const struct pid_entry apparmor_attr_dir_stuff[] = {
    2816             :         ATTR("apparmor", "current", 0666),
    2817             :         ATTR("apparmor", "prev",    0444),
    2818             :         ATTR("apparmor", "exec",    0666),
    2819             : };
    2820             : LSM_DIR_OPS(apparmor);
    2821             : #endif
    2822             : 
    2823             : static const struct pid_entry attr_dir_stuff[] = {
    2824             :         ATTR(NULL, "current",         0666),
    2825             :         ATTR(NULL, "prev",            0444),
    2826             :         ATTR(NULL, "exec",            0666),
    2827             :         ATTR(NULL, "fscreate",                0666),
    2828             :         ATTR(NULL, "keycreate",               0666),
    2829             :         ATTR(NULL, "sockcreate",      0666),
    2830             : #ifdef CONFIG_SECURITY_SMACK
    2831             :         DIR("smack",                  0555,
    2832             :             proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
    2833             : #endif
    2834             : #ifdef CONFIG_SECURITY_APPARMOR
    2835             :         DIR("apparmor",                       0555,
    2836             :             proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
    2837             : #endif
    2838             : };
    2839             : 
    2840             : static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
    2841             : {
    2842             :         return proc_pident_readdir(file, ctx, 
    2843             :                                    attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
    2844             : }
    2845             : 
    2846             : static const struct file_operations proc_attr_dir_operations = {
    2847             :         .read           = generic_read_dir,
    2848             :         .iterate_shared = proc_attr_dir_readdir,
    2849             :         .llseek         = generic_file_llseek,
    2850             : };
    2851             : 
    2852             : static struct dentry *proc_attr_dir_lookup(struct inode *dir,
    2853             :                                 struct dentry *dentry, unsigned int flags)
    2854             : {
    2855             :         return proc_pident_lookup(dir, dentry,
    2856             :                                   attr_dir_stuff,
    2857             :                                   attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
    2858             : }
    2859             : 
    2860             : static const struct inode_operations proc_attr_dir_inode_operations = {
    2861             :         .lookup         = proc_attr_dir_lookup,
    2862             :         .getattr        = pid_getattr,
    2863             :         .setattr        = proc_setattr,
    2864             : };
    2865             : 
    2866             : #endif
    2867             : 
    2868             : #ifdef CONFIG_ELF_CORE
    2869           0 : static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
    2870             :                                          size_t count, loff_t *ppos)
    2871             : {
    2872           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    2873             :         struct mm_struct *mm;
    2874             :         char buffer[PROC_NUMBUF];
    2875             :         size_t len;
    2876             :         int ret;
    2877             : 
    2878           0 :         if (!task)
    2879             :                 return -ESRCH;
    2880             : 
    2881           0 :         ret = 0;
    2882           0 :         mm = get_task_mm(task);
    2883           0 :         if (mm) {
    2884           0 :                 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
    2885           0 :                                ((mm->flags & MMF_DUMP_FILTER_MASK) >>
    2886             :                                 MMF_DUMP_FILTER_SHIFT));
    2887           0 :                 mmput(mm);
    2888           0 :                 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
    2889             :         }
    2890             : 
    2891           0 :         put_task_struct(task);
    2892             : 
    2893           0 :         return ret;
    2894             : }
    2895             : 
    2896           0 : static ssize_t proc_coredump_filter_write(struct file *file,
    2897             :                                           const char __user *buf,
    2898             :                                           size_t count,
    2899             :                                           loff_t *ppos)
    2900             : {
    2901             :         struct task_struct *task;
    2902             :         struct mm_struct *mm;
    2903             :         unsigned int val;
    2904             :         int ret;
    2905             :         int i;
    2906             :         unsigned long mask;
    2907             : 
    2908           0 :         ret = kstrtouint_from_user(buf, count, 0, &val);
    2909           0 :         if (ret < 0)
    2910           0 :                 return ret;
    2911             : 
    2912           0 :         ret = -ESRCH;
    2913           0 :         task = get_proc_task(file_inode(file));
    2914           0 :         if (!task)
    2915             :                 goto out_no_task;
    2916             : 
    2917           0 :         mm = get_task_mm(task);
    2918           0 :         if (!mm)
    2919             :                 goto out_no_mm;
    2920             :         ret = 0;
    2921             : 
    2922           0 :         for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
    2923           0 :                 if (val & mask)
    2924           0 :                         set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2925             :                 else
    2926           0 :                         clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2927             :         }
    2928             : 
    2929           0 :         mmput(mm);
    2930             :  out_no_mm:
    2931           0 :         put_task_struct(task);
    2932             :  out_no_task:
    2933           0 :         if (ret < 0)
    2934           0 :                 return ret;
    2935           0 :         return count;
    2936             : }
    2937             : 
    2938             : static const struct file_operations proc_coredump_filter_operations = {
    2939             :         .read           = proc_coredump_filter_read,
    2940             :         .write          = proc_coredump_filter_write,
    2941             :         .llseek         = generic_file_llseek,
    2942             : };
    2943             : #endif
    2944             : 
    2945             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    2946             : static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
    2947             : {
    2948             :         struct task_io_accounting acct = task->ioac;
    2949             :         unsigned long flags;
    2950             :         int result;
    2951             : 
    2952             :         result = down_read_killable(&task->signal->exec_update_lock);
    2953             :         if (result)
    2954             :                 return result;
    2955             : 
    2956             :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
    2957             :                 result = -EACCES;
    2958             :                 goto out_unlock;
    2959             :         }
    2960             : 
    2961             :         if (whole && lock_task_sighand(task, &flags)) {
    2962             :                 struct task_struct *t = task;
    2963             : 
    2964             :                 task_io_accounting_add(&acct, &task->signal->ioac);
    2965             :                 while_each_thread(task, t)
    2966             :                         task_io_accounting_add(&acct, &t->ioac);
    2967             : 
    2968             :                 unlock_task_sighand(task, &flags);
    2969             :         }
    2970             :         seq_printf(m,
    2971             :                    "rchar: %llu\n"
    2972             :                    "wchar: %llu\n"
    2973             :                    "syscr: %llu\n"
    2974             :                    "syscw: %llu\n"
    2975             :                    "read_bytes: %llu\n"
    2976             :                    "write_bytes: %llu\n"
    2977             :                    "cancelled_write_bytes: %llu\n",
    2978             :                    (unsigned long long)acct.rchar,
    2979             :                    (unsigned long long)acct.wchar,
    2980             :                    (unsigned long long)acct.syscr,
    2981             :                    (unsigned long long)acct.syscw,
    2982             :                    (unsigned long long)acct.read_bytes,
    2983             :                    (unsigned long long)acct.write_bytes,
    2984             :                    (unsigned long long)acct.cancelled_write_bytes);
    2985             :         result = 0;
    2986             : 
    2987             : out_unlock:
    2988             :         up_read(&task->signal->exec_update_lock);
    2989             :         return result;
    2990             : }
    2991             : 
    2992             : static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    2993             :                                   struct pid *pid, struct task_struct *task)
    2994             : {
    2995             :         return do_io_accounting(task, m, 0);
    2996             : }
    2997             : /* Handler behind the "io" entry of tgid_base_stuff; forwards to do_io_accounting() with a final argument of 1. */
    2998             : static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    2999             :                                    struct pid *pid, struct task_struct *task)
    3000             : {
    3001             :         return do_io_accounting(task, m, 1); /* NOTE(review): 1 presumably requests whole-thread-group totals - confirm in do_io_accounting() */
    3002             : }
    3003             : #endif /* CONFIG_TASK_IO_ACCOUNTING */
    3004             : 
    3005             : #ifdef CONFIG_USER_NS
    3006             : static int proc_id_map_open(struct inode *inode, struct file *file,
    3007             :         const struct seq_operations *seq_ops)
    3008             : {
    3009             :         struct user_namespace *ns = NULL;
    3010             :         struct task_struct *task;
    3011             :         struct seq_file *seq;
    3012             :         int ret = -EINVAL; /* returned when no task or namespace could be obtained */
    3013             :         /* Shared open helper for the uid/gid/projid map files: pin the task's user namespace and store it in the seq_file. */
    3014             :         task = get_proc_task(inode);
    3015             :         if (task) {
    3016             :                 rcu_read_lock();
    3017             :                 ns = get_user_ns(task_cred_xxx(task, user_ns)); /* namespace is read from the task's creds inside the RCU section */
    3018             :                 rcu_read_unlock();
    3019             :                 put_task_struct(task); /* the task is not used past this point */
    3020             :         }
    3021             :         if (!ns)
    3022             :                 goto err;
    3023             : 
    3024             :         ret = seq_open(file, seq_ops);
    3025             :         if (ret)
    3026             :                 goto err_put_ns; /* give back the namespace obtained above */
    3027             : 
    3028             :         seq = file->private_data; /* presumably the seq_file installed by seq_open() - confirm */
    3029             :         seq->private = ns; /* put again in proc_id_map_release() */
    3030             : 
    3031             :         return 0;
    3032             : err_put_ns:
    3033             :         put_user_ns(ns);
    3034             : err:
    3035             :         return ret;
    3036             : }
    3037             : /* Release counterpart of proc_id_map_open(): put the namespace kept in seq->private, then release the seq_file. */
    3038             : static int proc_id_map_release(struct inode *inode, struct file *file)
    3039             : {
    3040             :         struct seq_file *seq = file->private_data;
    3041             :         struct user_namespace *ns = seq->private;
    3042             :         put_user_ns(ns);
    3043             :         return seq_release(inode, file);
    3044             : }
    3045             : /* .open handler of proc_uid_map_operations: proc_id_map_open() with the uid seq_operations. */
    3046             : static int proc_uid_map_open(struct inode *inode, struct file *file)
    3047             : {
    3048             :         return proc_id_map_open(inode, file, &proc_uid_seq_operations);
    3049             : }
    3050             : /* .open handler of proc_gid_map_operations: proc_id_map_open() with the gid seq_operations. */
    3051             : static int proc_gid_map_open(struct inode *inode, struct file *file)
    3052             : {
    3053             :         return proc_id_map_open(inode, file, &proc_gid_seq_operations);
    3054             : }
    3055             : /* .open handler of proc_projid_map_operations: proc_id_map_open() with the projid seq_operations. */
    3056             : static int proc_projid_map_open(struct inode *inode, struct file *file)
    3057             : {
    3058             :         return proc_id_map_open(inode, file, &proc_projid_seq_operations);
    3059             : }
    3060             : /* File operations for the "uid_map" entry listed in tgid_base_stuff. */
    3061             : static const struct file_operations proc_uid_map_operations = {
    3062             :         .open           = proc_uid_map_open,
    3063             :         .write          = proc_uid_map_write,
    3064             :         .read           = seq_read,
    3065             :         .llseek         = seq_lseek,
    3066             :         .release        = proc_id_map_release, /* puts the namespace stored at open */
    3067             : };
    3068             : /* File operations for the "gid_map" entry listed in tgid_base_stuff. */
    3069             : static const struct file_operations proc_gid_map_operations = {
    3070             :         .open           = proc_gid_map_open,
    3071             :         .write          = proc_gid_map_write,
    3072             :         .read           = seq_read,
    3073             :         .llseek         = seq_lseek,
    3074             :         .release        = proc_id_map_release, /* puts the namespace stored at open */
    3075             : };
    3076             : /* File operations for the "projid_map" entry listed in tgid_base_stuff. */
    3077             : static const struct file_operations proc_projid_map_operations = {
    3078             :         .open           = proc_projid_map_open,
    3079             :         .write          = proc_projid_map_write,
    3080             :         .read           = seq_read,
    3081             :         .llseek         = seq_lseek,
    3082             :         .release        = proc_id_map_release, /* puts the namespace stored at open */
    3083             : };
    3084             : /* .open handler of proc_setgroups_operations: pins the task's user namespace and passes it to single_open(). */
    3085             : static int proc_setgroups_open(struct inode *inode, struct file *file)
    3086             : {
    3087             :         struct user_namespace *ns = NULL;
    3088             :         struct task_struct *task;
    3089             :         int ret;
    3090             : 
    3091             :         ret = -ESRCH; /* result when the task or its namespace cannot be obtained */
    3092             :         task = get_proc_task(inode);
    3093             :         if (task) {
    3094             :                 rcu_read_lock();
    3095             :                 ns = get_user_ns(task_cred_xxx(task, user_ns));
    3096             :                 rcu_read_unlock();
    3097             :                 put_task_struct(task);
    3098             :         }
    3099             :         if (!ns)
    3100             :                 goto err;
    3101             :         /* A writable open is refused with -EACCES unless ns_capable(ns, CAP_SYS_ADMIN) holds. */
    3102             :         if (file->f_mode & FMODE_WRITE) {
    3103             :                 ret = -EACCES;
    3104             :                 if (!ns_capable(ns, CAP_SYS_ADMIN))
    3105             :                         goto err_put_ns;
    3106             :         }
    3107             : 
    3108             :         ret = single_open(file, &proc_setgroups_show, ns);
    3109             :         if (ret)
    3110             :                 goto err_put_ns;
    3111             : 
    3112             :         return 0; /* ns is put later by proc_setgroups_release() */
    3113             : err_put_ns:
    3114             :         put_user_ns(ns);
    3115             : err:
    3116             :         return ret;
    3117             : }
    3118             : /* Release counterpart of proc_setgroups_open(): release the seq_file, then put the namespace it carried. */
    3119             : static int proc_setgroups_release(struct inode *inode, struct file *file)
    3120             : {
    3121             :         struct seq_file *seq = file->private_data;
    3122             :         struct user_namespace *ns = seq->private; /* read before single_release(); seq is not touched afterwards */
    3123             :         int ret = single_release(inode, file);
    3124             :         put_user_ns(ns);
    3125             :         return ret;
    3126             : }
    3127             : /* File operations for the "setgroups" entry listed in tgid_base_stuff. */
    3128             : static const struct file_operations proc_setgroups_operations = {
    3129             :         .open           = proc_setgroups_open,
    3130             :         .write          = proc_setgroups_write,
    3131             :         .read           = seq_read,
    3132             :         .llseek         = seq_lseek,
    3133             :         .release        = proc_setgroups_release,
    3134             : };
    3135             : #endif /* CONFIG_USER_NS */
    3136             : /* Prints task->personality as 8 hex digits when lock_trace() succeeds; otherwise returns its error and prints nothing. */
    3137           0 : static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
    3138             :                                 struct pid *pid, struct task_struct *task)
    3139             : {
    3140           0 :         int err = lock_trace(task);
    3141           0 :         if (!err) {
    3142           0 :                 seq_printf(m, "%08x\n", task->personality);
    3143           0 :                 unlock_trace(task); /* pairs with the successful lock_trace() above */
    3144             :         }
    3145           0 :         return err;
    3146             : }
    3147             : 
    3148             : #ifdef CONFIG_LIVEPATCH
    3149             : static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
    3150             :                                 struct pid *pid, struct task_struct *task)
    3151             : { /* Prints task->patch_state as a signed decimal; @ns and @pid are unused. */
    3152             :         seq_printf(m, "%d\n", task->patch_state);
    3153             :         return 0;
    3154             : }
    3155             : #endif /* CONFIG_LIVEPATCH */
    3156             : 
    3157             : #ifdef CONFIG_STACKLEAK_METRICS
    3158             : static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
    3159             :                                 struct pid *pid, struct task_struct *task)
    3160             : {
    3161             :         unsigned long prev_depth = THREAD_SIZE -
    3162             :                                 (task->prev_lowest_stack & (THREAD_SIZE - 1));
    3163             :         unsigned long depth = THREAD_SIZE -
    3164             :                                 (task->lowest_stack & (THREAD_SIZE - 1));
    3165             :         /* Each depth is THREAD_SIZE minus the low bits of the recorded value (an in-stack offset, assuming THREAD_SIZE is a power of two). */
    3166             :         seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
    3167             :                                                         prev_depth, depth);
    3168             :         return 0;
    3169             : }
    3170             : #endif /* CONFIG_STACKLEAK_METRICS */
    3171             : 
    3172             : /*
    3173             :  * Thread groups
    3174             :  */
    3175             : static const struct file_operations proc_task_operations;
    3176             : static const struct inode_operations proc_task_inode_operations;
    3177             : /* Entry table of the per-process (tgid) directory; walked by proc_tgid_base_readdir() and proc_tgid_base_lookup(). */
    3178             : static const struct pid_entry tgid_base_stuff[] = {
    3179             :         DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
    3180             :         DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
    3181             :         DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
    3182             :         DIR("fdinfo",     S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
    3183             :         DIR("ns",       S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
    3184             : #ifdef CONFIG_NET
    3185             :         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
    3186             : #endif
    3187             :         REG("environ",    S_IRUSR, proc_environ_operations),
    3188             :         REG("auxv",       S_IRUSR, proc_auxv_operations),
    3189             :         ONE("status",     S_IRUGO, proc_pid_status),
    3190             :         ONE("personality", S_IRUSR, proc_pid_personality),
    3191             :         ONE("limits",   S_IRUGO, proc_pid_limits),
    3192             : #ifdef CONFIG_SCHED_DEBUG
    3193             :         REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
    3194             : #endif
    3195             : #ifdef CONFIG_SCHED_AUTOGROUP
    3196             :         REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
    3197             : #endif
    3198             : #ifdef CONFIG_TIME_NS
    3199             :         REG("timens_offsets",  S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
    3200             : #endif
    3201             :         REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
    3202             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
    3203             :         ONE("syscall",    S_IRUSR, proc_pid_syscall),
    3204             : #endif
    3205             :         REG("cmdline",    S_IRUGO, proc_pid_cmdline_ops),
    3206             :         ONE("stat",       S_IRUGO, proc_tgid_stat),
    3207             :         ONE("statm",      S_IRUGO, proc_pid_statm),
    3208             :         REG("maps",       S_IRUGO, proc_pid_maps_operations),
    3209             : #ifdef CONFIG_NUMA
    3210             :         REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
    3211             : #endif
    3212             :         REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
    3213             :         LNK("cwd",        proc_cwd_link),
    3214             :         LNK("root",       proc_root_link),
    3215             :         LNK("exe",        proc_exe_link),
    3216             :         REG("mounts",     S_IRUGO, proc_mounts_operations),
    3217             :         REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    3218             :         REG("mountstats", S_IRUSR, proc_mountstats_operations),
    3219             : #ifdef CONFIG_PROC_PAGE_MONITOR
    3220             :         REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
    3221             :         REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
    3222             :         REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
    3223             :         REG("pagemap",    S_IRUSR, proc_pagemap_operations),
    3224             : #endif
    3225             : #ifdef CONFIG_SECURITY
    3226             :         DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
    3227             : #endif
    3228             : #ifdef CONFIG_KALLSYMS
    3229             :         ONE("wchan",      S_IRUGO, proc_pid_wchan),
    3230             : #endif
    3231             : #ifdef CONFIG_STACKTRACE
    3232             :         ONE("stack",      S_IRUSR, proc_pid_stack),
    3233             : #endif
    3234             : #ifdef CONFIG_SCHED_INFO
    3235             :         ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
    3236             : #endif
    3237             : #ifdef CONFIG_LATENCYTOP
    3238             :         REG("latency",  S_IRUGO, proc_lstats_operations),
    3239             : #endif
    3240             : #ifdef CONFIG_PROC_PID_CPUSET
    3241             :         ONE("cpuset",     S_IRUGO, proc_cpuset_show),
    3242             : #endif
    3243             : #ifdef CONFIG_CGROUPS
    3244             :         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
    3245             : #endif
    3246             : #ifdef CONFIG_PROC_CPU_RESCTRL
    3247             :         ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
    3248             : #endif
    3249             :         ONE("oom_score",  S_IRUGO, proc_oom_score),
    3250             :         REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
    3251             :         REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
    3252             : #ifdef CONFIG_AUDIT
    3253             :         REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
    3254             :         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
    3255             : #endif
    3256             : #ifdef CONFIG_FAULT_INJECTION
    3257             :         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
    3258             :         REG("fail-nth", 0644, proc_fail_nth_operations),
    3259             : #endif
    3260             : #ifdef CONFIG_ELF_CORE
    3261             :         REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
    3262             : #endif
    3263             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    3264             :         ONE("io",     S_IRUSR, proc_tgid_io_accounting),
    3265             : #endif
    3266             : #ifdef CONFIG_USER_NS
    3267             :         REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
    3268             :         REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
    3269             :         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
    3270             :         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
    3271             : #endif
    3272             : #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
    3273             :         REG("timers",   S_IRUGO, proc_timers_operations),
    3274             : #endif
    3275             :         REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
    3276             : #ifdef CONFIG_LIVEPATCH
    3277             :         ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
    3278             : #endif
    3279             : #ifdef CONFIG_STACKLEAK_METRICS
    3280             :         ONE("stack_depth", S_IRUGO, proc_stack_depth),
    3281             : #endif
    3282             : #ifdef CONFIG_PROC_PID_ARCH_STATUS
    3283             :         ONE("arch_status", S_IRUGO, proc_pid_arch_status),
    3284             : #endif
    3285             : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
    3286             :         ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
    3287             : #endif
    3288             : };
    3289             : /* .iterate_shared handler of proc_tgid_base_operations: lists the whole tgid_base_stuff table via proc_pident_readdir(). */
    3290           0 : static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
    3291             : {
    3292           0 :         return proc_pident_readdir(file, ctx,
    3293             :                                    tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3294             : }
    3295             : /* Directory file operations installed by proc_pid_instantiate(); tgid_pidfd_to_pid() also uses this table's address as a type tag. */
    3296             : static const struct file_operations proc_tgid_base_operations = {
    3297             :         .read           = generic_read_dir,
    3298             :         .iterate_shared = proc_tgid_base_readdir,
    3299             :         .llseek         = generic_file_llseek,
    3300             : };
    3301             : /* Returns proc_pid() of @file's inode if @file uses proc_tgid_base_operations, else ERR_PTR(-EBADF). */
    3302           0 : struct pid *tgid_pidfd_to_pid(const struct file *file)
    3303             : {
    3304           0 :         if (file->f_op != &proc_tgid_base_operations) /* the file is recognised purely by its f_op pointer */
    3305             :                 return ERR_PTR(-EBADF);
    3306             : 
    3307           0 :         return proc_pid(file_inode(file));
    3308             : }
    3309             : /* .lookup handler of proc_tgid_base_inode_operations: searches tgid_base_stuff from first entry to one-past-last; @flags is unused. */
    3310           0 : static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3311             : {
    3312           0 :         return proc_pident_lookup(dir, dentry,
    3313             :                                   tgid_base_stuff,
    3314             :                                   tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
    3315             : }
    3316             : /* Inode operations that proc_pid_instantiate() assigns to each per-process directory inode. */
    3317             : static const struct inode_operations proc_tgid_base_inode_operations = {
    3318             :         .lookup         = proc_tgid_base_lookup,
    3319             :         .getattr        = pid_getattr,
    3320             :         .setattr        = proc_setattr,
    3321             :         .permission     = proc_pid_permission,
    3322             : };
    3323             : 
    3324             : /**
    3325             :  * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
    3326             :  * @pid: pid that should be flushed.
    3327             :  *
    3328             :  * This function walks a list of inodes (that belong to any proc
    3329             :  * filesystem) that are attached to the pid and flushes them from
    3330             :  * the dentry cache.
    3331             :  *
    3332             :  * It is safe and reasonable to cache /proc entries for a task until
    3333             :  * that task exits.  After that they just clog up the dcache with
    3334             :  * useless entries, possibly causing useful dcache entries to be
    3335             :  * flushed instead.  This routine is provided to flush those useless
    3336             :  * dcache entries when a process is reaped.
    3337             :  *
    3338             :  * NOTE: This routine is just an optimization, so it does not guarantee
    3339             :  *       that no dcache entries will exist after a process is reaped;
    3340             :  *       it just makes it very unlikely that any will persist.
    3341             :  */
    3342             : 
    3343          93 : void proc_flush_pid(struct pid *pid)
    3344             : {
    3345          93 :         proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); /* the inode list and its lock both live in @pid */
    3346          93 : }
    3347             : /* Creates the directory inode for @task and attaches it to @dentry; returns ERR_PTR(-ENOENT) if no inode is made. @ptr is unused. */
    3348           0 : static struct dentry *proc_pid_instantiate(struct dentry * dentry,
    3349             :                                    struct task_struct *task, const void *ptr)
    3350             : {
    3351             :         struct inode *inode;
    3352             : 
    3353           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
    3354           0 :         if (!inode)
    3355             :                 return ERR_PTR(-ENOENT);
    3356             :         /* Install the tgid-level tables so lookup and readdir on this inode go through tgid_base_stuff. */
    3357           0 :         inode->i_op = &proc_tgid_base_inode_operations;
    3358           0 :         inode->i_fop = &proc_tgid_base_operations;
    3359           0 :         inode->i_flags|=S_IMMUTABLE;
    3360             : 
    3361           0 :         set_nlink(inode, nlink_tgid);
    3362           0 :         pid_update_inode(task, inode);
    3363             : 
    3364           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    3365           0 :         return d_splice_alias(inode, dentry);
    3366             : }
    3367             : /* Resolves a numeric name to a task in this procfs instance's pid namespace and instantiates its directory; @flags is unused. */
    3368           0 : struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
    3369             : {
    3370             :         struct task_struct *task;
    3371             :         unsigned tgid;
    3372             :         struct proc_fs_info *fs_info;
    3373             :         struct pid_namespace *ns;
    3374           0 :         struct dentry *result = ERR_PTR(-ENOENT); /* default for every early exit below */
    3375             : 
    3376           0 :         tgid = name_to_int(&dentry->d_name);
    3377           0 :         if (tgid == ~0U) /* ~0U is treated as "name is not a usable number" */
    3378             :                 goto out;
    3379             : 
    3380           0 :         fs_info = proc_sb_info(dentry->d_sb);
    3381           0 :         ns = fs_info->pid_ns;
    3382             :         rcu_read_lock();
    3383           0 :         task = find_task_by_pid_ns(tgid, ns);
    3384           0 :         if (task)
    3385             :                 get_task_struct(task); /* take a reference before leaving the RCU section */
    3386             :         rcu_read_unlock();
    3387           0 :         if (!task)
    3388             :                 goto out;
    3389             : 
    3390             :         /* Limit procfs to only ptraceable tasks */
    3391           0 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
    3392           0 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
    3393             :                         goto out_put_task;
    3394             :         }
    3395             : 
    3396           0 :         result = proc_pid_instantiate(dentry, task, NULL);
    3397             : out_put_task:
    3398           0 :         put_task_struct(task); /* drops the reference taken above, on success and failure alike */
    3399             : out:
    3400           0 :         return result;
    3401             : }
    3402             : 
    3403             : /*
    3404             :  * Cursor used by next_tgid() to find the first task with tgid >= iter.tgid.
    3405             :  * @tgid is the number to search from; @task is the task found (referenced) or NULL.
    3406             :  */
    3407             : struct tgid_iter {
    3408             :         unsigned int tgid;
    3409             :         struct task_struct *task;
    3410             : };
    3411           0 : static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
    3412             : {
    3413             :         struct pid *pid;
    3414             :         /* Puts the incoming iter.task (if any), then returns the next entry: task referenced, or task == NULL when none is found. */
    3415           0 :         if (iter.task)
    3416           0 :                 put_task_struct(iter.task);
    3417             :         rcu_read_lock();
    3418             : retry:
    3419           0 :         iter.task = NULL;
    3420           0 :         pid = find_ge_pid(iter.tgid, ns);
    3421           0 :         if (pid) {
    3422           0 :                 iter.tgid = pid_nr_ns(pid, ns); /* resync the cursor to the number actually found */
    3423           0 :                 iter.task = pid_task(pid, PIDTYPE_TGID);
    3424           0 :                 if (!iter.task) { /* no task attached to this pid as PIDTYPE_TGID: try the next number */
    3425           0 :                         iter.tgid += 1;
    3426           0 :                         goto retry;
    3427             :                 }
    3428           0 :                 get_task_struct(iter.task); /* the caller owns this reference */
    3429             :         }
    3430             :         rcu_read_unlock();
    3431           0 :         return iter;
    3432             : }
    3433             : /* Directory position of tgid 0 in proc_pid_readdir(); the two positions just before it hold "self" and "thread-self". */
    3434             : #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
    3435             : 
    3436             : /* for the /proc/ directory itself, after non-process stuff has been done: emits "self", "thread-self", then one entry per visible tgid */
    3437           0 : int proc_pid_readdir(struct file *file, struct dir_context *ctx)
    3438             : {
    3439             :         struct tgid_iter iter;
    3440           0 :         struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
    3441           0 :         struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
    3442           0 :         loff_t pos = ctx->pos;
    3443             :         /* Positions at or past PID_MAX_LIMIT + TGID_OFFSET mean the listing is already complete. */
    3444           0 :         if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
    3445             :                 return 0;
    3446             : 
    3447           0 :         if (pos == TGID_OFFSET - 2) {
    3448           0 :                 struct inode *inode = d_inode(fs_info->proc_self);
    3449           0 :                 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
    3450             :                         return 0;
    3451           0 :                 ctx->pos = pos = pos + 1;
    3452             :         }
    3453           0 :         if (pos == TGID_OFFSET - 1) {
    3454           0 :                 struct inode *inode = d_inode(fs_info->proc_thread_self);
    3455           0 :                 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
    3456             :                         return 0;
    3457           0 :                 ctx->pos = pos = pos + 1;
    3458             :         }
    3459           0 :         iter.tgid = pos - TGID_OFFSET; /* convert the directory position back into a starting tgid */
    3460           0 :         iter.task = NULL;
    3461           0 :         for (iter = next_tgid(ns, iter);
    3462             :              iter.task;
    3463           0 :              iter.tgid += 1, iter = next_tgid(ns, iter)) {
    3464             :                 char name[10 + 1]; /* room for a 10-character decimal number plus the terminating NUL */
    3465             :                 unsigned int len;
    3466             : 
    3467           0 :                 cond_resched();
    3468           0 :                 if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
    3469           0 :                         continue; /* skipped tasks are not listed; next_tgid() puts the reference */
    3470             : 
    3471           0 :                 len = snprintf(name, sizeof(name), "%u", iter.tgid);
    3472           0 :                 ctx->pos = iter.tgid + TGID_OFFSET;
    3473           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3474             :                                      proc_pid_instantiate, iter.task, NULL)) {
    3475           0 :                         put_task_struct(iter.task); /* leaving the loop early, so put the reference here */
    3476           0 :                         return 0;
    3477             :                 }
    3478             :         }
    3479           0 :         ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; /* mark the listing as finished for later calls */
    3480           0 :         return 0;
    3481             : }
    3482             : 
    3483             : /*
    3484             :  * proc_tid_comm_permission is a special permission function exclusively
    3485             :  * used for the node /proc/<pid>/task/<tid>/comm.
    3486             :  * It bypasses generic permission checks in the case where a task of the same
    3487             :  * task group attempts to access the node.
    3488             :  * The rationale behind this is that glibc and bionic access this node for
    3489             :  * cross thread naming (pthread_set/getname_np(!self)). However, if
    3490             :  * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
    3491             :  * which locks out the cross thread naming implementation.
    3492             :  * This function makes sure that the node is always accessible for members of
    3493             :  * same thread group.
    3494             :  */
    3495           0 : static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
    3496             :                                     struct inode *inode, int mask)
    3497             : {
    3498             :         bool is_same_tgroup;
    3499             :         struct task_struct *task;
    3500             : 
    3501           0 :         task = get_proc_task(inode);
    3502           0 :         if (!task)
    3503             :                 return -ESRCH;
    3504           0 :         is_same_tgroup = same_thread_group(current, task);
    3505           0 :         put_task_struct(task); /* only the comparison result is needed from here on */
    3506             : 
    3507           0 :         if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
    3508             :                 /* This file (/proc/<pid>/task/<tid>/comm) can always be
    3509             :                  * read or written by the members of the corresponding
    3510             :                  * thread group.
    3511             :                  */
    3512             :                 return 0;
    3513             :         }
    3514             :         /* Everything else gets the generic check; note it is passed &init_user_ns rather than @mnt_userns. */
    3515           0 :         return generic_permission(&init_user_ns, inode, mask);
    3516             : }
    3517             : /* Inode operations for the per-thread "comm" node; only .permission is overridden. */
    3518             : static const struct inode_operations proc_tid_comm_inode_operations = {
    3519             :                 .permission = proc_tid_comm_permission,
    3520             : };
    3521             : 
    3522             : /*
    3523             :  * Tasks
                        *
                        * Table of the entries found inside a per-thread directory (the
                        * directories created by proc_task_instantiate()).  It is walked by
                        * proc_tid_base_readdir() and proc_tid_base_lookup(), and
                        * set_proc_pid_nlink() derives nlink_tid from it.
                        *
                        * Entries wrapped in #ifdef are only present when the corresponding
                        * kernel configuration option is enabled.
    3524             :  */
    3525             : static const struct pid_entry tid_base_stuff[] = {
    3526             :         DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
    3527             :         DIR("fdinfo",    S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
    3528             :         DIR("ns",      S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
    3529             : #ifdef CONFIG_NET
    3530             :         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
    3531             : #endif
    3532             :         REG("environ",   S_IRUSR, proc_environ_operations),
    3533             :         REG("auxv",      S_IRUSR, proc_auxv_operations),
    3534             :         ONE("status",    S_IRUGO, proc_pid_status),
    3535             :         ONE("personality", S_IRUSR, proc_pid_personality),
    3536             :         ONE("limits",  S_IRUGO, proc_pid_limits),
    3537             : #ifdef CONFIG_SCHED_DEBUG
    3538             :         REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
    3539             : #endif
    3540             :         NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
    3541             :                          &proc_tid_comm_inode_operations,
    3542             :                          &proc_pid_set_comm_operations, {}),
    3543             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
    3544             :         ONE("syscall",   S_IRUSR, proc_pid_syscall),
    3545             : #endif
    3546             :         REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
    3547             :         ONE("stat",      S_IRUGO, proc_tid_stat),
    3548             :         ONE("statm",     S_IRUGO, proc_pid_statm),
    3549             :         REG("maps",      S_IRUGO, proc_pid_maps_operations),
    3550             : #ifdef CONFIG_PROC_CHILDREN
    3551             :         REG("children",  S_IRUGO, proc_tid_children_operations),
    3552             : #endif
    3553             : #ifdef CONFIG_NUMA
    3554             :         REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
    3555             : #endif
    3556             :         REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
    3557             :         LNK("cwd",       proc_cwd_link),
    3558             :         LNK("root",      proc_root_link),
    3559             :         LNK("exe",       proc_exe_link),
    3560             :         REG("mounts",    S_IRUGO, proc_mounts_operations),
    3561             :         REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    3562             : #ifdef CONFIG_PROC_PAGE_MONITOR
    3563             :         REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
    3564             :         REG("smaps",     S_IRUGO, proc_pid_smaps_operations),
    3565             :         REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
    3566             :         REG("pagemap",    S_IRUSR, proc_pagemap_operations),
    3567             : #endif
    3568             : #ifdef CONFIG_SECURITY
    3569             :         DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
    3570             : #endif
    3571             : #ifdef CONFIG_KALLSYMS
    3572             :         ONE("wchan",     S_IRUGO, proc_pid_wchan),
    3573             : #endif
    3574             : #ifdef CONFIG_STACKTRACE
    3575             :         ONE("stack",      S_IRUSR, proc_pid_stack),
    3576             : #endif
    3577             : #ifdef CONFIG_SCHED_INFO
    3578             :         ONE("schedstat", S_IRUGO, proc_pid_schedstat),
    3579             : #endif
    3580             : #ifdef CONFIG_LATENCYTOP
    3581             :         REG("latency",  S_IRUGO, proc_lstats_operations),
    3582             : #endif
    3583             : #ifdef CONFIG_PROC_PID_CPUSET
    3584             :         ONE("cpuset",    S_IRUGO, proc_cpuset_show),
    3585             : #endif
    3586             : #ifdef CONFIG_CGROUPS
    3587             :         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
    3588             : #endif
    3589             : #ifdef CONFIG_PROC_CPU_RESCTRL
    3590             :         ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
    3591             : #endif
    3592             :         ONE("oom_score", S_IRUGO, proc_oom_score),
    3593             :         REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
    3594             :         REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
    3595             : #ifdef CONFIG_AUDIT
    3596             :         REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
    3597             :         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
    3598             : #endif
    3599             : #ifdef CONFIG_FAULT_INJECTION
    3600             :         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
    3601             :         REG("fail-nth", 0644, proc_fail_nth_operations),
    3602             : #endif
    3603             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    3604             :         ONE("io",     S_IRUSR, proc_tid_io_accounting),
    3605             : #endif
    3606             : #ifdef CONFIG_USER_NS
    3607             :         REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
    3608             :         REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
    3609             :         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
    3610             :         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
    3611             : #endif
    3612             : #ifdef CONFIG_LIVEPATCH
    3613             :         ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
    3614             : #endif
    3615             : #ifdef CONFIG_PROC_PID_ARCH_STATUS
    3616             :         ONE("arch_status", S_IRUGO, proc_pid_arch_status),
    3617             : #endif
    3618             : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
    3619             :         ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
    3620             : #endif
    3621             : };
    3622             : 
                       /*
                        * Directory iteration handler (.iterate_shared) for a per-thread
                        * directory.  Hands the whole tid_base_stuff table, together with its
                        * element count, to proc_pident_readdir() and returns that result.
                        */
    3623           0 : static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
    3624             : {
    3625           0 :         return proc_pident_readdir(file, ctx,
    3626             :                                    tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3627             : }
    3628             : 
                       /*
                        * Name lookup handler (.lookup) for a per-thread directory.  Delegates
                        * to proc_pident_lookup(), passing the first element of tid_base_stuff
                        * and the one-past-the-end pointer as the range to search.
                        * @flags is not used here.
                        */
    3629           0 : static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3630             : {
    3631           0 :         return proc_pident_lookup(dir, dentry,
    3632             :                                   tid_base_stuff,
    3633             :                                   tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
    3634             : }
    3635             : 
                       /*
                        * File operations for a per-thread directory; installed as i_fop by
                        * proc_task_instantiate().
                        */
    3636             : static const struct file_operations proc_tid_base_operations = {
    3637             :         .read           = generic_read_dir,
    3638             :         .iterate_shared = proc_tid_base_readdir,
    3639             :         .llseek         = generic_file_llseek,
    3640             : };
    3641             : 
                       /*
                        * Inode operations for a per-thread directory; installed as i_op by
                        * proc_task_instantiate().
                        */
    3642             : static const struct inode_operations proc_tid_base_inode_operations = {
    3643             :         .lookup         = proc_tid_base_lookup,
    3644             :         .getattr        = pid_getattr,
    3645             :         .setattr        = proc_setattr,
    3646             : };
    3647             : 
                       /*
                        * Create the directory inode for one thread and attach it to @dentry.
                        *
                        * @dentry: dentry being instantiated; its superblock is used for the
                        *          new inode.
                        * @task:   thread the directory describes.
                        * @ptr:    unused here; callers in this file pass NULL.
                        *
                        * Returns the result of d_splice_alias(), or ERR_PTR(-ENOENT) when
                        * proc_pid_make_inode() fails to produce an inode.
                        */
    3648           0 : static struct dentry *proc_task_instantiate(struct dentry *dentry,
    3649             :         struct task_struct *task, const void *ptr)
    3650             : {
    3651             :         struct inode *inode;
    3652           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
    3653           0 :         if (!inode)
    3654             :                 return ERR_PTR(-ENOENT);
    3655             : 
    3656           0 :         inode->i_op = &proc_tid_base_inode_operations;
    3657           0 :         inode->i_fop = &proc_tid_base_operations;
    3658           0 :         inode->i_flags |= S_IMMUTABLE;
    3659             : 
                               /* nlink_tid is computed once by set_proc_pid_nlink(). */
    3660           0 :         set_nlink(inode, nlink_tid);
    3661           0 :         pid_update_inode(task, inode);
    3662             : 
    3663           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    3664           0 :         return d_splice_alias(inode, dentry);
    3665             : }
    3666             : 
                       /*
                        * Name lookup handler (.lookup) for proc_task_inode_operations.
                        *
                        * The name is parsed as a thread id, resolved in the pid namespace of
                        * the proc superblock, and accepted only if the resulting task is in
                        * the same thread group as the task that owns @dir.
                        *
                        * Returns the dentry produced by proc_task_instantiate(), or
                        * ERR_PTR(-ENOENT) when the owning task is gone, the name does not
                        * parse, no such task exists, or it belongs to another thread group.
                        * @flags is not used here.
                        */
    3667           0 : static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
    3668             : {
    3669             :         struct task_struct *task;
    3670           0 :         struct task_struct *leader = get_proc_task(dir);
    3671             :         unsigned tid;
    3672             :         struct proc_fs_info *fs_info;
    3673             :         struct pid_namespace *ns;
    3674           0 :         struct dentry *result = ERR_PTR(-ENOENT);
    3675             : 
    3676           0 :         if (!leader)
    3677             :                 goto out_no_task;
    3678             : 
                               /* ~0U is treated as name_to_int()'s "could not parse" value. */
    3679           0 :         tid = name_to_int(&dentry->d_name);
    3680           0 :         if (tid == ~0U)
    3681             :                 goto out;
    3682             : 
    3683           0 :         fs_info = proc_sb_info(dentry->d_sb);
    3684           0 :         ns = fs_info->pid_ns;
                               /* Take the task reference before leaving the RCU read section. */
    3685             :         rcu_read_lock();
    3686           0 :         task = find_task_by_pid_ns(tid, ns);
    3687           0 :         if (task)
    3688             :                 get_task_struct(task);
    3689             :         rcu_read_unlock();
    3690           0 :         if (!task)
    3691             :                 goto out;
    3692           0 :         if (!same_thread_group(leader, task))
    3693             :                 goto out_drop_task;
    3694             : 
    3695           0 :         result = proc_task_instantiate(dentry, task, NULL);
                       /* The labels below drop references in reverse order of acquisition. */
    3696             : out_drop_task:
    3697           0 :         put_task_struct(task);
    3698             : out:
    3699           0 :         put_task_struct(leader);
    3700             : out_no_task:
    3701           0 :         return result;
    3702             : }
    3703             : 
    3704             : /*
    3705             :  * Find the first tid of a thread group to return to user space.
    3706             :  *
    3707             :  * Usually this is just the thread group leader, but if the user's
    3708             :  * buffer was too small or there was a seek into the middle of the
    3709             :  * directory we have more work to do.
    3710             :  *
    3711             :  * In the case of a short read we start with find_task_by_pid_ns().
    3712             :  *
    3713             :  * In the case of a seek we start with the leader and walk nr
    3714             :  * threads past it.
                        *
                        * @pid:   pid used to find a task of the thread group.
                        * @tid:   thread id to try first; 0 means no hint.
                        * @f_pos: number of threads to skip from the leader.
                        * @ns:    pid namespace in which @tid is resolved.
                        *
                        * Returns the task with a reference taken (get_task_struct()), or NULL
                        * if nothing was found.  The search runs inside an RCU read section.
    3715             :  */
    3716           0 : static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
    3717             :                                         struct pid_namespace *ns)
    3718             : {
    3719             :         struct task_struct *pos, *task;
    3720           0 :         unsigned long nr = f_pos;
    3721             : 
    3722             :         if (nr != f_pos)        /* 32bit overflow? */
    3723             :                 return NULL;
    3724             : 
    3725             :         rcu_read_lock();
    3726           0 :         task = pid_task(pid, PIDTYPE_PID);
    3727           0 :         if (!task)
    3728             :                 goto fail;
    3729             : 
    3730             :         /* Attempt to start with the tid of a thread */
    3731           0 :         if (tid && nr) {
    3732           0 :                 pos = find_task_by_pid_ns(tid, ns);
    3733           0 :                 if (pos && same_thread_group(pos, task))
    3734             :                         goto found;
    3735             :         }
    3736             : 
    3737             :         /* If nr is at or past the number of threads there is nothing to do */
    3738           0 :         if (nr >= get_nr_threads(task))
    3739             :                 goto fail;
    3740             : 
    3741             :         /* If we haven't found our starting place yet start
    3742             :          * with the leader and walk nr threads forward.
    3743             :          */
    3744           0 :         pos = task = task->group_leader;
    3745             :         do {
    3746           0 :                 if (!nr--)
    3747             :                         goto found;
    3748           0 :         } while_each_thread(task, pos);
                       /* The walk ended before nr threads were skipped: fall into fail. */
    3749             : fail:
    3750             :         pos = NULL;
    3751             :         goto out;
    3752             : found:
    3753             :         get_task_struct(pos);
    3754             : out:
    3755             :         rcu_read_unlock();
    3756             :         return pos;
    3757             : }
    3758             : 
    3759             : /*
    3760             :  * Find the next thread in the thread list.
    3761             :  * Return NULL if there is an error or no next thread.
    3762             :  *
    3763             :  * The reference to the input task_struct is released.
                        *
                        * NULL is returned when @start fails pid_alive(), or when the thread
                        * following it is the thread group leader.  Otherwise the returned
                        * task has had a reference taken with get_task_struct().
    3764             :  */
    3765           0 : static struct task_struct *next_tid(struct task_struct *start)
    3766             : {
    3767           0 :         struct task_struct *pos = NULL;
    3768             :         rcu_read_lock();
    3769           0 :         if (pid_alive(start)) {
    3770           0 :                 pos = next_thread(start);
    3771           0 :                 if (thread_group_leader(pos))
    3772             :                         pos = NULL;
    3773             :                 else
    3774             :                         get_task_struct(pos);
    3775             :         }
    3776             :         rcu_read_unlock();
    3777           0 :         put_task_struct(start);
    3778           0 :         return pos;
    3779             : }
    3780             : 
    3781             : /*
                        * for the /proc/TGID/task/ directories
                        *
                        * Emits the dot entries and then one entry per thread, named by the
                        * thread id as seen in the pid namespace of the proc superblock.
                        *
                        * Returns -ENOENT if proc_inode_is_dead() reports the inode dead,
                        * otherwise 0.
                        */
    3782           0 : static int proc_task_readdir(struct file *file, struct dir_context *ctx)
    3783             : {
    3784           0 :         struct inode *inode = file_inode(file);
    3785             :         struct task_struct *task;
    3786             :         struct pid_namespace *ns;
    3787             :         int tid;
    3788             : 
    3789           0 :         if (proc_inode_is_dead(inode))
    3790             :                 return -ENOENT;
    3791             : 
    3792           0 :         if (!dir_emit_dots(file, ctx))
    3793             :                 return 0;
    3794             : 
    3795             :         /* f_version caches the tid value that the last readdir call couldn't
    3796             :          * return. lseek aka telldir automagically resets f_version to 0.
    3797             :          */
    3798           0 :         ns = proc_pid_ns(inode->i_sb);
    3799           0 :         tid = (int)file->f_version;
    3800           0 :         file->f_version = 0;
                               /*
                                * ctx->pos - 2: presumably the first two positions belong to the
                                * entries emitted by dir_emit_dots().  next_tid() in the loop
                                * step releases the reference on the task just processed.
                                */
    3801           0 :         for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
    3802             :              task;
    3803           0 :              task = next_tid(task), ctx->pos++) {
    3804             :                 char name[10 + 1];
    3805             :                 unsigned int len;
    3806             : 
    3807           0 :                 tid = task_pid_nr_ns(task, ns);
    3808           0 :                 if (!tid)
    3809           0 :                         continue;       /* The task has just exited. */
    3810           0 :                 len = snprintf(name, sizeof(name), "%u", tid);
    3811           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3812             :                                 proc_task_instantiate, task, NULL)) {
    3813             :                         /* returning this tid failed, save it as the first
    3814             :                          * tid for the next readdir call */
    3815           0 :                         file->f_version = (u64)tid;
                                               /* break skips the loop step, so drop the ref here. */
    3816           0 :                         put_task_struct(task);
    3817           0 :                         break;
    3818             :                 }
    3819             :         }
    3820             : 
    3821             :         return 0;
    3822             : }
    3823             : 
                       /*
                        * getattr handler for proc_task_inode_operations.
                        *
                        * Fills @stat with generic_fillattr() and, if the task behind the
                        * inode can still be obtained, adds its current thread count to
                        * stat->nlink.  Always returns 0.
                        *
                        * @mnt_userns, @request_mask and @query_flags are not used here;
                        * generic_fillattr() is called with &init_user_ns.
                        */
    3824           0 : static int proc_task_getattr(struct user_namespace *mnt_userns,
    3825             :                              const struct path *path, struct kstat *stat,
    3826             :                              u32 request_mask, unsigned int query_flags)
    3827             : {
    3828           0 :         struct inode *inode = d_inode(path->dentry);
    3829           0 :         struct task_struct *p = get_proc_task(inode);
    3830           0 :         generic_fillattr(&init_user_ns, inode, stat);
    3831             : 
    3832           0 :         if (p) {
    3833           0 :                 stat->nlink += get_nr_threads(p);
    3834           0 :                 put_task_struct(p);
    3835             :         }
    3836             : 
    3837           0 :         return 0;
    3838             : }
    3839             : 
                       /*
                        * Inode operations built around proc_task_lookup() and
                        * proc_task_getattr().  Where this table is installed is not visible
                        * in this part of the file.
                        */
    3840             : static const struct inode_operations proc_task_inode_operations = {
    3841             :         .lookup         = proc_task_lookup,
    3842             :         .getattr        = proc_task_getattr,
    3843             :         .setattr        = proc_setattr,
    3844             :         .permission     = proc_pid_permission,
    3845             : };
    3846             : 
                       /*
                        * File operations whose directory iteration is proc_task_readdir(),
                        * i.e. the listing of the /proc/TGID/task/ directories.
                        */
    3847             : static const struct file_operations proc_task_operations = {
    3848             :         .read           = generic_read_dir,
    3849             :         .iterate_shared = proc_task_readdir,
    3850             :         .llseek         = generic_file_llseek,
    3851             : };
    3852             : 
                       /*
                        * Init-time (__init) helper that computes nlink_tid and nlink_tgid by
                        * running pid_entry_nlink() over the tid_base_stuff and
                        * tgid_base_stuff tables.  nlink_tid is later used by
                        * proc_task_instantiate() as the link count of a per-thread directory.
                        */
    3853           1 : void __init set_proc_pid_nlink(void)
    3854             : {
    3855           1 :         nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3856           1 :         nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3857           1 : }

Generated by: LCOV version 1.14