Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /* Common capabilities, needed by capability.o.
3 : */
4 :
5 : #include <linux/capability.h>
6 : #include <linux/audit.h>
7 : #include <linux/init.h>
8 : #include <linux/kernel.h>
9 : #include <linux/lsm_hooks.h>
10 : #include <linux/file.h>
11 : #include <linux/mm.h>
12 : #include <linux/mman.h>
13 : #include <linux/pagemap.h>
14 : #include <linux/swap.h>
15 : #include <linux/skbuff.h>
16 : #include <linux/netlink.h>
17 : #include <linux/ptrace.h>
18 : #include <linux/xattr.h>
19 : #include <linux/hugetlb.h>
20 : #include <linux/mount.h>
21 : #include <linux/sched.h>
22 : #include <linux/prctl.h>
23 : #include <linux/securebits.h>
24 : #include <linux/user_namespace.h>
25 : #include <linux/binfmts.h>
26 : #include <linux/personality.h>
27 : #include <linux/mnt_idmapping.h>
28 :
29 : /*
30 : * If a non-root user executes a setuid-root binary in
31 : * !secure(SECURE_NOROOT) mode, then we raise capabilities.
32 : * However if fE is also set, then the intent is for only
33 : * the file capabilities to be applied, and the setuid-root
34 : * bit is left on either to change the uid (plausible) or
35 : * to get full privilege on a kernel without file capabilities
36 : * support. So in that case we do not raise capabilities.
37 : *
38 : * Warn if that happens, once per boot.
39 : */
40 : static void warn_setuid_and_fcaps_mixed(const char *fname)
41 : {
42 : static int warned;
43 0 : if (!warned) {
44 0 : printk(KERN_INFO "warning: `%s' has both setuid-root and"
45 : " effective capabilities. Therefore not raising all"
46 : " capabilities.\n", fname);
47 0 : warned = 1;
48 : }
49 : }
50 :
51 : /**
52 : * cap_capable - Determine whether a task has a particular effective capability
53 : * @cred: The credentials to use
54 : * @targ_ns: The user namespace in which we need the capability
55 : * @cap: The capability to check for
56 : * @opts: Bitmask of options defined in include/linux/security.h
57 : *
58 : * Determine whether the nominated task has the specified capability amongst
59 : * its effective set, returning 0 if it does, -ve if it does not.
60 : *
61 : * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
62 : * and has_capability() functions. That is, it has the reverse semantics:
63 : * cap_has_capability() returns 0 when a task has a capability, but the
64 : * kernel's capable() and has_capability() returns 1 for this case.
65 : */
66 1 : int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
67 : int cap, unsigned int opts)
68 : {
69 1 : struct user_namespace *ns = targ_ns;
70 :
71 : /* See if cred has the capability in the target user namespace
72 : * by examining the target user namespace and all of the target
73 : * user namespace's parents.
74 : */
75 : for (;;) {
76 : /* Do we have the necessary capabilities? */
77 1 : if (ns == cred->user_ns)
78 1 : return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
79 :
80 : /*
81 : * If we're already at a lower level than we're looking for,
82 : * we're done searching.
83 : */
84 0 : if (ns->level <= cred->user_ns->level)
85 : return -EPERM;
86 :
87 : /*
88 : * The owner of the user namespace in the parent of the
89 : * user namespace has all caps.
90 : */
91 0 : if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
92 : return 0;
93 :
94 : /*
95 : * If you have a capability in a parent user ns, then you have
96 : * it over all children user namespaces as well.
97 : */
98 : ns = ns->parent;
99 : }
100 :
101 : /* We never get here */
102 : }
103 :
104 : /**
105 : * cap_settime - Determine whether the current process may set the system clock
106 : * @ts: The time to set
107 : * @tz: The timezone to set
108 : *
109 : * Determine whether the current process may set the system clock and timezone
110 : * information, returning 0 if permission granted, -ve if denied.
111 : */
112 0 : int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
113 : {
114 0 : if (!capable(CAP_SYS_TIME))
115 : return -EPERM;
116 0 : return 0;
117 : }
118 :
119 : /**
120 : * cap_ptrace_access_check - Determine whether the current process may access
121 : * another
122 : * @child: The process to be accessed
123 : * @mode: The mode of attachment.
124 : *
125 : * If we are in the same or an ancestor user_ns and have all the target
126 : * task's capabilities, then ptrace access is allowed.
127 : * If we have the ptrace capability to the target user_ns, then ptrace
128 : * access is allowed.
129 : * Else denied.
130 : *
131 : * Determine whether a process may access another, returning 0 if permission
132 : * granted, -ve if denied.
133 : */
134 0 : int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
135 : {
136 0 : int ret = 0;
137 : const struct cred *cred, *child_cred;
138 : const kernel_cap_t *caller_caps;
139 :
140 : rcu_read_lock();
141 0 : cred = current_cred();
142 0 : child_cred = __task_cred(child);
143 0 : if (mode & PTRACE_MODE_FSCREDS)
144 0 : caller_caps = &cred->cap_effective;
145 : else
146 0 : caller_caps = &cred->cap_permitted;
147 0 : if (cred->user_ns == child_cred->user_ns &&
148 0 : cap_issubset(child_cred->cap_permitted, *caller_caps))
149 : goto out;
150 0 : if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
151 : goto out;
152 0 : ret = -EPERM;
153 : out:
154 : rcu_read_unlock();
155 0 : return ret;
156 : }
157 :
158 : /**
159 : * cap_ptrace_traceme - Determine whether another process may trace the current
160 : * @parent: The task proposed to be the tracer
161 : *
162 : * If parent is in the same or an ancestor user_ns and has all current's
163 : * capabilities, then ptrace access is allowed.
164 : * If parent has the ptrace capability to current's user_ns, then ptrace
165 : * access is allowed.
166 : * Else denied.
167 : *
168 : * Determine whether the nominated task is permitted to trace the current
169 : * process, returning 0 if permission is granted, -ve if denied.
170 : */
171 0 : int cap_ptrace_traceme(struct task_struct *parent)
172 : {
173 0 : int ret = 0;
174 : const struct cred *cred, *child_cred;
175 :
176 : rcu_read_lock();
177 0 : cred = __task_cred(parent);
178 0 : child_cred = current_cred();
179 0 : if (cred->user_ns == child_cred->user_ns &&
180 0 : cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
181 : goto out;
182 0 : if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
183 : goto out;
184 0 : ret = -EPERM;
185 : out:
186 : rcu_read_unlock();
187 0 : return ret;
188 : }
189 :
190 : /**
191 : * cap_capget - Retrieve a task's capability sets
192 : * @target: The task from which to retrieve the capability sets
193 : * @effective: The place to record the effective set
194 : * @inheritable: The place to record the inheritable set
195 : * @permitted: The place to record the permitted set
196 : *
197 : * This function retrieves the capabilities of the nominated task and returns
198 : * them to the caller.
199 : */
200 0 : int cap_capget(struct task_struct *target, kernel_cap_t *effective,
201 : kernel_cap_t *inheritable, kernel_cap_t *permitted)
202 : {
203 : const struct cred *cred;
204 :
205 : /* Derived from kernel/capability.c:sys_capget. */
206 : rcu_read_lock();
207 0 : cred = __task_cred(target);
208 0 : *effective = cred->cap_effective;
209 0 : *inheritable = cred->cap_inheritable;
210 0 : *permitted = cred->cap_permitted;
211 : rcu_read_unlock();
212 0 : return 0;
213 : }
214 :
215 : /*
216 : * Determine whether the inheritable capabilities are limited to the old
217 : * permitted set. Returns 1 if they are limited, 0 if they are not.
218 : */
219 0 : static inline int cap_inh_is_capped(void)
220 : {
221 : /* they are so limited unless the current task has the CAP_SETPCAP
222 : * capability
223 : */
224 0 : if (cap_capable(current_cred(), current_cred()->user_ns,
225 : CAP_SETPCAP, CAP_OPT_NONE) == 0)
226 : return 0;
227 0 : return 1;
228 : }
229 :
230 : /**
231 : * cap_capset - Validate and apply proposed changes to current's capabilities
232 : * @new: The proposed new credentials; alterations should be made here
233 : * @old: The current task's current credentials
234 : * @effective: A pointer to the proposed new effective capabilities set
235 : * @inheritable: A pointer to the proposed new inheritable capabilities set
236 : * @permitted: A pointer to the proposed new permitted capabilities set
237 : *
238 : * This function validates and applies a proposed mass change to the current
239 : * process's capability sets. The changes are made to the proposed new
240 : * credentials, and assuming no error, will be committed by the caller of LSM.
241 : */
242 0 : int cap_capset(struct cred *new,
243 : const struct cred *old,
244 : const kernel_cap_t *effective,
245 : const kernel_cap_t *inheritable,
246 : const kernel_cap_t *permitted)
247 : {
248 0 : if (cap_inh_is_capped() &&
249 0 : !cap_issubset(*inheritable,
250 : cap_combine(old->cap_inheritable,
251 : old->cap_permitted)))
252 : /* incapable of using this inheritable set */
253 : return -EPERM;
254 :
255 0 : if (!cap_issubset(*inheritable,
256 : cap_combine(old->cap_inheritable,
257 : old->cap_bset)))
258 : /* no new pI capabilities outside bounding set */
259 : return -EPERM;
260 :
261 : /* verify restrictions on target's new Permitted set */
262 0 : if (!cap_issubset(*permitted, old->cap_permitted))
263 : return -EPERM;
264 :
265 : /* verify the _new_Effective_ is a subset of the _new_Permitted_ */
266 0 : if (!cap_issubset(*effective, *permitted))
267 : return -EPERM;
268 :
269 0 : new->cap_effective = *effective;
270 0 : new->cap_inheritable = *inheritable;
271 0 : new->cap_permitted = *permitted;
272 :
273 : /*
274 : * Mask off ambient bits that are no longer both permitted and
275 : * inheritable.
276 : */
277 0 : new->cap_ambient = cap_intersect(new->cap_ambient,
278 : cap_intersect(*permitted,
279 : *inheritable));
280 0 : if (WARN_ON(!cap_ambient_invariant_ok(new)))
281 : return -EINVAL;
282 0 : return 0;
283 : }
284 :
285 : /**
286 : * cap_inode_need_killpriv - Determine if inode change affects privileges
287 : * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
288 : *
289 : * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
290 : * affects the security markings on that inode, and if it is, should
291 : * inode_killpriv() be invoked or the change rejected.
292 : *
293 : * Return: 1 if security.capability has a value, meaning inode_killpriv()
294 : * is required, 0 otherwise, meaning inode_killpriv() is not required.
295 : */
296 0 : int cap_inode_need_killpriv(struct dentry *dentry)
297 : {
298 0 : struct inode *inode = d_backing_inode(dentry);
299 : int error;
300 :
301 0 : error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
302 0 : return error > 0;
303 : }
304 :
305 : /**
306 : * cap_inode_killpriv - Erase the security markings on an inode
307 : *
308 : * @mnt_userns: user namespace of the mount the inode was found from
309 : * @dentry: The inode/dentry to alter
310 : *
311 : * Erase the privilege-enhancing security markings on an inode.
312 : *
313 : * If the inode has been found through an idmapped mount the user namespace of
314 : * the vfsmount must be passed through @mnt_userns. This function will then
315 : * take care to map the inode according to @mnt_userns before checking
316 : * permissions. On non-idmapped mounts or if permission checking is to be
317 : * performed on the raw inode simply passs init_user_ns.
318 : *
319 : * Return: 0 if successful, -ve on error.
320 : */
321 0 : int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
322 : {
323 : int error;
324 :
325 0 : error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
326 0 : if (error == -EOPNOTSUPP)
327 0 : error = 0;
328 0 : return error;
329 : }
330 :
331 : static bool rootid_owns_currentns(kuid_t kroot)
332 : {
333 : struct user_namespace *ns;
334 :
335 0 : if (!uid_valid(kroot))
336 : return false;
337 :
338 0 : for (ns = current_user_ns(); ; ns = ns->parent) {
339 0 : if (from_kuid(ns, kroot) == 0)
340 : return true;
341 : if (ns == &init_user_ns)
342 : break;
343 : }
344 :
345 : return false;
346 : }
347 :
348 : static __u32 sansflags(__u32 m)
349 : {
350 0 : return m & ~VFS_CAP_FLAGS_EFFECTIVE;
351 : }
352 :
353 : static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
354 : {
355 0 : if (size != XATTR_CAPS_SZ_2)
356 : return false;
357 0 : return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
358 : }
359 :
360 : static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
361 : {
362 0 : if (size != XATTR_CAPS_SZ_3)
363 : return false;
364 0 : return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
365 : }
366 :
367 : /*
368 : * getsecurity: We are called for security.* before any attempt to read the
369 : * xattr from the inode itself.
370 : *
371 : * This gives us a chance to read the on-disk value and convert it. If we
372 : * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
373 : *
374 : * Note we are not called by vfs_getxattr_alloc(), but that is only called
375 : * by the integrity subsystem, which really wants the unconverted values -
376 : * so that's good.
377 : */
378 0 : int cap_inode_getsecurity(struct user_namespace *mnt_userns,
379 : struct inode *inode, const char *name, void **buffer,
380 : bool alloc)
381 : {
382 : int size, ret;
383 : kuid_t kroot;
384 : u32 nsmagic, magic;
385 : uid_t root, mappedroot;
386 0 : char *tmpbuf = NULL;
387 : struct vfs_cap_data *cap;
388 0 : struct vfs_ns_cap_data *nscap = NULL;
389 : struct dentry *dentry;
390 : struct user_namespace *fs_ns;
391 :
392 0 : if (strcmp(name, "capability") != 0)
393 : return -EOPNOTSUPP;
394 :
395 0 : dentry = d_find_any_alias(inode);
396 0 : if (!dentry)
397 : return -EINVAL;
398 :
399 0 : size = sizeof(struct vfs_ns_cap_data);
400 0 : ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS,
401 : &tmpbuf, size, GFP_NOFS);
402 0 : dput(dentry);
403 :
404 0 : if (ret < 0 || !tmpbuf)
405 : return ret;
406 :
407 0 : fs_ns = inode->i_sb->s_user_ns;
408 0 : cap = (struct vfs_cap_data *) tmpbuf;
409 0 : if (is_v2header((size_t) ret, cap)) {
410 : root = 0;
411 0 : } else if (is_v3header((size_t) ret, cap)) {
412 0 : nscap = (struct vfs_ns_cap_data *) tmpbuf;
413 0 : root = le32_to_cpu(nscap->rootid);
414 : } else {
415 : size = -EINVAL;
416 : goto out_free;
417 : }
418 :
419 0 : kroot = make_kuid(fs_ns, root);
420 :
421 : /* If this is an idmapped mount shift the kuid. */
422 0 : kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
423 :
424 : /* If the root kuid maps to a valid uid in current ns, then return
425 : * this as a nscap. */
426 0 : mappedroot = from_kuid(current_user_ns(), kroot);
427 0 : if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
428 0 : size = sizeof(struct vfs_ns_cap_data);
429 0 : if (alloc) {
430 0 : if (!nscap) {
431 : /* v2 -> v3 conversion */
432 0 : nscap = kzalloc(size, GFP_ATOMIC);
433 0 : if (!nscap) {
434 : size = -ENOMEM;
435 : goto out_free;
436 : }
437 0 : nsmagic = VFS_CAP_REVISION_3;
438 0 : magic = le32_to_cpu(cap->magic_etc);
439 0 : if (magic & VFS_CAP_FLAGS_EFFECTIVE)
440 0 : nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
441 0 : memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
442 0 : nscap->magic_etc = cpu_to_le32(nsmagic);
443 : } else {
444 : /* use allocated v3 buffer */
445 0 : tmpbuf = NULL;
446 : }
447 0 : nscap->rootid = cpu_to_le32(mappedroot);
448 0 : *buffer = nscap;
449 : }
450 : goto out_free;
451 : }
452 :
453 0 : if (!rootid_owns_currentns(kroot)) {
454 : size = -EOVERFLOW;
455 : goto out_free;
456 : }
457 :
458 : /* This comes from a parent namespace. Return as a v2 capability */
459 0 : size = sizeof(struct vfs_cap_data);
460 0 : if (alloc) {
461 0 : if (nscap) {
462 : /* v3 -> v2 conversion */
463 0 : cap = kzalloc(size, GFP_ATOMIC);
464 0 : if (!cap) {
465 : size = -ENOMEM;
466 : goto out_free;
467 : }
468 0 : magic = VFS_CAP_REVISION_2;
469 0 : nsmagic = le32_to_cpu(nscap->magic_etc);
470 0 : if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
471 0 : magic |= VFS_CAP_FLAGS_EFFECTIVE;
472 0 : memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
473 0 : cap->magic_etc = cpu_to_le32(magic);
474 : } else {
475 : /* use unconverted v2 */
476 0 : tmpbuf = NULL;
477 : }
478 0 : *buffer = cap;
479 : }
480 : out_free:
481 0 : kfree(tmpbuf);
482 0 : return size;
483 : }
484 :
485 : /**
486 : * rootid_from_xattr - translate root uid of vfs caps
487 : *
488 : * @value: vfs caps value which may be modified by this function
489 : * @size: size of @ivalue
490 : * @task_ns: user namespace of the caller
491 : * @mnt_userns: user namespace of the mount the inode was found from
492 : * @fs_userns: user namespace of the filesystem
493 : *
494 : * If the inode has been found through an idmapped mount the user namespace of
495 : * the vfsmount must be passed through @mnt_userns. This function will then
496 : * take care to map the inode according to @mnt_userns before checking
497 : * permissions. On non-idmapped mounts or if permission checking is to be
498 : * performed on the raw inode simply passs init_user_ns.
499 : */
500 : static kuid_t rootid_from_xattr(const void *value, size_t size,
501 : struct user_namespace *task_ns,
502 : struct user_namespace *mnt_userns,
503 : struct user_namespace *fs_userns)
504 : {
505 0 : const struct vfs_ns_cap_data *nscap = value;
506 : kuid_t rootkid;
507 0 : uid_t rootid = 0;
508 :
509 0 : if (size == XATTR_CAPS_SZ_3)
510 0 : rootid = le32_to_cpu(nscap->rootid);
511 :
512 0 : rootkid = make_kuid(task_ns, rootid);
513 0 : return mapped_kuid_user(mnt_userns, fs_userns, rootkid);
514 : }
515 :
516 : static bool validheader(size_t size, const struct vfs_cap_data *cap)
517 : {
518 0 : return is_v2header(size, cap) || is_v3header(size, cap);
519 : }
520 :
521 : /**
522 : * cap_convert_nscap - check vfs caps
523 : *
524 : * @mnt_userns: user namespace of the mount the inode was found from
525 : * @dentry: used to retrieve inode to check permissions on
526 : * @ivalue: vfs caps value which may be modified by this function
527 : * @size: size of @ivalue
528 : *
529 : * User requested a write of security.capability. If needed, update the
530 : * xattr to change from v2 to v3, or to fixup the v3 rootid.
531 : *
532 : * If the inode has been found through an idmapped mount the user namespace of
533 : * the vfsmount must be passed through @mnt_userns. This function will then
534 : * take care to map the inode according to @mnt_userns before checking
535 : * permissions. On non-idmapped mounts or if permission checking is to be
536 : * performed on the raw inode simply passs init_user_ns.
537 : *
538 : * Return: On success, return the new size; on error, return < 0.
539 : */
540 0 : int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
541 : const void **ivalue, size_t size)
542 : {
543 : struct vfs_ns_cap_data *nscap;
544 : uid_t nsrootid;
545 0 : const struct vfs_cap_data *cap = *ivalue;
546 : __u32 magic, nsmagic;
547 0 : struct inode *inode = d_backing_inode(dentry);
548 0 : struct user_namespace *task_ns = current_user_ns(),
549 0 : *fs_ns = inode->i_sb->s_user_ns;
550 : kuid_t rootid;
551 : size_t newsize;
552 :
553 0 : if (!*ivalue)
554 : return -EINVAL;
555 0 : if (!validheader(size, cap))
556 : return -EINVAL;
557 0 : if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
558 : return -EPERM;
559 0 : if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
560 0 : if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
561 : /* user is privileged, just write the v2 */
562 0 : return size;
563 :
564 0 : rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
565 0 : if (!uid_valid(rootid))
566 : return -EINVAL;
567 :
568 0 : nsrootid = from_kuid(fs_ns, rootid);
569 : if (nsrootid == -1)
570 : return -EINVAL;
571 :
572 0 : newsize = sizeof(struct vfs_ns_cap_data);
573 0 : nscap = kmalloc(newsize, GFP_ATOMIC);
574 0 : if (!nscap)
575 : return -ENOMEM;
576 0 : nscap->rootid = cpu_to_le32(nsrootid);
577 0 : nsmagic = VFS_CAP_REVISION_3;
578 0 : magic = le32_to_cpu(cap->magic_etc);
579 0 : if (magic & VFS_CAP_FLAGS_EFFECTIVE)
580 0 : nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
581 0 : nscap->magic_etc = cpu_to_le32(nsmagic);
582 0 : memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
583 :
584 0 : *ivalue = nscap;
585 0 : return newsize;
586 : }
587 :
588 : /*
589 : * Calculate the new process capability sets from the capability sets attached
590 : * to a file.
591 : */
592 0 : static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
593 : struct linux_binprm *bprm,
594 : bool *effective,
595 : bool *has_fcap)
596 : {
597 0 : struct cred *new = bprm->cred;
598 : unsigned i;
599 0 : int ret = 0;
600 :
601 0 : if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
602 0 : *effective = true;
603 :
604 0 : if (caps->magic_etc & VFS_CAP_REVISION_MASK)
605 0 : *has_fcap = true;
606 :
607 0 : CAP_FOR_EACH_U32(i) {
608 0 : __u32 permitted = caps->permitted.cap[i];
609 0 : __u32 inheritable = caps->inheritable.cap[i];
610 :
611 : /*
612 : * pP' = (X & fP) | (pI & fI)
613 : * The addition of pA' is handled later.
614 : */
615 0 : new->cap_permitted.cap[i] =
616 0 : (new->cap_bset.cap[i] & permitted) |
617 0 : (new->cap_inheritable.cap[i] & inheritable);
618 :
619 0 : if (permitted & ~new->cap_permitted.cap[i])
620 : /* insufficient to execute correctly */
621 0 : ret = -EPERM;
622 : }
623 :
624 : /*
625 : * For legacy apps, with no internal support for recognizing they
626 : * do not have enough capabilities, we return an error if they are
627 : * missing some "forced" (aka file-permitted) capabilities.
628 : */
629 0 : return *effective ? ret : 0;
630 : }
631 :
632 : /**
633 : * get_vfs_caps_from_disk - retrieve vfs caps from disk
634 : *
635 : * @mnt_userns: user namespace of the mount the inode was found from
636 : * @dentry: dentry from which @inode is retrieved
637 : * @cpu_caps: vfs capabilities
638 : *
639 : * Extract the on-exec-apply capability sets for an executable file.
640 : *
641 : * If the inode has been found through an idmapped mount the user namespace of
642 : * the vfsmount must be passed through @mnt_userns. This function will then
643 : * take care to map the inode according to @mnt_userns before checking
644 : * permissions. On non-idmapped mounts or if permission checking is to be
645 : * performed on the raw inode simply passs init_user_ns.
646 : */
647 0 : int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
648 : const struct dentry *dentry,
649 : struct cpu_vfs_cap_data *cpu_caps)
650 : {
651 0 : struct inode *inode = d_backing_inode(dentry);
652 : __u32 magic_etc;
653 : unsigned tocopy, i;
654 : int size;
655 0 : struct vfs_ns_cap_data data, *nscaps = &data;
656 0 : struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
657 : kuid_t rootkuid;
658 : struct user_namespace *fs_ns;
659 :
660 0 : memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
661 :
662 0 : if (!inode)
663 : return -ENODATA;
664 :
665 0 : fs_ns = inode->i_sb->s_user_ns;
666 0 : size = __vfs_getxattr((struct dentry *)dentry, inode,
667 : XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
668 0 : if (size == -ENODATA || size == -EOPNOTSUPP)
669 : /* no data, that's ok */
670 : return -ENODATA;
671 :
672 0 : if (size < 0)
673 : return size;
674 :
675 0 : if (size < sizeof(magic_etc))
676 : return -EINVAL;
677 :
678 0 : cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
679 :
680 0 : rootkuid = make_kuid(fs_ns, 0);
681 0 : switch (magic_etc & VFS_CAP_REVISION_MASK) {
682 : case VFS_CAP_REVISION_1:
683 0 : if (size != XATTR_CAPS_SZ_1)
684 : return -EINVAL;
685 : tocopy = VFS_CAP_U32_1;
686 : break;
687 : case VFS_CAP_REVISION_2:
688 0 : if (size != XATTR_CAPS_SZ_2)
689 : return -EINVAL;
690 : tocopy = VFS_CAP_U32_2;
691 : break;
692 : case VFS_CAP_REVISION_3:
693 0 : if (size != XATTR_CAPS_SZ_3)
694 : return -EINVAL;
695 0 : tocopy = VFS_CAP_U32_3;
696 0 : rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
697 : break;
698 :
699 : default:
700 : return -EINVAL;
701 : }
702 : /* Limit the caps to the mounter of the filesystem
703 : * or the more limited uid specified in the xattr.
704 : */
705 0 : rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
706 0 : if (!rootid_owns_currentns(rootkuid))
707 : return -ENODATA;
708 :
709 0 : CAP_FOR_EACH_U32(i) {
710 0 : if (i >= tocopy)
711 : break;
712 0 : cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
713 0 : cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
714 : }
715 :
716 0 : cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
717 0 : cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
718 :
719 0 : cpu_caps->rootid = rootkuid;
720 :
721 0 : return 0;
722 : }
723 :
724 : /*
725 : * Attempt to get the on-exec apply capability sets for an executable file from
726 : * its xattrs and, if present, apply them to the proposed credentials being
727 : * constructed by execve().
728 : */
729 0 : static int get_file_caps(struct linux_binprm *bprm, struct file *file,
730 : bool *effective, bool *has_fcap)
731 : {
732 0 : int rc = 0;
733 : struct cpu_vfs_cap_data vcaps;
734 :
735 0 : cap_clear(bprm->cred->cap_permitted);
736 :
737 0 : if (!file_caps_enabled)
738 : return 0;
739 :
740 0 : if (!mnt_may_suid(file->f_path.mnt))
741 : return 0;
742 :
743 : /*
744 : * This check is redundant with mnt_may_suid() but is kept to make
745 : * explicit that capability bits are limited to s_user_ns and its
746 : * descendants.
747 : */
748 0 : if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
749 : return 0;
750 :
751 0 : rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
752 0 : file->f_path.dentry, &vcaps);
753 0 : if (rc < 0) {
754 0 : if (rc == -EINVAL)
755 0 : printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
756 : bprm->filename);
757 0 : else if (rc == -ENODATA)
758 0 : rc = 0;
759 : goto out;
760 : }
761 :
762 0 : rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
763 :
764 : out:
765 0 : if (rc)
766 0 : cap_clear(bprm->cred->cap_permitted);
767 :
768 : return rc;
769 : }
770 :
771 0 : static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
772 :
773 : static inline bool __is_real(kuid_t uid, struct cred *cred)
774 0 : { return uid_eq(cred->uid, uid); }
775 :
776 : static inline bool __is_eff(kuid_t uid, struct cred *cred)
777 0 : { return uid_eq(cred->euid, uid); }
778 :
779 : static inline bool __is_suid(kuid_t uid, struct cred *cred)
780 0 : { return !__is_real(uid, cred) && __is_eff(uid, cred); }
781 :
782 : /*
783 : * handle_privileged_root - Handle case of privileged root
784 : * @bprm: The execution parameters, including the proposed creds
785 : * @has_fcap: Are any file capabilities set?
786 : * @effective: Do we have effective root privilege?
787 : * @root_uid: This namespace' root UID WRT initial USER namespace
788 : *
789 : * Handle the case where root is privileged and hasn't been neutered by
790 : * SECURE_NOROOT. If file capabilities are set, they won't be combined with
791 : * set UID root and nothing is changed. If we are root, cap_permitted is
792 : * updated. If we have become set UID root, the effective bit is set.
793 : */
794 0 : static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
795 : bool *effective, kuid_t root_uid)
796 : {
797 0 : const struct cred *old = current_cred();
798 0 : struct cred *new = bprm->cred;
799 :
800 0 : if (!root_privileged())
801 : return;
802 : /*
803 : * If the legacy file capability is set, then don't set privs
804 : * for a setuid root binary run by a non-root user. Do set it
805 : * for a root user just to cause least surprise to an admin.
806 : */
807 0 : if (has_fcap && __is_suid(root_uid, new)) {
808 0 : warn_setuid_and_fcaps_mixed(bprm->filename);
809 : return;
810 : }
811 : /*
812 : * To support inheritance of root-permissions and suid-root
813 : * executables under compatibility mode, we override the
814 : * capability sets for the file.
815 : */
816 0 : if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
817 : /* pP' = (cap_bset & ~0) | (pI & ~0) */
818 0 : new->cap_permitted = cap_combine(old->cap_bset,
819 : old->cap_inheritable);
820 : }
821 : /*
822 : * If only the real uid is 0, we do not set the effective bit.
823 : */
824 0 : if (__is_eff(root_uid, new))
825 0 : *effective = true;
826 : }
827 :
/*
 * Capability-set comparison helpers.  @field, and the @target/@source of
 * __cap_grew(), name a capability set without the cap_ prefix (e.g.
 * permitted, ambient, effective) and are token-pasted.  The remaining
 * arguments are cred pointers.
 *
 * Pointer arguments and the whole expansion are parenthesized so that the
 * macros remain correct when given an expression (e.g. "c ? a : b") or
 * when used inside a larger expression.
 *
 * __cap_gained(field, target, source): target's set has bits source's lacks.
 * __cap_grew(target, source, cred):    within one cred, set "target" has
 *                                      bits that set "source" lacks.
 * __cap_full(field, cred):             the set contains all of CAP_FULL_SET.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
834 :
835 : static inline bool __is_setuid(struct cred *new, const struct cred *old)
836 0 : { return !uid_eq(new->euid, old->uid); }
837 :
838 : static inline bool __is_setgid(struct cred *new, const struct cred *old)
839 0 : { return !gid_eq(new->egid, old->gid); }
840 :
841 : /*
842 : * 1) Audit candidate if current->cap_effective is set
843 : *
844 : * We do not bother to audit if 3 things are true:
845 : * 1) cap_effective has all caps
846 : * 2) we became root *OR* are were already root
847 : * 3) root is supposed to have all caps (SECURE_NOROOT)
848 : * Since this is just a normal root execing a process.
849 : *
850 : * Number 1 above might fail if you don't have a full bset, but I think
851 : * that is interesting information to audit.
852 : *
853 : * A number of other conditions require logging:
854 : * 2) something prevented setuid root getting all caps
855 : * 3) non-setuid root gets fcaps
856 : * 4) non-setuid root gets ambient
857 : */
858 0 : static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
859 : kuid_t root, bool has_fcap)
860 : {
861 0 : bool ret = false;
862 :
863 0 : if ((__cap_grew(effective, ambient, new) &&
864 0 : !(__cap_full(effective, new) &&
865 0 : (__is_eff(root, new) || __is_real(root, new)) &&
866 0 : root_privileged())) ||
867 0 : (root_privileged() &&
868 0 : __is_suid(root, new) &&
869 0 : !__cap_full(effective, new)) ||
870 0 : (!__is_setuid(new, old) &&
871 0 : ((has_fcap &&
872 0 : __cap_gained(permitted, new, old)) ||
873 0 : __cap_gained(ambient, new, old))))
874 :
875 : ret = true;
876 :
877 0 : return ret;
878 : }
879 :
880 : /**
881 : * cap_bprm_creds_from_file - Set up the proposed credentials for execve().
882 : * @bprm: The execution parameters, including the proposed creds
883 : * @file: The file to pull the credentials from
884 : *
885 : * Set up the proposed credentials for a new execution context being
886 : * constructed by execve(). The proposed creds in @bprm->cred is altered,
887 : * which won't take effect immediately.
888 : *
889 : * Return: 0 if successful, -ve on error.
890 : */
891 0 : int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
892 : {
893 : /* Process setpcap binaries and capabilities for uid 0 */
894 0 : const struct cred *old = current_cred();
895 0 : struct cred *new = bprm->cred;
896 0 : bool effective = false, has_fcap = false, is_setid;
897 : int ret;
898 : kuid_t root_uid;
899 :
900 0 : if (WARN_ON(!cap_ambient_invariant_ok(old)))
901 : return -EPERM;
902 :
903 0 : ret = get_file_caps(bprm, file, &effective, &has_fcap);
904 0 : if (ret < 0)
905 : return ret;
906 :
907 0 : root_uid = make_kuid(new->user_ns, 0);
908 :
909 0 : handle_privileged_root(bprm, has_fcap, &effective, root_uid);
910 :
911 : /* if we have fs caps, clear dangerous personality flags */
912 0 : if (__cap_gained(permitted, new, old))
913 0 : bprm->per_clear |= PER_CLEAR_ON_SETID;
914 :
915 : /* Don't let someone trace a set[ug]id/setpcap binary with the revised
916 : * credentials unless they have the appropriate permit.
917 : *
918 : * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
919 : */
920 0 : is_setid = __is_setuid(new, old) || __is_setgid(new, old);
921 :
922 0 : if ((is_setid || __cap_gained(permitted, new, old)) &&
923 0 : ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
924 0 : !ptracer_capable(current, new->user_ns))) {
925 : /* downgrade; they get no more than they had, and maybe less */
926 0 : if (!ns_capable(new->user_ns, CAP_SETUID) ||
927 0 : (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
928 0 : new->euid = new->uid;
929 0 : new->egid = new->gid;
930 : }
931 0 : new->cap_permitted = cap_intersect(new->cap_permitted,
932 : old->cap_permitted);
933 : }
934 :
935 0 : new->suid = new->fsuid = new->euid;
936 0 : new->sgid = new->fsgid = new->egid;
937 :
938 : /* File caps or setid cancels ambient. */
939 0 : if (has_fcap || is_setid)
940 0 : cap_clear(new->cap_ambient);
941 :
942 : /*
943 : * Now that we've computed pA', update pP' to give:
944 : * pP' = (X & fP) | (pI & fI) | pA'
945 : */
946 0 : new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
947 :
948 : /*
949 : * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set,
950 : * this is the same as pE' = (fE ? pP' : 0) | pA'.
951 : */
952 0 : if (effective)
953 0 : new->cap_effective = new->cap_permitted;
954 : else
955 0 : new->cap_effective = new->cap_ambient;
956 :
957 0 : if (WARN_ON(!cap_ambient_invariant_ok(new)))
958 : return -EPERM;
959 :
960 0 : if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
961 : ret = audit_log_bprm_fcaps(bprm, new, old);
962 : if (ret < 0)
963 : return ret;
964 : }
965 :
966 0 : new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
967 :
968 0 : if (WARN_ON(!cap_ambient_invariant_ok(new)))
969 : return -EPERM;
970 :
971 : /* Check for privilege-elevated exec. */
972 0 : if (is_setid ||
973 0 : (!__is_real(root_uid, new) &&
974 0 : (effective ||
975 0 : __cap_grew(permitted, ambient, new))))
976 0 : bprm->secureexec = 1;
977 :
978 : return 0;
979 : }
980 :
981 : /**
982 : * cap_inode_setxattr - Determine whether an xattr may be altered
983 : * @dentry: The inode/dentry being altered
984 : * @name: The name of the xattr to be changed
985 : * @value: The value that the xattr will be changed to
986 : * @size: The size of value
987 : * @flags: The replacement flag
988 : *
989 : * Determine whether an xattr may be altered or set on an inode, returning 0 if
990 : * permission is granted, -ve if denied.
991 : *
992 : * This is used to make sure security xattrs don't get updated or set by those
993 : * who aren't privileged to do so.
994 : */
995 0 : int cap_inode_setxattr(struct dentry *dentry, const char *name,
996 : const void *value, size_t size, int flags)
997 : {
998 0 : struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
999 :
1000 : /* Ignore non-security xattrs */
1001 0 : if (strncmp(name, XATTR_SECURITY_PREFIX,
1002 : XATTR_SECURITY_PREFIX_LEN) != 0)
1003 : return 0;
1004 :
1005 : /*
1006 : * For XATTR_NAME_CAPS the check will be done in
1007 : * cap_convert_nscap(), called by setxattr()
1008 : */
1009 0 : if (strcmp(name, XATTR_NAME_CAPS) == 0)
1010 : return 0;
1011 :
1012 0 : if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1013 : return -EPERM;
1014 0 : return 0;
1015 : }
1016 :
1017 : /**
1018 : * cap_inode_removexattr - Determine whether an xattr may be removed
1019 : *
1020 : * @mnt_userns: User namespace of the mount the inode was found from
1021 : * @dentry: The inode/dentry being altered
1022 : * @name: The name of the xattr to be changed
1023 : *
1024 : * Determine whether an xattr may be removed from an inode, returning 0 if
1025 : * permission is granted, -ve if denied.
1026 : *
1027 : * If the inode has been found through an idmapped mount the user namespace of
1028 : * the vfsmount must be passed through @mnt_userns. This function will then
1029 : * take care to map the inode according to @mnt_userns before checking
1030 : * permissions. On non-idmapped mounts or if permission checking is to be
1031 : * performed on the raw inode simply passs init_user_ns.
1032 : *
1033 : * This is used to make sure security xattrs don't get removed by those who
1034 : * aren't privileged to remove them.
1035 : */
1036 0 : int cap_inode_removexattr(struct user_namespace *mnt_userns,
1037 : struct dentry *dentry, const char *name)
1038 : {
1039 0 : struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1040 :
1041 : /* Ignore non-security xattrs */
1042 0 : if (strncmp(name, XATTR_SECURITY_PREFIX,
1043 : XATTR_SECURITY_PREFIX_LEN) != 0)
1044 : return 0;
1045 :
1046 0 : if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1047 : /* security.capability gets namespaced */
1048 0 : struct inode *inode = d_backing_inode(dentry);
1049 0 : if (!inode)
1050 : return -EINVAL;
1051 0 : if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
1052 : return -EPERM;
1053 0 : return 0;
1054 : }
1055 :
1056 0 : if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1057 : return -EPERM;
1058 0 : return 0;
1059 : }
1060 :
1061 : /*
1062 : * cap_emulate_setxuid() fixes the effective / permitted capabilities of
1063 : * a process after a call to setuid, setreuid, or setresuid.
1064 : *
1065 : * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
1066 : * {r,e,s}uid != 0, the permitted and effective capabilities are
1067 : * cleared.
1068 : *
1069 : * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
1070 : * capabilities of the process are cleared.
1071 : *
1072 : * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
1073 : * capabilities are set to the permitted capabilities.
1074 : *
1075 : * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
1076 : * never happen.
1077 : *
1078 : * -astor
1079 : *
1080 : * cevans - New behaviour, Oct '99
1081 : * A process may, via prctl(), elect to keep its capabilities when it
1082 : * calls setuid() and switches away from uid==0. Both permitted and
1083 : * effective sets will be retained.
1084 : * Without this change, it was impossible for a daemon to drop only some
1085 : * of its privilege. The call to setuid(!=0) would drop all privileges!
1086 : * Keeping uid 0 is not an option because uid 0 owns too many vital
1087 : * files..
1088 : * Thanks to Olaf Kirch and Peter Benie for spotting this.
1089 : */
1090 0 : static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1091 : {
1092 0 : kuid_t root_uid = make_kuid(old->user_ns, 0);
1093 :
1094 0 : if ((uid_eq(old->uid, root_uid) ||
1095 0 : uid_eq(old->euid, root_uid) ||
1096 0 : uid_eq(old->suid, root_uid)) &&
1097 0 : (!uid_eq(new->uid, root_uid) &&
1098 0 : !uid_eq(new->euid, root_uid) &&
1099 0 : !uid_eq(new->suid, root_uid))) {
1100 0 : if (!issecure(SECURE_KEEP_CAPS)) {
1101 0 : cap_clear(new->cap_permitted);
1102 0 : cap_clear(new->cap_effective);
1103 : }
1104 :
1105 : /*
1106 : * Pre-ambient programs expect setresuid to nonroot followed
1107 : * by exec to drop capabilities. We should make sure that
1108 : * this remains the case.
1109 : */
1110 0 : cap_clear(new->cap_ambient);
1111 : }
1112 0 : if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1113 0 : cap_clear(new->cap_effective);
1114 0 : if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1115 0 : new->cap_effective = new->cap_permitted;
1116 0 : }
1117 :
1118 : /**
1119 : * cap_task_fix_setuid - Fix up the results of setuid() call
1120 : * @new: The proposed credentials
1121 : * @old: The current task's current credentials
1122 : * @flags: Indications of what has changed
1123 : *
1124 : * Fix up the results of setuid() call before the credential changes are
1125 : * actually applied.
1126 : *
1127 : * Return: 0 to grant the changes, -ve to deny them.
1128 : */
1129 0 : int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1130 : {
1131 0 : switch (flags) {
1132 : case LSM_SETID_RE:
1133 : case LSM_SETID_ID:
1134 : case LSM_SETID_RES:
1135 : /* juggle the capabilities to follow [RES]UID changes unless
1136 : * otherwise suppressed */
1137 0 : if (!issecure(SECURE_NO_SETUID_FIXUP))
1138 0 : cap_emulate_setxuid(new, old);
1139 : break;
1140 :
1141 : case LSM_SETID_FS:
1142 : /* juggle the capabilties to follow FSUID changes, unless
1143 : * otherwise suppressed
1144 : *
1145 : * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
1146 : * if not, we might be a bit too harsh here.
1147 : */
1148 0 : if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1149 0 : kuid_t root_uid = make_kuid(old->user_ns, 0);
1150 0 : if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1151 0 : new->cap_effective =
1152 : cap_drop_fs_set(new->cap_effective);
1153 :
1154 0 : if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1155 0 : new->cap_effective =
1156 : cap_raise_fs_set(new->cap_effective,
1157 : new->cap_permitted);
1158 : }
1159 : break;
1160 :
1161 : default:
1162 : return -EINVAL;
1163 : }
1164 :
1165 : return 0;
1166 : }
1167 :
1168 : /*
1169 : * Rationale: code calling task_setscheduler, task_setioprio, and
1170 : * task_setnice, assumes that
1171 : * . if capable(cap_sys_nice), then those actions should be allowed
1172 : * . if not capable(cap_sys_nice), but acting on your own processes,
1173 : * then those actions should be allowed
1174 : * This is insufficient now since you can call code without suid, but
1175 : * yet with increased caps.
1176 : * So we check for increased caps on the target process.
1177 : */
1178 0 : static int cap_safe_nice(struct task_struct *p)
1179 : {
1180 0 : int is_subset, ret = 0;
1181 :
1182 : rcu_read_lock();
1183 0 : is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1184 0 : current_cred()->cap_permitted);
1185 0 : if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1186 0 : ret = -EPERM;
1187 : rcu_read_unlock();
1188 :
1189 0 : return ret;
1190 : }
1191 :
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Decide whether the current task may change the scheduler policy of @p.
 * The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int err = cap_safe_nice(p);

	return err;
}
1205 :
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set (not examined here)
 *
 * Decide whether the current task may change the I/O priority of @p.
 * The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int err = cap_safe_nice(p);

	return err;
}
1220 :
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set (not examined here)
 *
 * Decide whether the current task may change the priority of @p.
 * The decision is delegated to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int err = cap_safe_nice(p);

	return err;
}
1235 :
1236 : /*
1237 : * Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from
1238 : * the current task's bounding set. Returns 0 on success, -ve on error.
1239 : */
1240 0 : static int cap_prctl_drop(unsigned long cap)
1241 : {
1242 : struct cred *new;
1243 :
1244 0 : if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1245 : return -EPERM;
1246 0 : if (!cap_valid(cap))
1247 : return -EINVAL;
1248 :
1249 0 : new = prepare_creds();
1250 0 : if (!new)
1251 : return -ENOMEM;
1252 0 : cap_lower(new->cap_bset, cap);
1253 0 : return commit_creds(new);
1254 : }
1255 :
1256 : /**
1257 : * cap_task_prctl - Implement process control functions for this security module
1258 : * @option: The process control function requested
1259 : * @arg2: The argument data for this function
1260 : * @arg3: The argument data for this function
1261 : * @arg4: The argument data for this function
1262 : * @arg5: The argument data for this function
1263 : *
1264 : * Allow process control functions (sys_prctl()) to alter capabilities; may
1265 : * also deny access to other functions not otherwise implemented here.
1266 : *
1267 : * Return: 0 or +ve on success, -ENOSYS if this function is not implemented
1268 : * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM
1269 : * modules will consider performing the function.
1270 : */
1271 0 : int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1272 : unsigned long arg4, unsigned long arg5)
1273 : {
1274 0 : const struct cred *old = current_cred();
1275 : struct cred *new;
1276 :
1277 0 : switch (option) {
1278 : case PR_CAPBSET_READ:
1279 0 : if (!cap_valid(arg2))
1280 : return -EINVAL;
1281 0 : return !!cap_raised(old->cap_bset, arg2);
1282 :
1283 : case PR_CAPBSET_DROP:
1284 0 : return cap_prctl_drop(arg2);
1285 :
1286 : /*
1287 : * The next four prctl's remain to assist with transitioning a
1288 : * system from legacy UID=0 based privilege (when filesystem
1289 : * capabilities are not in use) to a system using filesystem
1290 : * capabilities only - as the POSIX.1e draft intended.
1291 : *
1292 : * Note:
1293 : *
1294 : * PR_SET_SECUREBITS =
1295 : * issecure_mask(SECURE_KEEP_CAPS_LOCKED)
1296 : * | issecure_mask(SECURE_NOROOT)
1297 : * | issecure_mask(SECURE_NOROOT_LOCKED)
1298 : * | issecure_mask(SECURE_NO_SETUID_FIXUP)
1299 : * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
1300 : *
1301 : * will ensure that the current process and all of its
1302 : * children will be locked into a pure
1303 : * capability-based-privilege environment.
1304 : */
1305 : case PR_SET_SECUREBITS:
1306 0 : if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1307 0 : & (old->securebits ^ arg2)) /*[1]*/
1308 0 : || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
1309 0 : || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
1310 0 : || (cap_capable(current_cred(),
1311 0 : current_cred()->user_ns,
1312 : CAP_SETPCAP,
1313 : CAP_OPT_NONE) != 0) /*[4]*/
1314 : /*
1315 : * [1] no changing of bits that are locked
1316 : * [2] no unlocking of locks
1317 : * [3] no setting of unsupported bits
1318 : * [4] doing anything requires privilege (go read about
1319 : * the "sendmail capabilities bug")
1320 : */
1321 : )
1322 : /* cannot change a locked bit */
1323 : return -EPERM;
1324 :
1325 0 : new = prepare_creds();
1326 0 : if (!new)
1327 : return -ENOMEM;
1328 0 : new->securebits = arg2;
1329 0 : return commit_creds(new);
1330 :
1331 : case PR_GET_SECUREBITS:
1332 0 : return old->securebits;
1333 :
1334 : case PR_GET_KEEPCAPS:
1335 0 : return !!issecure(SECURE_KEEP_CAPS);
1336 :
1337 : case PR_SET_KEEPCAPS:
1338 0 : if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
1339 : return -EINVAL;
1340 0 : if (issecure(SECURE_KEEP_CAPS_LOCKED))
1341 : return -EPERM;
1342 :
1343 0 : new = prepare_creds();
1344 0 : if (!new)
1345 : return -ENOMEM;
1346 0 : if (arg2)
1347 0 : new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1348 : else
1349 0 : new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1350 0 : return commit_creds(new);
1351 :
1352 : case PR_CAP_AMBIENT:
1353 0 : if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1354 0 : if (arg3 | arg4 | arg5)
1355 : return -EINVAL;
1356 :
1357 0 : new = prepare_creds();
1358 0 : if (!new)
1359 : return -ENOMEM;
1360 0 : cap_clear(new->cap_ambient);
1361 0 : return commit_creds(new);
1362 : }
1363 :
1364 0 : if (((!cap_valid(arg3)) | arg4 | arg5))
1365 : return -EINVAL;
1366 :
1367 0 : if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1368 0 : return !!cap_raised(current_cred()->cap_ambient, arg3);
1369 0 : } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1370 : arg2 != PR_CAP_AMBIENT_LOWER) {
1371 : return -EINVAL;
1372 : } else {
1373 0 : if (arg2 == PR_CAP_AMBIENT_RAISE &&
1374 0 : (!cap_raised(current_cred()->cap_permitted, arg3) ||
1375 0 : !cap_raised(current_cred()->cap_inheritable,
1376 0 : arg3) ||
1377 0 : issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1378 : return -EPERM;
1379 :
1380 0 : new = prepare_creds();
1381 0 : if (!new)
1382 : return -ENOMEM;
1383 0 : if (arg2 == PR_CAP_AMBIENT_RAISE)
1384 0 : cap_raise(new->cap_ambient, arg3);
1385 : else
1386 0 : cap_lower(new->cap_ambient, arg3);
1387 0 : return commit_creds(new);
1388 : }
1389 :
1390 : default:
1391 : /* No functionality available - continue with default */
1392 : return -ENOSYS;
1393 : }
1394 : }
1395 :
1396 : /**
1397 : * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
1398 : * @mm: The VM space in which the new mapping is to be made
1399 : * @pages: The size of the mapping
1400 : *
1401 : * Determine whether the allocation of a new virtual mapping by the current
1402 : * task is permitted.
1403 : *
1404 : * Return: 1 if permission is granted, 0 if not.
1405 : */
1406 0 : int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1407 : {
1408 0 : int cap_sys_admin = 0;
1409 :
1410 0 : if (cap_capable(current_cred(), &init_user_ns,
1411 : CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1412 0 : cap_sys_admin = 1;
1413 :
1414 0 : return cap_sys_admin;
1415 : }
1416 :
1417 : /**
1418 : * cap_mmap_addr - check if able to map given addr
1419 : * @addr: address attempting to be mapped
1420 : *
1421 : * If the process is attempting to map memory below dac_mmap_min_addr they need
1422 : * CAP_SYS_RAWIO. The other parameters to this function are unused by the
1423 : * capability security module.
1424 : *
1425 : * Return: 0 if this mapping should be allowed or -EPERM if not.
1426 : */
1427 0 : int cap_mmap_addr(unsigned long addr)
1428 : {
1429 0 : int ret = 0;
1430 :
1431 0 : if (addr < dac_mmap_min_addr) {
1432 0 : ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1433 : CAP_OPT_NONE);
1434 : /* set PF_SUPERPRIV if it turns out we allow the low mmap */
1435 0 : if (ret == 0)
1436 0 : current->flags |= PF_SUPERPRIV;
1437 : }
1438 0 : return ret;
1439 : }
1440 :
/**
 * cap_mmap_file - capability check for mapping a file
 * @file: unused
 * @reqprot: unused
 * @prot: unused
 * @flags: unused
 *
 * No argument is examined; the mapping is never refused here.
 *
 * Return: always 0.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1446 :
1447 : #ifdef CONFIG_SECURITY
1448 :
/*
 * Table pairing each LSM hook name with the cap_* function that serves it.
 * capability_init() passes the whole table to security_add_hooks().
 * NOTE(review): __lsm_ro_after_init presumably makes the table read-only once
 * init has finished - confirm against its definition.
 */
1449 : static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
1450 : LSM_HOOK_INIT(capable, cap_capable),
1451 : LSM_HOOK_INIT(settime, cap_settime),
1452 : LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1453 : LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1454 : LSM_HOOK_INIT(capget, cap_capget),
1455 : LSM_HOOK_INIT(capset, cap_capset),
1456 : LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1457 : LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1458 : LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1459 : LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1460 : LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1461 : LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1462 : LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1463 : LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1464 : LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1465 : LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1466 : LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1467 : LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1468 : };
1469 :
/*
 * Init callback referenced by DEFINE_LSM(capability): hands the
 * capability_hooks table, its element count and the name "capability" to
 * security_add_hooks().  Always returns 0.
 */
1470 : static int __init capability_init(void)
1471 : {
1472 : security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1473 : "capability");
1474 : return 0;
1475 : }
1476 :
/*
 * LSM descriptor for the capability module: named "capability", initialised
 * through capability_init().
 * NOTE(review): LSM_ORDER_FIRST presumably places this module ahead of the
 * other LSMs - confirm against the LSM framework.
 */
1477 : DEFINE_LSM(capability) = {
1478 : .name = "capability",
1479 : .order = LSM_ORDER_FIRST,
1480 : .init = capability_init,
1481 : };
1482 :
1483 : #endif /* CONFIG_SECURITY */
|