Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * linux/kernel/seccomp.c
4 : *
5 : * Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
6 : *
7 : * Copyright (C) 2012 Google, Inc.
8 : * Will Drewry <wad@chromium.org>
9 : *
10 : * This defines a simple but solid secure-computing facility.
11 : *
12 : * Mode 1 uses a fixed list of allowed system calls.
13 : * Mode 2 allows user-defined system call filters in the form
14 : * of Berkeley Packet Filters/Linux Socket Filters.
15 : */
16 : #define pr_fmt(fmt) "seccomp: " fmt
17 :
18 : #include <linux/refcount.h>
19 : #include <linux/audit.h>
20 : #include <linux/compat.h>
21 : #include <linux/coredump.h>
22 : #include <linux/kmemleak.h>
23 : #include <linux/nospec.h>
24 : #include <linux/prctl.h>
25 : #include <linux/sched.h>
26 : #include <linux/sched/task_stack.h>
27 : #include <linux/seccomp.h>
28 : #include <linux/slab.h>
29 : #include <linux/syscalls.h>
30 : #include <linux/sysctl.h>
31 :
32 : /* Not exposed in headers: strictly internal use only. */
33 : #define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
34 :
35 : #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
36 : #include <asm/syscall.h>
37 : #endif
38 :
39 : #ifdef CONFIG_SECCOMP_FILTER
40 : #include <linux/file.h>
41 : #include <linux/filter.h>
42 : #include <linux/pid.h>
43 : #include <linux/ptrace.h>
44 : #include <linux/capability.h>
45 : #include <linux/uaccess.h>
46 : #include <linux/anon_inodes.h>
47 : #include <linux/lockdep.h>
48 :
49 : /*
50 : * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
51 : * wrong direction flag in the ioctl number. This is the broken one,
52 : * which the kernel needs to keep supporting until all userspaces stop
53 : * using the wrong command number.
54 : */
55 : #define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
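/*
 * Editor's sketch (not part of the original file): userspace that must run
 * on kernels predating the corrected command number can retry with the old
 * one. SECCOMP_IOCTL_NOTIF_ID_VALID and the SECCOMP_IOR() helper are from
 * <linux/seccomp.h>; notif_id_valid() is a hypothetical wrapper.
 *
 *	static int notif_id_valid(int listener, __u64 id)
 *	{
 *		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &id) == 0)
 *			return 0;
 *		// Old kernels only understood the misdirected number.
 *		return ioctl(listener, SECCOMP_IOR(2, __u64), &id);
 *	}
 */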
56 :
57 : enum notify_state {
58 : SECCOMP_NOTIFY_INIT,
59 : SECCOMP_NOTIFY_SENT,
60 : SECCOMP_NOTIFY_REPLIED,
61 : };
62 :
63 : struct seccomp_knotif {
64 : /* The task_struct of the task whose filter triggered the notification */
65 : struct task_struct *task;
66 :
67 : /* The "cookie" for this request; this is unique for this filter. */
68 : u64 id;
69 :
70 : /*
71 : * The seccomp data. This pointer is valid the entire time this
72 : * notification is active, since it comes from __seccomp_filter which
73 : * eclipses the entire lifecycle here.
74 : */
75 : const struct seccomp_data *data;
76 :
77 : /*
78 : * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a
79 : * struct seccomp_knotif is created and starts out in INIT. Once the
80 : * handler reads the notification off of an FD, it transitions to SENT.
81 : * If a signal is received the state transitions back to INIT and
82 : * another message is sent. When the userspace handler replies, state
83 : * transitions to REPLIED.
84 : */
85 : enum notify_state state;
86 :
87 : /* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
88 : int error;
89 : long val;
90 : u32 flags;
91 :
92 : /*
93 : * Signals when this has changed states, such as the listener
94 : * dying, a new seccomp addfd message, or changing to REPLIED
95 : */
96 : struct completion ready;
97 :
98 : struct list_head list;
99 :
100 : /* outstanding addfd requests */
101 : struct list_head addfd;
102 : };
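/*
 * Editor's note: the state machine described above, in one picture. A signal
 * delivered to the target while the notification sits in SENT re-queues it,
 * so a listener may legitimately see the same id twice:
 *
 *	INIT --(listener recv)--> SENT --(listener reply)--> REPLIED
 *	  ^                         |
 *	  +---(signal in target)----+
 */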
103 :
104 : /**
105 : * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
106 : *
107 : * @file: A reference to the file to install in the other task
108 : * @fd: The fd number to install it at. If the fd number is -1, it means the
109 : * installing process should allocate the fd as normal.
110 : * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
111 : * is allowed.
112 : * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
113 : * @ret: The return value of the installing process. It is set to the fd num
114 : * upon success (>= 0).
115 : * @completion: Indicates that the installing process has completed fd
116 : * installation, or gone away (either due to successful
117 : * reply, or signal)
118 : *
119 : */
120 : struct seccomp_kaddfd {
121 : struct file *file;
122 : int fd;
123 : unsigned int flags;
124 : __u32 ioctl_flags;
125 :
126 : union {
127 : bool setfd;
128 : /* To only be set on reply */
129 : int ret;
130 : };
131 : struct completion completion;
132 : struct list_head list;
133 : };
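/*
 * Editor's sketch of the userspace request that populates this struct,
 * assuming a listener fd and a previously received notification `req`
 * (constants from <linux/seccomp.h>; fd_to_inject is hypothetical):
 *
 *	struct seccomp_notif_addfd addfd = {
 *		.id = req.id,
 *		.srcfd = fd_to_inject,		// becomes @file above
 *		.newfd = 0,			// must stay 0 without SETFD
 *		.flags = 0,			// or SECCOMP_ADDFD_FLAG_SETFD / _SEND
 *		.newfd_flags = O_CLOEXEC,	// only O_CLOEXEC is accepted
 *	};
 *	int fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
 *	// On success, fd is the descriptor number inside the target.
 */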
134 :
135 : /**
136 : * struct notification - container for seccomp userspace notifications. Since
137 : * most seccomp filters will not have notification listeners attached and this
138 : * structure is fairly large, we store the notification-specific stuff in a
139 : * separate structure.
140 : *
141 : * @request: A semaphore that users of this notification can wait on for
142 : * changes. Actual reads and writes are still controlled with
143 : * filter->notify_lock.
144 : * @next_id: The id of the next request.
145 : * @notifications: A list of struct seccomp_knotif elements.
146 : */
147 : struct notification {
148 : struct semaphore request;
149 : u64 next_id;
150 : struct list_head notifications;
151 : };
152 :
153 : #ifdef SECCOMP_ARCH_NATIVE
154 : /**
155 : * struct action_cache - per-filter cache of seccomp actions per
156 : * arch/syscall pair
157 : *
158 : * @allow_native: A bitmap where each bit represents whether the
159 : * filter will always allow the syscall, for the
160 : * native architecture.
161 : * @allow_compat: A bitmap where each bit represents whether the
162 : * filter will always allow the syscall, for the
163 : * compat architecture.
164 : */
165 : struct action_cache {
166 : DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
167 : #ifdef SECCOMP_ARCH_COMPAT
168 : DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
169 : #endif
170 : };
171 : #else
172 : struct action_cache { };
173 :
174 : static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
175 : const struct seccomp_data *sd)
176 : {
177 : return false;
178 : }
179 :
180 : static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
181 : {
182 : }
183 : #endif /* SECCOMP_ARCH_NATIVE */
184 :
185 : /**
186 : * struct seccomp_filter - container for seccomp BPF programs
187 : *
188 : * @refs: Reference count to manage the object lifetime.
189 : * A filter's reference count is incremented for each directly
190 : * attached task, once for the dependent filter, and if
191 : * requested for the user notifier. When @refs reaches zero,
192 : * the filter can be freed.
193 : * @users: A filter's @users count is incremented for each directly
194 : * attached task (filter installation, fork(), thread_sync),
195 : * and once for the dependent filter (tracked in filter->prev).
196 : * When it reaches zero it indicates that no direct or indirect
197 : * users of that filter exist. No new tasks can get associated with
198 : * this filter after reaching 0. The @users count is always smaller
199 : * or equal to @refs. Hence, reaching 0 for @users does not mean
200 : * the filter can be freed.
201 : * @cache: cache of arch/syscall mappings to actions
202 : * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
203 : * @prev: points to a previously installed, or inherited, filter
204 : * @prog: the BPF program to evaluate
205 : * @notif: the struct that holds all notification related information
206 : * @notify_lock: A lock for all notification-related accesses.
207 : * @wqh: A wait queue for poll if a notifier is in use.
208 : *
209 : * seccomp_filter objects are organized in a tree linked via the @prev
210 : * pointer. For any task, it appears to be a singly-linked list starting
211 : * with current->seccomp.filter, the most recently attached or inherited filter.
212 : * However, multiple filters may share a @prev node, by way of fork(), which
213 : * results in a unidirectional tree existing in memory. This is similar to
214 : * how namespaces work.
215 : *
216 : * seccomp_filter objects should never be modified after being attached
217 : * to a task_struct (other than @refs).
218 : */
219 : struct seccomp_filter {
220 : refcount_t refs;
221 : refcount_t users;
222 : bool log;
223 : struct action_cache cache;
224 : struct seccomp_filter *prev;
225 : struct bpf_prog *prog;
226 : struct notification *notif;
227 : struct mutex notify_lock;
228 : wait_queue_head_t wqh;
229 : };
230 :
231 : /* Limit any path through the tree to 256KB worth of instructions. */
232 : #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
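/*
 * Editor's note: struct sock_filter is 8 bytes, so the limit works out to
 * (1 << 18) / 8 = 32768 classic BPF instructions along any one path through
 * the filter tree, counting the 4-instruction per-filter penalty applied in
 * seccomp_attach_filter().
 */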
233 :
234 : /*
235 : * Endianness is explicitly ignored and left for BPF program authors to manage
236 : * as per the specific architecture.
237 : */
238 : static void populate_seccomp_data(struct seccomp_data *sd)
239 : {
240 : /*
241 : * Instead of using current_pt_regs(), we're already doing the work
242 : * to safely fetch "current", so just use "task" everywhere below.
243 : */
244 : struct task_struct *task = current;
245 : struct pt_regs *regs = task_pt_regs(task);
246 : unsigned long args[6];
247 :
248 : sd->nr = syscall_get_nr(task, regs);
249 : sd->arch = syscall_get_arch(task);
250 : syscall_get_arguments(task, regs, args);
251 : sd->args[0] = args[0];
252 : sd->args[1] = args[1];
253 : sd->args[2] = args[2];
254 : sd->args[3] = args[3];
255 : sd->args[4] = args[4];
256 : sd->args[5] = args[5];
257 : sd->instruction_pointer = KSTK_EIP(task);
258 : }
259 :
260 : /**
261 : * seccomp_check_filter - verify seccomp filter code
262 : * @filter: filter to verify
263 : * @flen: length of filter
264 : *
265 : * Takes a previously checked filter (by bpf_check_classic) and
266 : * redirects all filter code that loads struct sk_buff data
267 : * and related data through seccomp_bpf_load. It also
268 : * enforces length and alignment checking of those loads.
269 : *
270 : * Returns 0 if the rule set is legal or -EINVAL if not.
271 : */
272 : static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
273 : {
274 : int pc;
275 : for (pc = 0; pc < flen; pc++) {
276 : struct sock_filter *ftest = &filter[pc];
277 : u16 code = ftest->code;
278 : u32 k = ftest->k;
279 :
280 : switch (code) {
281 : case BPF_LD | BPF_W | BPF_ABS:
282 : ftest->code = BPF_LDX | BPF_W | BPF_ABS;
283 : /* 32-bit aligned and not out of bounds. */
284 : if (k >= sizeof(struct seccomp_data) || k & 3)
285 : return -EINVAL;
286 : continue;
287 : case BPF_LD | BPF_W | BPF_LEN:
288 : ftest->code = BPF_LD | BPF_IMM;
289 : ftest->k = sizeof(struct seccomp_data);
290 : continue;
291 : case BPF_LDX | BPF_W | BPF_LEN:
292 : ftest->code = BPF_LDX | BPF_IMM;
293 : ftest->k = sizeof(struct seccomp_data);
294 : continue;
295 : /* Explicitly include allowed calls. */
296 : case BPF_RET | BPF_K:
297 : case BPF_RET | BPF_A:
298 : case BPF_ALU | BPF_ADD | BPF_K:
299 : case BPF_ALU | BPF_ADD | BPF_X:
300 : case BPF_ALU | BPF_SUB | BPF_K:
301 : case BPF_ALU | BPF_SUB | BPF_X:
302 : case BPF_ALU | BPF_MUL | BPF_K:
303 : case BPF_ALU | BPF_MUL | BPF_X:
304 : case BPF_ALU | BPF_DIV | BPF_K:
305 : case BPF_ALU | BPF_DIV | BPF_X:
306 : case BPF_ALU | BPF_AND | BPF_K:
307 : case BPF_ALU | BPF_AND | BPF_X:
308 : case BPF_ALU | BPF_OR | BPF_K:
309 : case BPF_ALU | BPF_OR | BPF_X:
310 : case BPF_ALU | BPF_XOR | BPF_K:
311 : case BPF_ALU | BPF_XOR | BPF_X:
312 : case BPF_ALU | BPF_LSH | BPF_K:
313 : case BPF_ALU | BPF_LSH | BPF_X:
314 : case BPF_ALU | BPF_RSH | BPF_K:
315 : case BPF_ALU | BPF_RSH | BPF_X:
316 : case BPF_ALU | BPF_NEG:
317 : case BPF_LD | BPF_IMM:
318 : case BPF_LDX | BPF_IMM:
319 : case BPF_MISC | BPF_TAX:
320 : case BPF_MISC | BPF_TXA:
321 : case BPF_LD | BPF_MEM:
322 : case BPF_LDX | BPF_MEM:
323 : case BPF_ST:
324 : case BPF_STX:
325 : case BPF_JMP | BPF_JA:
326 : case BPF_JMP | BPF_JEQ | BPF_K:
327 : case BPF_JMP | BPF_JEQ | BPF_X:
328 : case BPF_JMP | BPF_JGE | BPF_K:
329 : case BPF_JMP | BPF_JGE | BPF_X:
330 : case BPF_JMP | BPF_JGT | BPF_K:
331 : case BPF_JMP | BPF_JGT | BPF_X:
332 : case BPF_JMP | BPF_JSET | BPF_K:
333 : case BPF_JMP | BPF_JSET | BPF_X:
334 : continue;
335 : default:
336 : return -EINVAL;
337 : }
338 : }
339 : return 0;
340 : }
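/*
 * Editor's sketch of a minimal program that passes the checks above: load
 * the syscall number, allow one call, kill otherwise. BPF_STMT()/BPF_JUMP()
 * are from <linux/filter.h>; __NR_getpid is an arbitrary example, and a
 * real filter should test seccomp_data->arch before trusting the number.
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *			 offsetof(struct seccomp_data, nr)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
 *	};
 *	struct sock_fprog prog = { .len = 4, .filter = insns };
 */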
341 :
342 : #ifdef SECCOMP_ARCH_NATIVE
343 : static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
344 : size_t bitmap_size,
345 : int syscall_nr)
346 : {
347 : if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
348 : return false;
349 : syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
350 :
351 : return test_bit(syscall_nr, bitmap);
352 : }
353 :
354 : /**
355 : * seccomp_cache_check_allow - lookup seccomp cache
356 : * @sfilter: The seccomp filter
357 : * @sd: The seccomp data to lookup the cache with
358 : *
359 : * Returns true if the seccomp_data is cached and allowed.
360 : */
361 : static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
362 : const struct seccomp_data *sd)
363 : {
364 : int syscall_nr = sd->nr;
365 : const struct action_cache *cache = &sfilter->cache;
366 :
367 : #ifndef SECCOMP_ARCH_COMPAT
368 : /* A native-only architecture doesn't need to check sd->arch. */
369 : return seccomp_cache_check_allow_bitmap(cache->allow_native,
370 : SECCOMP_ARCH_NATIVE_NR,
371 : syscall_nr);
372 : #else
373 : if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
374 : return seccomp_cache_check_allow_bitmap(cache->allow_native,
375 : SECCOMP_ARCH_NATIVE_NR,
376 : syscall_nr);
377 : if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
378 : return seccomp_cache_check_allow_bitmap(cache->allow_compat,
379 : SECCOMP_ARCH_COMPAT_NR,
380 : syscall_nr);
381 : #endif /* SECCOMP_ARCH_COMPAT */
382 :
383 : WARN_ON_ONCE(true);
384 : return false;
385 : }
386 : #endif /* SECCOMP_ARCH_NATIVE */
387 :
388 : /**
389 : * seccomp_run_filters - evaluates all seccomp filters against @sd
390 : * @sd: optional seccomp data to be passed to filters
391 : * @match: stores struct seccomp_filter that resulted in the return value,
392 : * unless filter returned SECCOMP_RET_ALLOW, in which case it will
393 : * be unchanged.
394 : *
395 : * Returns valid seccomp BPF response codes.
396 : */
397 : #define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
398 : static u32 seccomp_run_filters(const struct seccomp_data *sd,
399 : struct seccomp_filter **match)
400 : {
401 : u32 ret = SECCOMP_RET_ALLOW;
402 : /* Make sure cross-thread synced filter points somewhere sane. */
403 : struct seccomp_filter *f =
404 : READ_ONCE(current->seccomp.filter);
405 :
406 : /* Ensure unexpected behavior doesn't result in failing open. */
407 : if (WARN_ON(f == NULL))
408 : return SECCOMP_RET_KILL_PROCESS;
409 :
410 : if (seccomp_cache_check_allow(f, sd))
411 : return SECCOMP_RET_ALLOW;
412 :
413 : /*
414 : * All filters in the list are evaluated and the lowest BPF return
415 : * value always takes priority (ignoring the DATA).
416 : */
417 : for (; f; f = f->prev) {
418 : u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);
419 :
420 : if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
421 : ret = cur_ret;
422 : *match = f;
423 : }
424 : }
425 : return ret;
426 : }
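/*
 * Editor's worked example of the precedence rule: with two attached filters,
 * one returning SECCOMP_RET_ERRNO (0x00050000) and one SECCOMP_RET_ALLOW
 * (0x7fff0000), RET_ERRNO wins regardless of attach order because its action
 * value is numerically smaller. The s32 cast in ACTION_ONLY() makes
 * SECCOMP_RET_KILL_PROCESS (0x80000000) negative, i.e. highest priority.
 */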
427 : #endif /* CONFIG_SECCOMP_FILTER */
428 :
429 : static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
430 : {
431 0 : assert_spin_locked(&current->sighand->siglock);
432 :
433 0 : if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
434 : return false;
435 :
436 : return true;
437 : }
438 :
439 0 : void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
440 :
441 : static inline void seccomp_assign_mode(struct task_struct *task,
442 : unsigned long seccomp_mode,
443 : unsigned long flags)
444 : {
445 : assert_spin_locked(&task->sighand->siglock);
446 :
447 0 : task->seccomp.mode = seccomp_mode;
448 : /*
449 : * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
450 : * filter) is set.
451 : */
452 0 : smp_mb__before_atomic();
453 : /* Assume default seccomp processes want spec flaw mitigation. */
454 : if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
455 0 : arch_seccomp_spec_mitigate(task);
456 0 : set_task_syscall_work(task, SECCOMP);
457 : }
458 :
459 : #ifdef CONFIG_SECCOMP_FILTER
460 : /* Returns 1 if the parent is an ancestor of the child. */
461 : static int is_ancestor(struct seccomp_filter *parent,
462 : struct seccomp_filter *child)
463 : {
464 : /* NULL is the root ancestor. */
465 : if (parent == NULL)
466 : return 1;
467 : for (; child; child = child->prev)
468 : if (child == parent)
469 : return 1;
470 : return 0;
471 : }
472 :
473 : /**
474 : * seccomp_can_sync_threads: checks if all threads can be synchronized
475 : *
476 : * Expects sighand and cred_guard_mutex locks to be held.
477 : *
478 : * Returns 0 on success, -ve on error, or the pid of a thread which was
479 : * either not in the correct seccomp mode or did not have an ancestral
480 : * seccomp filter.
481 : */
482 : static inline pid_t seccomp_can_sync_threads(void)
483 : {
484 : struct task_struct *thread, *caller;
485 :
486 : BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
487 : assert_spin_locked(&current->sighand->siglock);
488 :
489 : /* Validate all threads being eligible for synchronization. */
490 : caller = current;
491 : for_each_thread(caller, thread) {
492 : pid_t failed;
493 :
494 : /* Skip current, since it is initiating the sync. */
495 : if (thread == caller)
496 : continue;
497 :
498 : if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
499 : (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
500 : is_ancestor(thread->seccomp.filter,
501 : caller->seccomp.filter)))
502 : continue;
503 :
504 : /* Return the first thread that cannot be synchronized. */
505 : failed = task_pid_vnr(thread);
506 : /* If the pid cannot be resolved, then return -ESRCH */
507 : if (WARN_ON(failed == 0))
508 : failed = -ESRCH;
509 : return failed;
510 : }
511 :
512 : return 0;
513 : }
514 :
515 : static inline void seccomp_filter_free(struct seccomp_filter *filter)
516 : {
517 : if (filter) {
518 : bpf_prog_destroy(filter->prog);
519 : kfree(filter);
520 : }
521 : }
522 :
523 : static void __seccomp_filter_orphan(struct seccomp_filter *orig)
524 : {
525 : while (orig && refcount_dec_and_test(&orig->users)) {
526 : if (waitqueue_active(&orig->wqh))
527 : wake_up_poll(&orig->wqh, EPOLLHUP);
528 : orig = orig->prev;
529 : }
530 : }
531 :
532 : static void __put_seccomp_filter(struct seccomp_filter *orig)
533 : {
534 : /* Clean up single-reference branches iteratively. */
535 : while (orig && refcount_dec_and_test(&orig->refs)) {
536 : struct seccomp_filter *freeme = orig;
537 : orig = orig->prev;
538 : seccomp_filter_free(freeme);
539 : }
540 : }
541 :
542 : static void __seccomp_filter_release(struct seccomp_filter *orig)
543 : {
544 : /* Notify about any unused filters in the task's former filter tree. */
545 : __seccomp_filter_orphan(orig);
546 : /* Finally drop all references to the task's former tree. */
547 : __put_seccomp_filter(orig);
548 : }
549 :
550 : /**
551 : * seccomp_filter_release - Detach the task from its filter tree,
552 : * drop its reference count, and notify
553 : * about unused filters
554 : * @tsk: task the filter should be released from
555 : * This function should only be called when the task is exiting as
556 : * it detaches it from its filter tree. As such, READ_ONCE() and
557 : * barriers are not needed here, as would normally be needed.
558 : */
559 : void seccomp_filter_release(struct task_struct *tsk)
560 : {
561 : struct seccomp_filter *orig = tsk->seccomp.filter;
562 :
563 : /* We are effectively holding the siglock by not having any sighand. */
564 : WARN_ON(tsk->sighand != NULL);
565 :
566 : /* Detach task from its filter tree. */
567 : tsk->seccomp.filter = NULL;
568 : __seccomp_filter_release(orig);
569 : }
570 :
571 : /**
572 : * seccomp_sync_threads: sets all threads to use current's filter
573 : *
574 : * Expects sighand and cred_guard_mutex locks to be held, and for
575 : * seccomp_can_sync_threads() to have returned success already
576 : * without dropping the locks.
577 : *
578 : */
579 : static inline void seccomp_sync_threads(unsigned long flags)
580 : {
581 : struct task_struct *thread, *caller;
582 :
583 : BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
584 : assert_spin_locked(&current->sighand->siglock);
585 :
586 : /* Synchronize all threads. */
587 : caller = current;
588 : for_each_thread(caller, thread) {
589 : /* Skip current, since it needs no changes. */
590 : if (thread == caller)
591 : continue;
592 :
593 : /* Get a task reference for the new leaf node. */
594 : get_seccomp_filter(caller);
595 :
596 : /*
597 : * Drop the task reference to the shared ancestor since
598 : * current's path will hold a reference. (This also
599 : * allows a put before the assignment.)
600 : */
601 : __seccomp_filter_release(thread->seccomp.filter);
602 :
603 : /* Make our new filter tree visible. */
604 : smp_store_release(&thread->seccomp.filter,
605 : caller->seccomp.filter);
606 : atomic_set(&thread->seccomp.filter_count,
607 : atomic_read(&caller->seccomp.filter_count));
608 :
609 : /*
610 : * Don't let an unprivileged task work around
611 : * the no_new_privs restriction by creating
612 : * a thread that sets it up, enters seccomp,
613 : * then dies.
614 : */
615 : if (task_no_new_privs(caller))
616 : task_set_no_new_privs(thread);
617 :
618 : /*
619 : * Opt the other thread into seccomp if needed.
620 : * As threads are considered to be trust-realm
621 : * equivalent (see ptrace_may_access), it is safe to
622 : * allow one thread to transition the other.
623 : */
624 : if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
625 : seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
626 : flags);
627 : }
628 : }
629 :
630 : /**
631 : * seccomp_prepare_filter: Prepares a seccomp filter for use.
632 : * @fprog: BPF program to install
633 : *
634 : * Returns filter on success or an ERR_PTR on failure.
635 : */
636 : static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
637 : {
638 : struct seccomp_filter *sfilter;
639 : int ret;
640 : const bool save_orig =
641 : #if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
642 : true;
643 : #else
644 : false;
645 : #endif
646 :
647 : if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
648 : return ERR_PTR(-EINVAL);
649 :
650 : BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
651 :
652 : /*
653 : * Installing a seccomp filter requires that the task has
654 : * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
655 : * This avoids scenarios where unprivileged tasks can affect the
656 : * behavior of privileged children.
657 : */
658 : if (!task_no_new_privs(current) &&
659 : !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
660 : return ERR_PTR(-EACCES);
661 :
662 : /* Allocate a new seccomp_filter */
663 : sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
664 : if (!sfilter)
665 : return ERR_PTR(-ENOMEM);
666 :
667 : mutex_init(&sfilter->notify_lock);
668 : ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
669 : seccomp_check_filter, save_orig);
670 : if (ret < 0) {
671 : kfree(sfilter);
672 : return ERR_PTR(ret);
673 : }
674 :
675 : refcount_set(&sfilter->refs, 1);
676 : refcount_set(&sfilter->users, 1);
677 : init_waitqueue_head(&sfilter->wqh);
678 :
679 : return sfilter;
680 : }
681 :
682 : /**
683 : * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
684 : * @user_filter: pointer to the user data containing a sock_fprog.
685 : *
686 : * Returns the prepared filter on success or an ERR_PTR on failure.
687 : */
688 : static struct seccomp_filter *
689 : seccomp_prepare_user_filter(const char __user *user_filter)
690 : {
691 : struct sock_fprog fprog;
692 : struct seccomp_filter *filter = ERR_PTR(-EFAULT);
693 :
694 : #ifdef CONFIG_COMPAT
695 : if (in_compat_syscall()) {
696 : struct compat_sock_fprog fprog32;
697 : if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
698 : goto out;
699 : fprog.len = fprog32.len;
700 : fprog.filter = compat_ptr(fprog32.filter);
701 : } else /* falls through to the if below. */
702 : #endif
703 : if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
704 : goto out;
705 : filter = seccomp_prepare_filter(&fprog);
706 : out:
707 : return filter;
708 : }
709 :
710 : #ifdef SECCOMP_ARCH_NATIVE
711 : /**
712 : * seccomp_is_const_allow - check if filter is constant allow with given data
713 : * @fprog: The BPF program
714 : * @sd: The seccomp data to check against, only syscall number and arch
715 : * number are considered constant.
716 : */
717 : static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
718 : struct seccomp_data *sd)
719 : {
720 : unsigned int reg_value = 0;
721 : unsigned int pc;
722 : bool op_res;
723 :
724 : if (WARN_ON_ONCE(!fprog))
725 : return false;
726 :
727 : for (pc = 0; pc < fprog->len; pc++) {
728 : struct sock_filter *insn = &fprog->filter[pc];
729 : u16 code = insn->code;
730 : u32 k = insn->k;
731 :
732 : switch (code) {
733 : case BPF_LD | BPF_W | BPF_ABS:
734 : switch (k) {
735 : case offsetof(struct seccomp_data, nr):
736 : reg_value = sd->nr;
737 : break;
738 : case offsetof(struct seccomp_data, arch):
739 : reg_value = sd->arch;
740 : break;
741 : default:
742 : /* can't optimize (non-constant value load) */
743 : return false;
744 : }
745 : break;
746 : case BPF_RET | BPF_K:
747 : /* reached return with constant values only, check allow */
748 : return k == SECCOMP_RET_ALLOW;
749 : case BPF_JMP | BPF_JA:
750 : pc += insn->k;
751 : break;
752 : case BPF_JMP | BPF_JEQ | BPF_K:
753 : case BPF_JMP | BPF_JGE | BPF_K:
754 : case BPF_JMP | BPF_JGT | BPF_K:
755 : case BPF_JMP | BPF_JSET | BPF_K:
756 : switch (BPF_OP(code)) {
757 : case BPF_JEQ:
758 : op_res = reg_value == k;
759 : break;
760 : case BPF_JGE:
761 : op_res = reg_value >= k;
762 : break;
763 : case BPF_JGT:
764 : op_res = reg_value > k;
765 : break;
766 : case BPF_JSET:
767 : op_res = !!(reg_value & k);
768 : break;
769 : default:
770 : /* can't optimize (unknown jump) */
771 : return false;
772 : }
773 :
774 : pc += op_res ? insn->jt : insn->jf;
775 : break;
776 : case BPF_ALU | BPF_AND | BPF_K:
777 : reg_value &= k;
778 : break;
779 : default:
780 : /* can't optimize (unknown insn) */
781 : return false;
782 : }
783 : }
784 :
785 : /* ran off the end of the filter?! */
786 : WARN_ON(1);
787 : return false;
788 : }
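/*
 * Editor's note: for the example filter shown after seccomp_check_filter(),
 * the emulation above proves "nr == __NR_getpid always ALLOWs": the nr load
 * is constant, the JEQ compares constants, and the taken path ends in
 * RET SECCOMP_RET_ALLOW. Any load of seccomp_data->args, by contrast,
 * returns false immediately, so argument-inspecting syscalls are never
 * marked cacheable.
 */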
789 :
790 : static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
791 : void *bitmap, const void *bitmap_prev,
792 : size_t bitmap_size, int arch)
793 : {
794 : struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
795 : struct seccomp_data sd;
796 : int nr;
797 :
798 : if (bitmap_prev) {
799 : /* The new filter must be as restrictive as the last. */
800 : bitmap_copy(bitmap, bitmap_prev, bitmap_size);
801 : } else {
802 : /* Before any filters, all syscalls are always allowed. */
803 : bitmap_fill(bitmap, bitmap_size);
804 : }
805 :
806 : for (nr = 0; nr < bitmap_size; nr++) {
807 : /* No bitmap change: not a cacheable action. */
808 : if (!test_bit(nr, bitmap))
809 : continue;
810 :
811 : sd.nr = nr;
812 : sd.arch = arch;
813 :
814 : /* No bitmap change: continue to always allow. */
815 : if (seccomp_is_const_allow(fprog, &sd))
816 : continue;
817 :
818 : /*
819 : * Not a cacheable action: always run filters.
820 : * atomic clear_bit() not needed, filter not visible yet.
821 : */
822 : __clear_bit(nr, bitmap);
823 : }
824 : }
825 :
826 : /**
827 : * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
828 : * @sfilter: The seccomp filter
829 : *
830 : * The filter's cache bitmaps are populated in place; nothing is returned.
831 : */
832 : static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
833 : {
834 : struct action_cache *cache = &sfilter->cache;
835 : const struct action_cache *cache_prev =
836 : sfilter->prev ? &sfilter->prev->cache : NULL;
837 :
838 : seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
839 : cache_prev ? cache_prev->allow_native : NULL,
840 : SECCOMP_ARCH_NATIVE_NR,
841 : SECCOMP_ARCH_NATIVE);
842 :
843 : #ifdef SECCOMP_ARCH_COMPAT
844 : seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
845 : cache_prev ? cache_prev->allow_compat : NULL,
846 : SECCOMP_ARCH_COMPAT_NR,
847 : SECCOMP_ARCH_COMPAT);
848 : #endif /* SECCOMP_ARCH_COMPAT */
849 : }
850 : #endif /* SECCOMP_ARCH_NATIVE */
851 :
852 : /**
853 : * seccomp_attach_filter: validate and attach filter
854 : * @flags: flags to change filter behavior
855 : * @filter: seccomp filter to add to the current process
856 : *
857 : * Caller must be holding current->sighand->siglock lock.
858 : *
859 : * Returns 0 on success, -ve on error, or
860 : * - in TSYNC mode: the pid of a thread which was either not in the correct
861 : * seccomp mode or did not have an ancestral seccomp filter
862 : * - in NEW_LISTENER mode: the fd of the new listener
863 : */
864 : static long seccomp_attach_filter(unsigned int flags,
865 : struct seccomp_filter *filter)
866 : {
867 : unsigned long total_insns;
868 : struct seccomp_filter *walker;
869 :
870 : assert_spin_locked(&current->sighand->siglock);
871 :
872 : /* Validate resulting filter length. */
873 : total_insns = filter->prog->len;
874 : for (walker = current->seccomp.filter; walker; walker = walker->prev)
875 : total_insns += walker->prog->len + 4; /* 4 instr penalty */
876 : if (total_insns > MAX_INSNS_PER_PATH)
877 : return -ENOMEM;
878 :
879 : /* If thread sync has been requested, check that it is possible. */
880 : if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
881 : int ret;
882 :
883 : ret = seccomp_can_sync_threads();
884 : if (ret) {
885 : if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
886 : return -ESRCH;
887 : else
888 : return ret;
889 : }
890 : }
891 :
892 : /* Set log flag, if present. */
893 : if (flags & SECCOMP_FILTER_FLAG_LOG)
894 : filter->log = true;
895 :
896 : /*
897 : * If there is an existing filter, make it the prev and don't drop its
898 : * task reference.
899 : */
900 : filter->prev = current->seccomp.filter;
901 : seccomp_cache_prepare(filter);
902 : current->seccomp.filter = filter;
903 : atomic_inc(&current->seccomp.filter_count);
904 :
905 : /* Now that the new filter is in place, synchronize to all threads. */
906 : if (flags & SECCOMP_FILTER_FLAG_TSYNC)
907 : seccomp_sync_threads(flags);
908 :
909 : return 0;
910 : }
911 :
912 : static void __get_seccomp_filter(struct seccomp_filter *filter)
913 : {
914 : refcount_inc(&filter->refs);
915 : }
916 :
917 : /* get_seccomp_filter - increments the reference count of the filter on @tsk */
918 : void get_seccomp_filter(struct task_struct *tsk)
919 : {
920 : struct seccomp_filter *orig = tsk->seccomp.filter;
921 : if (!orig)
922 : return;
923 : __get_seccomp_filter(orig);
924 : refcount_inc(&orig->users);
925 : }
926 :
927 : #endif /* CONFIG_SECCOMP_FILTER */
928 :
929 : /* For use with seccomp_actions_logged */
930 : #define SECCOMP_LOG_KILL_PROCESS (1 << 0)
931 : #define SECCOMP_LOG_KILL_THREAD (1 << 1)
932 : #define SECCOMP_LOG_TRAP (1 << 2)
933 : #define SECCOMP_LOG_ERRNO (1 << 3)
934 : #define SECCOMP_LOG_TRACE (1 << 4)
935 : #define SECCOMP_LOG_LOG (1 << 5)
936 : #define SECCOMP_LOG_ALLOW (1 << 6)
937 : #define SECCOMP_LOG_USER_NOTIF (1 << 7)
938 :
939 : static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
940 : SECCOMP_LOG_KILL_THREAD |
941 : SECCOMP_LOG_TRAP |
942 : SECCOMP_LOG_ERRNO |
943 : SECCOMP_LOG_USER_NOTIF |
944 : SECCOMP_LOG_TRACE |
945 : SECCOMP_LOG_LOG;
946 :
947 : static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
948 : bool requested)
949 : {
950 0 : bool log = false;
951 :
952 : switch (action) {
953 : case SECCOMP_RET_ALLOW:
954 : break;
955 : case SECCOMP_RET_TRAP:
956 : log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
957 : break;
958 : case SECCOMP_RET_ERRNO:
959 : log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
960 : break;
961 : case SECCOMP_RET_TRACE:
962 : log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
963 : break;
964 : case SECCOMP_RET_USER_NOTIF:
965 : log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
966 : break;
967 : case SECCOMP_RET_LOG:
968 : log = seccomp_actions_logged & SECCOMP_LOG_LOG;
969 : break;
970 : case SECCOMP_RET_KILL_THREAD:
971 : log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
972 : break;
973 : case SECCOMP_RET_KILL_PROCESS:
974 : default:
975 : log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
976 : }
977 :
978 : /*
979 : * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
980 : * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
981 : * any action from being logged by removing the action name from the
982 : * seccomp_actions_logged sysctl.
983 : */
984 : if (!log)
985 : return;
986 :
987 : audit_seccomp(syscall, signr, action);
988 : }
989 :
990 : /*
991 : * Secure computing mode 1 allows only read/write/exit/sigreturn.
992 : * To be fully secure this must be combined with rlimit
993 : * to limit the stack allocations too.
994 : */
995 : static const int mode1_syscalls[] = {
996 : __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
997 : -1, /* negative terminated */
998 : };
999 :
1000 0 : static void __secure_computing_strict(int this_syscall)
1001 : {
1002 0 : const int *allowed_syscalls = mode1_syscalls;
1003 : #ifdef CONFIG_COMPAT
1004 : if (in_compat_syscall())
1005 : allowed_syscalls = get_compat_mode1_syscalls();
1006 : #endif
1007 : do {
1008 0 : if (*allowed_syscalls == this_syscall)
1009 0 : return;
1010 0 : } while (*++allowed_syscalls != -1);
1011 :
1012 : #ifdef SECCOMP_DEBUG
1013 : dump_stack();
1014 : #endif
1015 0 : current->seccomp.mode = SECCOMP_MODE_DEAD;
1016 0 : seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
1017 0 : do_exit(SIGKILL);
1018 : }
1019 :
1020 : #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
1021 : void secure_computing_strict(int this_syscall)
1022 : {
1023 : int mode = current->seccomp.mode;
1024 :
1025 : if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1026 : unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1027 : return;
1028 :
1029 : if (mode == SECCOMP_MODE_DISABLED)
1030 : return;
1031 : else if (mode == SECCOMP_MODE_STRICT)
1032 : __secure_computing_strict(this_syscall);
1033 : else
1034 : BUG();
1035 : }
1036 : #else
1037 :
1038 : #ifdef CONFIG_SECCOMP_FILTER
1039 : static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
1040 : {
1041 : /*
1042 : * Note: overflow is ok here, the id just needs to be unique per
1043 : * filter.
1044 : */
1045 : lockdep_assert_held(&filter->notify_lock);
1046 : return filter->notif->next_id++;
1047 : }
1048 :
1049 : static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
1050 : {
1051 : int fd;
1052 :
1053 : /*
1054 : * Remove the notification, and reset the list pointers, indicating
1055 : * that it has been handled.
1056 : */
1057 : list_del_init(&addfd->list);
1058 : if (!addfd->setfd)
1059 : fd = receive_fd(addfd->file, addfd->flags);
1060 : else
1061 : fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
1062 : addfd->ret = fd;
1063 :
1064 : if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
1065 : /* If we fail, reset and return an error to the notifier */
1066 : if (fd < 0) {
1067 : n->state = SECCOMP_NOTIFY_SENT;
1068 : } else {
1069 : /* Return the FD we just added */
1070 : n->flags = 0;
1071 : n->error = 0;
1072 : n->val = fd;
1073 : }
1074 : }
1075 :
1076 : /*
1077 : * Mark the notification as completed. From this point, addfd mem
1078 : * might be invalidated and we can't safely read it anymore.
1079 : */
1080 : complete(&addfd->completion);
1081 : }
1082 :
1083 : static int seccomp_do_user_notification(int this_syscall,
1084 : struct seccomp_filter *match,
1085 : const struct seccomp_data *sd)
1086 : {
1087 : int err;
1088 : u32 flags = 0;
1089 : long ret = 0;
1090 : struct seccomp_knotif n = {};
1091 : struct seccomp_kaddfd *addfd, *tmp;
1092 :
1093 : mutex_lock(&match->notify_lock);
1094 : err = -ENOSYS;
1095 : if (!match->notif)
1096 : goto out;
1097 :
1098 : n.task = current;
1099 : n.state = SECCOMP_NOTIFY_INIT;
1100 : n.data = sd;
1101 : n.id = seccomp_next_notify_id(match);
1102 : init_completion(&n.ready);
1103 : list_add(&n.list, &match->notif->notifications);
1104 : INIT_LIST_HEAD(&n.addfd);
1105 :
1106 : up(&match->notif->request);
1107 : wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
1108 :
1109 : /*
1110 : * This is where we wait for a reply from userspace.
1111 : */
1112 : do {
1113 : mutex_unlock(&match->notify_lock);
1114 : err = wait_for_completion_interruptible(&n.ready);
1115 : mutex_lock(&match->notify_lock);
1116 : if (err != 0)
1117 : goto interrupted;
1118 :
1119 : addfd = list_first_entry_or_null(&n.addfd,
1120 : struct seccomp_kaddfd, list);
1121 : /* Check if we were woken up by an addfd message */
1122 : if (addfd)
1123 : seccomp_handle_addfd(addfd, &n);
1124 :
1125 : } while (n.state != SECCOMP_NOTIFY_REPLIED);
1126 :
1127 : ret = n.val;
1128 : err = n.error;
1129 : flags = n.flags;
1130 :
1131 : interrupted:
1132 : /* If there were any pending addfd calls, clear them out */
1133 : list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
1134 : /* The process went away before we got a chance to handle it */
1135 : addfd->ret = -ESRCH;
1136 : list_del_init(&addfd->list);
1137 : complete(&addfd->completion);
1138 : }
1139 :
1140 : /*
1141 : * Note that it's possible the listener died in between the time when
1142 : * we were notified of a response (or a signal) and when we were able to
1143 : * re-acquire the lock, so only delete from the list if the
1144 : * notification actually exists.
1145 : *
1146 : * Also note that this test is only valid because there's no way to
1147 : * *reattach* to a notifier right now. If one is added, we'll need to
1148 : * keep track of the notif itself and make sure they match here.
1149 : */
1150 : if (match->notif)
1151 : list_del(&n.list);
1152 : out:
1153 : mutex_unlock(&match->notify_lock);
1154 :
1155 : /* Userspace requests to continue the syscall. */
1156 : if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1157 : return 0;
1158 :
1159 : syscall_set_return_value(current, current_pt_regs(),
1160 : err, ret);
1161 : return -1;
1162 : }
1163 :
1164 : static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1165 : const bool recheck_after_trace)
1166 : {
1167 : u32 filter_ret, action;
1168 : struct seccomp_filter *match = NULL;
1169 : int data;
1170 : struct seccomp_data sd_local;
1171 :
1172 : /*
1173 : * Make sure that any changes to mode from another thread have
1174 : * been seen after SYSCALL_WORK_SECCOMP was seen.
1175 : */
1176 : smp_rmb();
1177 :
1178 : if (!sd) {
1179 : populate_seccomp_data(&sd_local);
1180 : sd = &sd_local;
1181 : }
1182 :
1183 : filter_ret = seccomp_run_filters(sd, &match);
1184 : data = filter_ret & SECCOMP_RET_DATA;
1185 : action = filter_ret & SECCOMP_RET_ACTION_FULL;
1186 :
1187 : switch (action) {
1188 : case SECCOMP_RET_ERRNO:
1189 : /* Set low-order bits as an errno, capped at MAX_ERRNO. */
1190 : if (data > MAX_ERRNO)
1191 : data = MAX_ERRNO;
1192 : syscall_set_return_value(current, current_pt_regs(),
1193 : -data, 0);
1194 : goto skip;
1195 :
1196 : case SECCOMP_RET_TRAP:
1197 : /* Show the handler the original registers. */
1198 : syscall_rollback(current, current_pt_regs());
1199 : /* Let the filter pass back 16 bits of data. */
1200 : force_sig_seccomp(this_syscall, data, false);
1201 : goto skip;
1202 :
1203 : case SECCOMP_RET_TRACE:
1204 : /* We've been put in this state by the ptracer already. */
1205 : if (recheck_after_trace)
1206 : return 0;
1207 :
1208 : /* ENOSYS these calls if there is no tracer attached. */
1209 : if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
1210 : syscall_set_return_value(current,
1211 : current_pt_regs(),
1212 : -ENOSYS, 0);
1213 : goto skip;
1214 : }
1215 :
1216 : /* Allow the BPF to provide the event message */
1217 : ptrace_event(PTRACE_EVENT_SECCOMP, data);
1218 : /*
1219 : * The delivery of a fatal signal during event
1220 : * notification may silently skip tracer notification,
1221 : * which could leave us with a potentially unmodified
1222 : * syscall that the tracer would have liked to have
1223 : * changed. Since the process is about to die, we just
1224 : * force the syscall to be skipped and let the signal
1225 : * kill the process and correctly handle any tracer exit
1226 : * notifications.
1227 : */
1228 : if (fatal_signal_pending(current))
1229 : goto skip;
1230 : /* Check if the tracer forced the syscall to be skipped. */
1231 : this_syscall = syscall_get_nr(current, current_pt_regs());
1232 : if (this_syscall < 0)
1233 : goto skip;
1234 :
1235 : /*
1236 : * Recheck the syscall, since it may have changed. This
1237 : * intentionally uses a NULL struct seccomp_data to force
1238 : * a reload of all registers. This does not goto skip since
1239 : * a skip would have already been reported.
1240 : */
1241 : if (__seccomp_filter(this_syscall, NULL, true))
1242 : return -1;
1243 :
1244 : return 0;
1245 :
1246 : case SECCOMP_RET_USER_NOTIF:
1247 : if (seccomp_do_user_notification(this_syscall, match, sd))
1248 : goto skip;
1249 :
1250 : return 0;
1251 :
1252 : case SECCOMP_RET_LOG:
1253 : seccomp_log(this_syscall, 0, action, true);
1254 : return 0;
1255 :
1256 : case SECCOMP_RET_ALLOW:
1257 : /*
1258 : * Note that the "match" filter will always be NULL for
1259 : * this action since SECCOMP_RET_ALLOW is the starting
1260 : * state in seccomp_run_filters().
1261 : */
1262 : return 0;
1263 :
1264 : case SECCOMP_RET_KILL_THREAD:
1265 : case SECCOMP_RET_KILL_PROCESS:
1266 : default:
1267 : current->seccomp.mode = SECCOMP_MODE_DEAD;
1268 : seccomp_log(this_syscall, SIGSYS, action, true);
1269 : /* Dump core only if this is the last remaining thread. */
1270 : if (action != SECCOMP_RET_KILL_THREAD ||
1271 : (atomic_read(&current->signal->live) == 1)) {
1272 : /* Show the original registers in the dump. */
1273 : syscall_rollback(current, current_pt_regs());
1274 : /* Trigger a coredump with SIGSYS */
1275 : force_sig_seccomp(this_syscall, data, true);
1276 : } else {
1277 : do_exit(SIGSYS);
1278 : }
1279 : return -1; /* skip the syscall go directly to signal handling */
1280 : }
1281 :
1282 : unreachable();
1283 :
1284 : skip:
1285 : seccomp_log(this_syscall, 0, action, match ? match->log : false);
1286 : return -1;
1287 : }
1288 : #else
1289 0 : static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1290 : const bool recheck_after_trace)
1291 : {
1292 0 : BUG();
1293 :
1294 : return -1;
1295 : }
1296 : #endif
1297 :
1298 0 : int __secure_computing(const struct seccomp_data *sd)
1299 : {
1300 0 : int mode = current->seccomp.mode;
1301 : int this_syscall;
1302 :
1303 : if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1304 : unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1305 : return 0;
1306 :
1307 0 : this_syscall = sd ? sd->nr :
1308 0 : syscall_get_nr(current, current_pt_regs());
1309 :
1310 0 : switch (mode) {
1311 : case SECCOMP_MODE_STRICT:
1312 0 : __secure_computing_strict(this_syscall); /* may call do_exit */
1313 : return 0;
1314 : case SECCOMP_MODE_FILTER:
1315 0 : return __seccomp_filter(this_syscall, sd, false);
1316 : /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
1317 : case SECCOMP_MODE_DEAD:
1318 0 : WARN_ON_ONCE(1);
1319 0 : do_exit(SIGKILL);
1320 : return -1;
1321 : default:
1322 0 : BUG();
1323 : }
1324 : }
1325 : #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
1326 :
1327 0 : long prctl_get_seccomp(void)
1328 : {
1329 0 : return current->seccomp.mode;
1330 : }
1331 :
1332 : /**
1333 : * seccomp_set_mode_strict: internal function for setting strict seccomp
1334 : *
1335 : * Once current->seccomp.mode is non-zero, it may not be changed.
1336 : *
1337 : * Returns 0 on success or -EINVAL on failure.
1338 : */
1339 0 : static long seccomp_set_mode_strict(void)
1340 : {
1341 0 : const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
1342 0 : long ret = -EINVAL;
1343 :
1344 0 : spin_lock_irq(&current->sighand->siglock);
1345 :
1346 0 : if (!seccomp_may_assign_mode(seccomp_mode))
1347 : goto out;
1348 :
1349 : #ifdef TIF_NOTSC
1350 : disable_TSC();
1351 : #endif
1352 0 : seccomp_assign_mode(current, seccomp_mode, 0);
1353 0 : ret = 0;
1354 :
1355 : out:
1356 0 : spin_unlock_irq(&current->sighand->siglock);
1357 :
1358 0 : return ret;
1359 : }
1360 :
1361 : #ifdef CONFIG_SECCOMP_FILTER
1362 : static void seccomp_notify_free(struct seccomp_filter *filter)
1363 : {
1364 : kfree(filter->notif);
1365 : filter->notif = NULL;
1366 : }
1367 :
1368 : static void seccomp_notify_detach(struct seccomp_filter *filter)
1369 : {
1370 : struct seccomp_knotif *knotif;
1371 :
1372 : if (!filter)
1373 : return;
1374 :
1375 : mutex_lock(&filter->notify_lock);
1376 :
1377 : /*
1378 : * If this file is being closed because e.g. the task who owned it
1379 : * died, let's wake everyone up who was waiting on us.
1380 : */
1381 : list_for_each_entry(knotif, &filter->notif->notifications, list) {
1382 : if (knotif->state == SECCOMP_NOTIFY_REPLIED)
1383 : continue;
1384 :
1385 : knotif->state = SECCOMP_NOTIFY_REPLIED;
1386 : knotif->error = -ENOSYS;
1387 : knotif->val = 0;
1388 :
1389 : /*
1390 : * We do not need to wake up any pending addfd messages, as
1391 : * the notifier will do that for us, as this just looks
1392 : * like a standard reply.
1393 : */
1394 : complete(&knotif->ready);
1395 : }
1396 :
1397 : seccomp_notify_free(filter);
1398 : mutex_unlock(&filter->notify_lock);
1399 : }
1400 :
1401 : static int seccomp_notify_release(struct inode *inode, struct file *file)
1402 : {
1403 : struct seccomp_filter *filter = file->private_data;
1404 :
1405 : seccomp_notify_detach(filter);
1406 : __put_seccomp_filter(filter);
1407 : return 0;
1408 : }
1409 :
1410 : /* must be called with notif_lock held */
1411 : static inline struct seccomp_knotif *
1412 : find_notification(struct seccomp_filter *filter, u64 id)
1413 : {
1414 : struct seccomp_knotif *cur;
1415 :
1416 : lockdep_assert_held(&filter->notify_lock);
1417 :
1418 : list_for_each_entry(cur, &filter->notif->notifications, list) {
1419 : if (cur->id == id)
1420 : return cur;
1421 : }
1422 :
1423 : return NULL;
1424 : }
1425 :
1426 :
1427 : static long seccomp_notify_recv(struct seccomp_filter *filter,
1428 : void __user *buf)
1429 : {
1430 : struct seccomp_knotif *knotif = NULL, *cur;
1431 : struct seccomp_notif unotif;
1432 : ssize_t ret;
1433 :
1434 : /* Verify that we're not given garbage to keep struct extensible. */
1435 : ret = check_zeroed_user(buf, sizeof(unotif));
1436 : if (ret < 0)
1437 : return ret;
1438 : if (!ret)
1439 : return -EINVAL;
1440 :
1441 : memset(&unotif, 0, sizeof(unotif));
1442 :
1443 : ret = down_interruptible(&filter->notif->request);
1444 : if (ret < 0)
1445 : return ret;
1446 :
1447 : mutex_lock(&filter->notify_lock);
1448 : list_for_each_entry(cur, &filter->notif->notifications, list) {
1449 : if (cur->state == SECCOMP_NOTIFY_INIT) {
1450 : knotif = cur;
1451 : break;
1452 : }
1453 : }
1454 :
1455 : /*
1456 : * If we didn't find a notification, it could be that the task was
1457 : * interrupted by a fatal signal between the time we were woken and
1458 : * when we were able to acquire the notify_lock mutex.
1459 : */
1460 : if (!knotif) {
1461 : ret = -ENOENT;
1462 : goto out;
1463 : }
1464 :
1465 : unotif.id = knotif->id;
1466 : unotif.pid = task_pid_vnr(knotif->task);
1467 : unotif.data = *(knotif->data);
1468 :
1469 : knotif->state = SECCOMP_NOTIFY_SENT;
1470 : wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
1471 : ret = 0;
1472 : out:
1473 : mutex_unlock(&filter->notify_lock);
1474 :
1475 : if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
1476 : ret = -EFAULT;
1477 :
1478 : /*
1479 : * Userspace screwed up. To make sure that we keep this
1480 : * notification alive, let's reset it back to INIT. It
1481 : * may have died when we released the lock, so we need to make
1482 : * sure it's still around.
1483 : */
1484 : mutex_lock(&filter->notify_lock);
1485 : knotif = find_notification(filter, unotif.id);
1486 : if (knotif) {
1487 : knotif->state = SECCOMP_NOTIFY_INIT;
1488 : up(&filter->notif->request);
1489 : }
1490 : mutex_unlock(&filter->notify_lock);
1491 : }
1492 :
1493 : return ret;
1494 : }
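/*
 * Editor's sketch of the userspace half of the exchange above. The memset
 * matters: check_zeroed_user() rejects a request buffer with stale bits.
 *
 *	struct seccomp_notif req;
 *	struct seccomp_notif_resp resp;
 *	for (;;) {
 *		memset(&req, 0, sizeof(req));
 *		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req) < 0)
 *			break;			// e.g. interrupted or hung up
 *		memset(&resp, 0, sizeof(resp));
 *		resp.id = req.id;
 *		resp.error = -EPERM;		// deny by default
 *		ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
 *	}
 */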
1495 :
1496 : static long seccomp_notify_send(struct seccomp_filter *filter,
1497 : void __user *buf)
1498 : {
1499 : struct seccomp_notif_resp resp = {};
1500 : struct seccomp_knotif *knotif;
1501 : long ret;
1502 :
1503 : if (copy_from_user(&resp, buf, sizeof(resp)))
1504 : return -EFAULT;
1505 :
1506 : if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1507 : return -EINVAL;
1508 :
1509 : if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
1510 : (resp.error || resp.val))
1511 : return -EINVAL;
1512 :
1513 : ret = mutex_lock_interruptible(&filter->notify_lock);
1514 : if (ret < 0)
1515 : return ret;
1516 :
1517 : knotif = find_notification(filter, resp.id);
1518 : if (!knotif) {
1519 : ret = -ENOENT;
1520 : goto out;
1521 : }
1522 :
1523 : /* Allow exactly one reply. */
1524 : if (knotif->state != SECCOMP_NOTIFY_SENT) {
1525 : ret = -EINPROGRESS;
1526 : goto out;
1527 : }
1528 :
1529 : ret = 0;
1530 : knotif->state = SECCOMP_NOTIFY_REPLIED;
1531 : knotif->error = resp.error;
1532 : knotif->val = resp.val;
1533 : knotif->flags = resp.flags;
1534 : complete(&knotif->ready);
1535 : out:
1536 : mutex_unlock(&filter->notify_lock);
1537 : return ret;
1538 : }
1539 :
1540 : static long seccomp_notify_id_valid(struct seccomp_filter *filter,
1541 : void __user *buf)
1542 : {
1543 : struct seccomp_knotif *knotif;
1544 : u64 id;
1545 : long ret;
1546 :
1547 : if (copy_from_user(&id, buf, sizeof(id)))
1548 : return -EFAULT;
1549 :
1550 : ret = mutex_lock_interruptible(&filter->notify_lock);
1551 : if (ret < 0)
1552 : return ret;
1553 :
1554 : knotif = find_notification(filter, id);
1555 : if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
1556 : ret = 0;
1557 : else
1558 : ret = -ENOENT;
1559 :
1560 : mutex_unlock(&filter->notify_lock);
1561 : return ret;
1562 : }
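/*
 * Editor's note on intended use: a supervisor that reads the target's
 * memory (say via /proc/<pid>/mem, using req.pid and req.data.args[]) must
 * re-check the cookie *after* the read so a recycled pid cannot smuggle
 * data past it; read_remote() below is a hypothetical helper:
 *
 *	read_remote(req.pid, req.data.args[0], buf, len);
 *	if (ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id) != 0)
 *		goto deny;	// target died mid-read; buf is untrusted
 */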
1563 :
1564 : static long seccomp_notify_addfd(struct seccomp_filter *filter,
1565 : struct seccomp_notif_addfd __user *uaddfd,
1566 : unsigned int size)
1567 : {
1568 : struct seccomp_notif_addfd addfd;
1569 : struct seccomp_knotif *knotif;
1570 : struct seccomp_kaddfd kaddfd;
1571 : int ret;
1572 :
1573 : BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
1574 : BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
1575 :
1576 : if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
1577 : return -EINVAL;
1578 :
1579 : ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
1580 : if (ret)
1581 : return ret;
1582 :
1583 : if (addfd.newfd_flags & ~O_CLOEXEC)
1584 : return -EINVAL;
1585 :
1586 : if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
1587 : return -EINVAL;
1588 :
1589 : if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
1590 : return -EINVAL;
1591 :
1592 : kaddfd.file = fget(addfd.srcfd);
1593 : if (!kaddfd.file)
1594 : return -EBADF;
1595 :
1596 : kaddfd.ioctl_flags = addfd.flags;
1597 : kaddfd.flags = addfd.newfd_flags;
1598 : kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
1599 : kaddfd.fd = addfd.newfd;
1600 : init_completion(&kaddfd.completion);
1601 :
1602 : ret = mutex_lock_interruptible(&filter->notify_lock);
1603 : if (ret < 0)
1604 : goto out;
1605 :
1606 : knotif = find_notification(filter, addfd.id);
1607 : if (!knotif) {
1608 : ret = -ENOENT;
1609 : goto out_unlock;
1610 : }
1611 :
1612 : /*
1613 : * We do not want to allow for FD injection to occur before the
1614 : * notification has been picked up by a userspace handler, or after
1615 : * the notification has been replied to.
1616 : */
1617 : if (knotif->state != SECCOMP_NOTIFY_SENT) {
1618 : ret = -EINPROGRESS;
1619 : goto out_unlock;
1620 : }
1621 :
1622 : if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
1623 : /*
1624 : * Disallow queuing an atomic addfd + send reply while there are
1625 : * some addfd requests still to process.
1626 : *
1627 : * There is no clear reason to support it, and rejecting it keeps
1628 : * the loop on the other side straightforward.
1629 : */
1630 : if (!list_empty(&knotif->addfd)) {
1631 : ret = -EBUSY;
1632 : goto out_unlock;
1633 : }
1634 :
1635 : /* Allow exactly only one reply */
1636 : knotif->state = SECCOMP_NOTIFY_REPLIED;
1637 : }
1638 :
1639 : list_add(&kaddfd.list, &knotif->addfd);
1640 : complete(&knotif->ready);
1641 : mutex_unlock(&filter->notify_lock);
1642 :
1643 : /* Now we wait for it to be processed or be interrupted */
1644 : ret = wait_for_completion_interruptible(&kaddfd.completion);
1645 : if (ret == 0) {
1646 : /*
1647 : * We had a successful completion. The other side has already
1648 : * removed us from the addfd queue, and
1649 : * wait_for_completion_interruptible has a memory barrier upon
1650 : * success that lets us read this value directly without
1651 : * locking.
1652 : */
1653 : ret = kaddfd.ret;
1654 : goto out;
1655 : }
1656 :
1657 : mutex_lock(&filter->notify_lock);
1658 : /*
1659 : * Even though we were woken up by a signal and not a successful
1660 : * completion, a completion may have happened in the meantime.
1661 : *
1662 : * We need to check again if the addfd request has been handled,
1663 : * and if not, we will remove it from the queue.
1664 : */
1665 : if (list_empty(&kaddfd.list))
1666 : ret = kaddfd.ret;
1667 : else
1668 : list_del(&kaddfd.list);
1669 :
1670 : out_unlock:
1671 : mutex_unlock(&filter->notify_lock);
1672 : out:
1673 : fput(kaddfd.file);
1674 :
1675 : return ret;
1676 : }
1677 :
1678 : static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1679 : unsigned long arg)
1680 : {
1681 : struct seccomp_filter *filter = file->private_data;
1682 : void __user *buf = (void __user *)arg;
1683 :
1684 : /* Fixed-size ioctls */
1685 : switch (cmd) {
1686 : case SECCOMP_IOCTL_NOTIF_RECV:
1687 : return seccomp_notify_recv(filter, buf);
1688 : case SECCOMP_IOCTL_NOTIF_SEND:
1689 : return seccomp_notify_send(filter, buf);
1690 : case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1691 : case SECCOMP_IOCTL_NOTIF_ID_VALID:
1692 : return seccomp_notify_id_valid(filter, buf);
1693 : }
1694 :
1695 : /* Extensible Argument ioctls */
1696 : #define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1697 : switch (EA_IOCTL(cmd)) {
1698 : case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1699 : return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1700 : default:
1701 : return -EINVAL;
1702 : }
1703 : }
1704 :
1705 : static __poll_t seccomp_notify_poll(struct file *file,
1706 : struct poll_table_struct *poll_tab)
1707 : {
1708 : struct seccomp_filter *filter = file->private_data;
1709 : __poll_t ret = 0;
1710 : struct seccomp_knotif *cur;
1711 :
1712 : poll_wait(file, &filter->wqh, poll_tab);
1713 :
1714 : if (mutex_lock_interruptible(&filter->notify_lock) < 0)
1715 : return EPOLLERR;
1716 :
1717 : list_for_each_entry(cur, &filter->notif->notifications, list) {
1718 : if (cur->state == SECCOMP_NOTIFY_INIT)
1719 : ret |= EPOLLIN | EPOLLRDNORM;
1720 : if (cur->state == SECCOMP_NOTIFY_SENT)
1721 : ret |= EPOLLOUT | EPOLLWRNORM;
1722 : if ((ret & EPOLLIN) && (ret & EPOLLOUT))
1723 : break;
1724 : }
1725 :
1726 : mutex_unlock(&filter->notify_lock);
1727 :
1728 : if (refcount_read(&filter->users) == 0)
1729 : ret |= EPOLLHUP;
1730 :
1731 : return ret;
1732 : }
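/*
 * Editor's sketch of multiplexing on the listener, given the events set
 * above: EPOLLIN means a notification awaits recv, EPOLLOUT means one
 * awaits a reply, EPOLLHUP means every user of the filter has exited.
 *
 *	struct pollfd pfd = { .fd = listener, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLHUP))
 *		return;		// no tasks left behind this filter
 */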
1733 :
1734 : static const struct file_operations seccomp_notify_ops = {
1735 : .poll = seccomp_notify_poll,
1736 : .release = seccomp_notify_release,
1737 : .unlocked_ioctl = seccomp_notify_ioctl,
1738 : .compat_ioctl = seccomp_notify_ioctl,
1739 : };
1740 :
1741 : static struct file *init_listener(struct seccomp_filter *filter)
1742 : {
1743 : struct file *ret;
1744 :
1745 : ret = ERR_PTR(-ENOMEM);
1746 : filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
1747 : if (!filter->notif)
1748 : goto out;
1749 :
1750 : sema_init(&filter->notif->request, 0);
1751 : filter->notif->next_id = get_random_u64();
1752 : INIT_LIST_HEAD(&filter->notif->notifications);
1753 :
1754 : ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
1755 : filter, O_RDWR);
1756 : if (IS_ERR(ret))
1757 : goto out_notif;
1758 :
1759 : /* The file has a reference to it now */
1760 : __get_seccomp_filter(filter);
1761 :
1762 : out_notif:
1763 : if (IS_ERR(ret))
1764 : seccomp_notify_free(filter);
1765 : out:
1766 : return ret;
1767 : }
1768 :
1769 : /*
1770 : * Does @new_child have a listener while an ancestor also has a listener?
1771 : * If so, we'll want to reject this filter.
1772 : * This only has to be tested for the current process, even in the TSYNC case,
1773 : * because TSYNC installs @child with the same parent on all threads.
1774 : * Note that @new_child is not hooked up to its parent at this point yet, so
1775 : * we use current->seccomp.filter.
1776 : */
1777 : static bool has_duplicate_listener(struct seccomp_filter *new_child)
1778 : {
1779 : struct seccomp_filter *cur;
1780 :
1781 : /* must be protected against concurrent TSYNC */
1782 : lockdep_assert_held(&current->sighand->siglock);
1783 :
1784 : if (!new_child->notif)
1785 : return false;
1786 : for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1787 : if (cur->notif)
1788 : return true;
1789 : }
1790 :
1791 : return false;
1792 : }
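/*
 * Editor's sketch (not part of this file): the observable effect of the
 * check above. A second listener anywhere in the filter hierarchy is
 * refused ("prog" is assumed to be an already-prepared struct sock_fprog):
 *
 *	int fd1 = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
 *			  SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog);
 *	int fd2 = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
 *			  SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog);
 *	// fd1 >= 0, but fd2 == -1 with errno == EBUSY
 */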
1793 :
1794 : /**
1795 : * seccomp_set_mode_filter: internal function for setting seccomp filter
1796 : * @flags: flags to change filter behavior
1797 : * @filter: struct sock_fprog containing filter
1798 : *
1799 : * This function may be called repeatedly to install additional filters.
1800 : * Every filter successfully installed will be evaluated (in reverse order)
1801 : * for each system call the task makes.
1802 : *
1803 : * Once current->seccomp.mode is non-zero, it may not be changed.
1804 : *
1805 : * Returns 0 on success, the new listener fd when SECCOMP_FILTER_FLAG_NEW_LISTENER is used, or a negative errno on failure.
1806 : */
1807 : static long seccomp_set_mode_filter(unsigned int flags,
1808 : const char __user *filter)
1809 : {
1810 : const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
1811 : struct seccomp_filter *prepared = NULL;
1812 : long ret = -EINVAL;
1813 : int listener = -1;
1814 : struct file *listener_f = NULL;
1815 :
1816 : /* Validate flags. */
1817 : if (flags & ~SECCOMP_FILTER_FLAG_MASK)
1818 : return -EINVAL;
1819 :
1820 : /*
1821 : * In the successful case, NEW_LISTENER returns the new listener fd.
1822 : * But in the failure case, TSYNC returns the thread that died. If you
1823 : * combine these two flags, there's no way to tell whether something
1824 : * succeeded or failed. So, let's disallow this combination if the user
1825 : * has not explicitly requested no errors from TSYNC.
1826 : */
1827 : if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
1828 : (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
1829 : ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
1830 : return -EINVAL;
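/*
 * Editor's sketch (not part of this file) of the rejected combination:
 *
 *	syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
 *		SECCOMP_FILTER_FLAG_TSYNC | SECCOMP_FILTER_FLAG_NEW_LISTENER,
 *		&prog);		// -1, errno == EINVAL
 *
 * Adding SECCOMP_FILTER_FLAG_TSYNC_ESRCH to the flags makes it valid.
 */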
1831 :
1832 : /* Prepare the new filter before holding any locks. */
1833 : prepared = seccomp_prepare_user_filter(filter);
1834 : if (IS_ERR(prepared))
1835 : return PTR_ERR(prepared);
1836 :
1837 : if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1838 : listener = get_unused_fd_flags(O_CLOEXEC);
1839 : if (listener < 0) {
1840 : ret = listener;
1841 : goto out_free;
1842 : }
1843 :
1844 : listener_f = init_listener(prepared);
1845 : if (IS_ERR(listener_f)) {
1846 : put_unused_fd(listener);
1847 : ret = PTR_ERR(listener_f);
1848 : goto out_free;
1849 : }
1850 : }
1851 :
1852 : /*
1853 : * Make sure we cannot change seccomp or nnp state via TSYNC
1854 : * while another thread is in the middle of calling exec.
1855 : */
1856 : if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
1857 : mutex_lock_killable(&current->signal->cred_guard_mutex))
1858 : goto out_put_fd;
1859 :
1860 : spin_lock_irq(&current->sighand->siglock);
1861 :
1862 : if (!seccomp_may_assign_mode(seccomp_mode))
1863 : goto out;
1864 :
1865 : if (has_duplicate_listener(prepared)) {
1866 : ret = -EBUSY;
1867 : goto out;
1868 : }
1869 :
1870 : ret = seccomp_attach_filter(flags, prepared);
1871 : if (ret)
1872 : goto out;
1873 : /* Do not free the successfully attached filter. */
1874 : prepared = NULL;
1875 :
1876 : seccomp_assign_mode(current, seccomp_mode, flags);
1877 : out:
1878 : spin_unlock_irq(&current->sighand->siglock);
1879 : if (flags & SECCOMP_FILTER_FLAG_TSYNC)
1880 : mutex_unlock(&current->signal->cred_guard_mutex);
1881 : out_put_fd:
1882 : if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1883 : if (ret) {
1884 : listener_f->private_data = NULL;
1885 : fput(listener_f);
1886 : put_unused_fd(listener);
1887 : seccomp_notify_detach(prepared);
1888 : } else {
1889 : fd_install(listener, listener_f);
1890 : ret = listener;
1891 : }
1892 : }
1893 : out_free:
1894 : seccomp_filter_free(prepared);
1895 : return ret;
1896 : }
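/*
 * Editor's sketch (not part of this file): a complete minimal caller of
 * the filter API implemented above, assuming <linux/filter.h>,
 * <linux/seccomp.h>, <sys/prctl.h>, <sys/syscall.h>, <stddef.h> and
 * <errno.h>. The cBPF program makes getpid() fail with EPERM and allows
 * everything else; no_new_privs must be set first unless the caller has
 * CAP_SYS_ADMIN:
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *			 offsetof(struct seccomp_data, nr)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
 *	};
 *	struct sock_fprog prog = {
 *		.len = sizeof(insns) / sizeof(insns[0]),
 *		.filter = insns,
 *	};
 *
 *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 *	syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog);
 */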
1897 : #else
1898 : static inline long seccomp_set_mode_filter(unsigned int flags,
1899 : const char __user *filter)
1900 : {
1901 : return -EINVAL;
1902 : }
1903 : #endif
1904 :
1905 0 : static long seccomp_get_action_avail(const char __user *uaction)
1906 : {
1907 : u32 action;
1908 :
1909 0 : if (copy_from_user(&action, uaction, sizeof(action)))
1910 : return -EFAULT;
1911 :
1912 0 : switch (action) {
1913 : case SECCOMP_RET_KILL_PROCESS:
1914 : case SECCOMP_RET_KILL_THREAD:
1915 : case SECCOMP_RET_TRAP:
1916 : case SECCOMP_RET_ERRNO:
1917 : case SECCOMP_RET_USER_NOTIF:
1918 : case SECCOMP_RET_TRACE:
1919 : case SECCOMP_RET_LOG:
1920 : case SECCOMP_RET_ALLOW:
1921 : break;
1922 : default:
1923 : return -EOPNOTSUPP;
1924 : }
1925 :
1926 0 : return 0;
1927 : }
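/*
 * Editor's sketch (not part of this file): probing whether the running
 * kernel recognizes an action value before relying on it (the two
 * helpers named below are hypothetical):
 *
 *	__u32 action = SECCOMP_RET_USER_NOTIF;
 *
 *	if (syscall(__NR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0)
 *		use_user_notification();	// hypothetical helper
 *	else if (errno == EOPNOTSUPP)
 *		fall_back_to_trace();		// hypothetical helper
 */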
1928 :
1929 : static long seccomp_get_notif_sizes(void __user *usizes)
1930 : {
1931 0 : struct seccomp_notif_sizes sizes = {
1932 : .seccomp_notif = sizeof(struct seccomp_notif),
1933 : .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1934 : .seccomp_data = sizeof(struct seccomp_data),
1935 : };
1936 :
1937 0 : if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1938 : return -EFAULT;
1939 :
1940 : return 0;
1941 : }
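/*
 * Editor's sketch (not part of this file): userspace should size its
 * notification buffers from this call rather than from its compile-time
 * headers, so binaries keep working if a newer kernel grows the structs:
 *
 *	struct seccomp_notif_sizes sizes;
 *	struct seccomp_notif *req;
 *
 *	syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes);
 *	req = calloc(1, sizes.seccomp_notif);
 */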
1942 :
1943 : /* Common entry point for both prctl and syscall. */
1944 0 : static long do_seccomp(unsigned int op, unsigned int flags,
1945 : void __user *uargs)
1946 : {
1947 0 : switch (op) {
1948 : case SECCOMP_SET_MODE_STRICT:
1949 0 : if (flags != 0 || uargs != NULL)
1950 : return -EINVAL;
1951 0 : return seccomp_set_mode_strict();
1952 : case SECCOMP_SET_MODE_FILTER:
1953 : return seccomp_set_mode_filter(flags, uargs);
1954 : case SECCOMP_GET_ACTION_AVAIL:
1955 0 : if (flags != 0)
1956 : return -EINVAL;
1957 :
1958 0 : return seccomp_get_action_avail(uargs);
1959 : case SECCOMP_GET_NOTIF_SIZES:
1960 0 : if (flags != 0)
1961 : return -EINVAL;
1962 :
1963 0 : return seccomp_get_notif_sizes(uargs);
1964 : default:
1965 : return -EINVAL;
1966 : }
1967 : }
1968 :
1969 0 : SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
1970 : void __user *, uargs)
1971 : {
1972 0 : return do_seccomp(op, flags, uargs);
1973 : }
1974 :
1975 : /**
1976 : * prctl_set_seccomp: configures current->seccomp.mode
1977 : * @seccomp_mode: requested mode to use
1978 : * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
1979 : *
1980 : * Returns 0 on success or -EINVAL on failure.
1981 : */
1982 0 : long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
1983 : {
1984 : unsigned int op;
1985 : void __user *uargs;
1986 :
1987 0 : switch (seccomp_mode) {
1988 : case SECCOMP_MODE_STRICT:
1989 : op = SECCOMP_SET_MODE_STRICT;
1990 : /*
1991 : * Setting strict mode through prctl has always ignored the filter,
1992 : * so make sure it is always NULL here to pass the internal
1993 : * check in do_seccomp().
1994 : */
1995 : uargs = NULL;
1996 : break;
1997 : case SECCOMP_MODE_FILTER:
1998 0 : op = SECCOMP_SET_MODE_FILTER;
1999 0 : uargs = filter;
2000 0 : break;
2001 : default:
2002 : return -EINVAL;
2003 : }
2004 :
2005 : /* prctl interface doesn't have flags, so they are always zero. */
2006 0 : return do_seccomp(op, 0, uargs);
2007 : }
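/*
 * Editor's sketch (not part of this file): the two user-facing spellings
 * that reach do_seccomp() are equivalent for filter mode:
 *
 *	prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 *	syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog);
 *
 * Only the syscall form can pass flags such as SECCOMP_FILTER_FLAG_TSYNC.
 */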
2008 :
2009 : #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
2010 : static struct seccomp_filter *get_nth_filter(struct task_struct *task,
2011 : unsigned long filter_off)
2012 : {
2013 : struct seccomp_filter *orig, *filter;
2014 : unsigned long count;
2015 :
2016 : /*
2017 : * Note: this is only correct because the caller is expected to be the
2018 : * (ptrace) tracer of the task; otherwise lock_task_sighand() is needed.
2019 : */
2020 : spin_lock_irq(&task->sighand->siglock);
2021 :
2022 : if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
2023 : spin_unlock_irq(&task->sighand->siglock);
2024 : return ERR_PTR(-EINVAL);
2025 : }
2026 :
2027 : orig = task->seccomp.filter;
2028 : __get_seccomp_filter(orig);
2029 : spin_unlock_irq(&task->sighand->siglock);
2030 :
2031 : count = 0;
2032 : for (filter = orig; filter; filter = filter->prev)
2033 : count++;
2034 :
2035 : if (filter_off >= count) {
2036 : filter = ERR_PTR(-ENOENT);
2037 : goto out;
2038 : }
2039 :
2040 : count -= filter_off;
2041 : for (filter = orig; filter && count > 1; filter = filter->prev)
2042 : count--;
2043 :
2044 : if (WARN_ON(count != 1 || !filter)) {
2045 : filter = ERR_PTR(-ENOENT);
2046 : goto out;
2047 : }
2048 :
2049 : __get_seccomp_filter(filter);
2050 :
2051 : out:
2052 : __put_seccomp_filter(orig);
2053 : return filter;
2054 : }
2055 :
2056 : long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
2057 : void __user *data)
2058 : {
2059 : struct seccomp_filter *filter;
2060 : struct sock_fprog_kern *fprog;
2061 : long ret;
2062 :
2063 : if (!capable(CAP_SYS_ADMIN) ||
2064 : current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2065 : return -EACCES;
2066 : }
2067 :
2068 : filter = get_nth_filter(task, filter_off);
2069 : if (IS_ERR(filter))
2070 : return PTR_ERR(filter);
2071 :
2072 : fprog = filter->prog->orig_prog;
2073 : if (!fprog) {
2074 : /* This must be a new non-cBPF filter, since we save
2075 : * every cBPF filter's orig_prog above when
2076 : * CONFIG_CHECKPOINT_RESTORE is enabled.
2077 : */
2078 : ret = -EMEDIUMTYPE;
2079 : goto out;
2080 : }
2081 :
2082 : ret = fprog->len;
2083 : if (!data)
2084 : goto out;
2085 :
2086 : if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
2087 : ret = -EFAULT;
2088 :
2089 : out:
2090 : __put_seccomp_filter(filter);
2091 : return ret;
2092 : }
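/*
 * Editor's sketch (not part of this file): the checkpoint/restore
 * consumer of this function, via ptrace. Calling with a NULL buffer
 * first returns the instruction count; the third argument selects which
 * filter in the task's chain to dump:
 *
 *	long cnt = ptrace(PTRACE_SECCOMP_GET_FILTER, pid, 0, NULL);
 *	struct sock_filter *insns = calloc(cnt, sizeof(*insns));
 *
 *	ptrace(PTRACE_SECCOMP_GET_FILTER, pid, 0, insns);
 */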
2093 :
2094 : long seccomp_get_metadata(struct task_struct *task,
2095 : unsigned long size, void __user *data)
2096 : {
2097 : long ret;
2098 : struct seccomp_filter *filter;
2099 : struct seccomp_metadata kmd = {};
2100 :
2101 : if (!capable(CAP_SYS_ADMIN) ||
2102 : current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2103 : return -EACCES;
2104 : }
2105 :
2106 : size = min_t(unsigned long, size, sizeof(kmd));
2107 :
2108 : if (size < sizeof(kmd.filter_off))
2109 : return -EINVAL;
2110 :
2111 : if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
2112 : return -EFAULT;
2113 :
2114 : filter = get_nth_filter(task, kmd.filter_off);
2115 : if (IS_ERR(filter))
2116 : return PTR_ERR(filter);
2117 :
2118 : if (filter->log)
2119 : kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
2120 :
2121 : ret = size;
2122 : if (copy_to_user(data, &kmd, size))
2123 : ret = -EFAULT;
2124 :
2125 : __put_seccomp_filter(filter);
2126 : return ret;
2127 : }
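/*
 * Editor's sketch (not part of this file): reading a filter's metadata.
 * The ptrace "addr" argument carries the size of the userspace struct,
 * and filter_off selects the filter, as with GET_FILTER above:
 *
 *	struct seccomp_metadata md = { .filter_off = 0 };
 *
 *	ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
 *	// md.flags & SECCOMP_FILTER_FLAG_LOG: filter was installed
 *	// with the LOG flag
 */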
2128 : #endif
2129 :
2130 : #ifdef CONFIG_SYSCTL
2131 :
2132 : /* Human readable action names for friendly sysctl interaction */
2133 : #define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
2134 : #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
2135 : #define SECCOMP_RET_TRAP_NAME "trap"
2136 : #define SECCOMP_RET_ERRNO_NAME "errno"
2137 : #define SECCOMP_RET_USER_NOTIF_NAME "user_notif"
2138 : #define SECCOMP_RET_TRACE_NAME "trace"
2139 : #define SECCOMP_RET_LOG_NAME "log"
2140 : #define SECCOMP_RET_ALLOW_NAME "allow"
2141 :
2142 : static const char seccomp_actions_avail[] =
2143 : SECCOMP_RET_KILL_PROCESS_NAME " "
2144 : SECCOMP_RET_KILL_THREAD_NAME " "
2145 : SECCOMP_RET_TRAP_NAME " "
2146 : SECCOMP_RET_ERRNO_NAME " "
2147 : SECCOMP_RET_USER_NOTIF_NAME " "
2148 : SECCOMP_RET_TRACE_NAME " "
2149 : SECCOMP_RET_LOG_NAME " "
2150 : SECCOMP_RET_ALLOW_NAME;
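/*
 * Editor's sketch (not part of this file): this string is exactly what
 * userspace reads back from /proc/sys/kernel/seccomp/actions_avail:
 *
 *	char buf[256];
 *	FILE *f = fopen("/proc/sys/kernel/seccomp/actions_avail", "r");
 *
 *	if (f && fgets(buf, sizeof(buf), f))
 *		printf("%s", buf);	// "kill_process kill_thread trap
 *					//  errno user_notif trace log allow"
 *	if (f)
 *		fclose(f);
 */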
2151 :
2152 : struct seccomp_log_name {
2153 : u32 log;
2154 : const char *name;
2155 : };
2156 :
2157 : static const struct seccomp_log_name seccomp_log_names[] = {
2158 : { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
2159 : { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
2160 : { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
2161 : { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
2162 : { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
2163 : { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
2164 : { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
2165 : { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
2166 : { }
2167 : };
2168 :
2169 0 : static bool seccomp_names_from_actions_logged(char *names, size_t size,
2170 : u32 actions_logged,
2171 : const char *sep)
2172 : {
2173 : const struct seccomp_log_name *cur;
2174 0 : bool append_sep = false;
2175 :
2176 0 : for (cur = seccomp_log_names; cur->name && size; cur++) {
2177 : ssize_t ret;
2178 :
2179 0 : if (!(actions_logged & cur->log))
2180 0 : continue;
2181 :
2182 0 : if (append_sep) {
2183 0 : ret = strscpy(names, sep, size);
2184 0 : if (ret < 0)
2185 : return false;
2186 :
2187 0 : names += ret;
2188 0 : size -= ret;
2189 : } else
2190 : append_sep = true;
2191 :
2192 0 : ret = strscpy(names, cur->name, size);
2193 0 : if (ret < 0)
2194 : return false;
2195 :
2196 0 : names += ret;
2197 0 : size -= ret;
2198 : }
2199 :
2200 : return true;
2201 : }
2202 :
2203 0 : static bool seccomp_action_logged_from_name(u32 *action_logged,
2204 : const char *name)
2205 : {
2206 : const struct seccomp_log_name *cur;
2207 :
2208 0 : for (cur = seccomp_log_names; cur->name; cur++) {
2209 0 : if (!strcmp(cur->name, name)) {
2210 0 : *action_logged = cur->log;
2211 0 : return true;
2212 : }
2213 : }
2214 :
2215 : return false;
2216 : }
2217 :
2218 0 : static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2219 : {
2220 : char *name;
2221 :
2222 0 : *actions_logged = 0;
2223 0 : while ((name = strsep(&names, " ")) && *name) {
2224 0 : u32 action_logged = 0;
2225 :
2226 0 : if (!seccomp_action_logged_from_name(&action_logged, name))
2227 0 : return false;
2228 :
2229 0 : *actions_logged |= action_logged;
2230 : }
2231 :
2232 : return true;
2233 : }
2234 :
2235 0 : static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
2236 : size_t *lenp, loff_t *ppos)
2237 : {
2238 : char names[sizeof(seccomp_actions_avail)];
2239 : struct ctl_table table;
2240 :
2241 0 : memset(names, 0, sizeof(names));
2242 :
2243 0 : if (!seccomp_names_from_actions_logged(names, sizeof(names),
2244 : seccomp_actions_logged, " "))
2245 : return -EINVAL;
2246 :
2247 0 : table = *ro_table;
2248 0 : table.data = names;
2249 0 : table.maxlen = sizeof(names);
2250 0 : return proc_dostring(&table, 0, buffer, lenp, ppos);
2251 : }
2252 :
2253 0 : static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
2254 : size_t *lenp, loff_t *ppos, u32 *actions_logged)
2255 : {
2256 : char names[sizeof(seccomp_actions_avail)];
2257 : struct ctl_table table;
2258 : int ret;
2259 :
2260 0 : if (!capable(CAP_SYS_ADMIN))
2261 : return -EPERM;
2262 :
2263 0 : memset(names, 0, sizeof(names));
2264 :
2265 0 : table = *ro_table;
2266 0 : table.data = names;
2267 0 : table.maxlen = sizeof(names);
2268 0 : ret = proc_dostring(&table, 1, buffer, lenp, ppos);
2269 0 : if (ret)
2270 : return ret;
2271 :
2272 0 : if (!seccomp_actions_logged_from_names(actions_logged, table.data))
2273 : return -EINVAL;
2274 :
2275 0 : if (*actions_logged & SECCOMP_LOG_ALLOW)
2276 : return -EINVAL;
2277 :
2278 0 : seccomp_actions_logged = *actions_logged;
2279 0 : return 0;
2280 : }
2281 :
2282 : static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
2283 : int ret)
2284 : {
2285 : char names[sizeof(seccomp_actions_avail)];
2286 : char old_names[sizeof(seccomp_actions_avail)];
2287 0 : const char *new = names;
2288 0 : const char *old = old_names;
2289 :
2290 : if (!audit_enabled)
2291 : return;
2292 :
2293 : memset(names, 0, sizeof(names));
2294 : memset(old_names, 0, sizeof(old_names));
2295 :
2296 : if (ret)
2297 : new = "?";
2298 : else if (!actions_logged)
2299 : new = "(none)";
2300 : else if (!seccomp_names_from_actions_logged(names, sizeof(names),
2301 : actions_logged, ","))
2302 : new = "?";
2303 :
2304 : if (!old_actions_logged)
2305 : old = "(none)";
2306 : else if (!seccomp_names_from_actions_logged(old_names,
2307 : sizeof(old_names),
2308 : old_actions_logged, ","))
2309 : old = "?";
2310 :
2311 : return audit_seccomp_actions_logged(new, old, !ret);
2312 : }
2313 :
2314 0 : static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2315 : void *buffer, size_t *lenp,
2316 : loff_t *ppos)
2317 : {
2318 : int ret;
2319 :
2320 0 : if (write) {
2321 0 : u32 actions_logged = 0;
2322 0 : u32 old_actions_logged = seccomp_actions_logged;
2323 :
2324 0 : ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2325 : &actions_logged);
2326 0 : audit_actions_logged(actions_logged, old_actions_logged, ret);
2327 : } else
2328 0 : ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2329 :
2330 0 : return ret;
2331 : }
2332 :
2333 : static struct ctl_path seccomp_sysctl_path[] = {
2334 : { .procname = "kernel", },
2335 : { .procname = "seccomp", },
2336 : { }
2337 : };
2338 :
2339 : static struct ctl_table seccomp_sysctl_table[] = {
2340 : {
2341 : .procname = "actions_avail",
2342 : .data = (void *) &seccomp_actions_avail,
2343 : .maxlen = sizeof(seccomp_actions_avail),
2344 : .mode = 0444,
2345 : .proc_handler = proc_dostring,
2346 : },
2347 : {
2348 : .procname = "actions_logged",
2349 : .mode = 0644,
2350 : .proc_handler = seccomp_actions_logged_handler,
2351 : },
2352 : { }
2353 : };
2354 :
2355 1 : static int __init seccomp_sysctl_init(void)
2356 : {
2357 : struct ctl_table_header *hdr;
2358 :
2359 1 : hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2360 1 : if (!hdr)
2361 0 : pr_warn("sysctl registration failed\n");
2362 : else
2363 : kmemleak_not_leak(hdr);
2364 :
2365 1 : return 0;
2366 : }
2367 :
2368 : device_initcall(seccomp_sysctl_init)
2369 :
2370 : #endif /* CONFIG_SYSCTL */
2371 :
2372 : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
2373 : /* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
2374 : static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2375 : const void *bitmap, size_t bitmap_size)
2376 : {
2377 : int nr;
2378 :
2379 : for (nr = 0; nr < bitmap_size; nr++) {
2380 : bool cached = test_bit(nr, bitmap);
2381 : char *status = cached ? "ALLOW" : "FILTER";
2382 :
2383 : seq_printf(m, "%s %d %s\n", name, nr, status);
2384 : }
2385 : }
2386 :
2387 : int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
2388 : struct pid *pid, struct task_struct *task)
2389 : {
2390 : struct seccomp_filter *f;
2391 : unsigned long flags;
2392 :
2393 : /*
2394 : * We don't want a sandboxed process to know what its seccomp
2395 : * filters consist of.
2396 : */
2397 : if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
2398 : return -EACCES;
2399 :
2400 : if (!lock_task_sighand(task, &flags))
2401 : return -ESRCH;
2402 :
2403 : f = READ_ONCE(task->seccomp.filter);
2404 : if (!f) {
2405 : unlock_task_sighand(task, &flags);
2406 : return 0;
2407 : }
2408 :
2409 : /* prevent filter from being freed while we are printing it */
2410 : __get_seccomp_filter(f);
2411 : unlock_task_sighand(task, &flags);
2412 :
2413 : proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
2414 : f->cache.allow_native,
2415 : SECCOMP_ARCH_NATIVE_NR);
2416 :
2417 : #ifdef SECCOMP_ARCH_COMPAT
2418 : proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
2419 : f->cache.allow_compat,
2420 : SECCOMP_ARCH_COMPAT_NR);
2421 : #endif /* SECCOMP_ARCH_COMPAT */
2422 :
2423 : __put_seccomp_filter(f);
2424 : return 0;
2425 : }
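/*
 * Editor's sketch (not part of this file): with CONFIG_SECCOMP_CACHE_DEBUG,
 * the seq_printf() output above is exposed (to CAP_SYS_ADMIN) as lines of
 * "<arch> <nr> <ALLOW|FILTER>" in per-task procfs:
 *
 *	char path[64];
 *	snprintf(path, sizeof(path), "/proc/%d/seccomp_cache", pid);
 *	FILE *f = fopen(path, "r");	// then read it line by line
 */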
2426 : #endif /* CONFIG_SECCOMP_CACHE_DEBUG */