// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>

#include "kernfs-internal.h"

/*
 * There's one kernfs_open_file for each open file and one kernfs_open_node
 * for each kernfs_node with one or more open files.
 *
 * kernfs_node->attr.open points to kernfs_open_node. attr.open is
 * protected by kernfs_open_node_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * kernfs_open_file. kernfs_open_files are chained at
 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
 */
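/*
 * Lock ordering, as established by kernfs_get_open_node() and
 * kernfs_put_open_node() below: kernfs_open_file_mutex is acquired
 * first and kernfs_open_node_lock nests inside it.
 */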
static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
        atomic_t                refcnt;
        atomic_t                event;
        wait_queue_head_t       poll;
        struct list_head        files; /* goes through kernfs_open_file.list */
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item. To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for NULL.
 */
#define KERNFS_NOTIFY_EOL       ((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
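/*
 * For example, with nodes A and B pending (B queued after A, as
 * kernfs_notify() pushes to the front), the state is:
 *
 *	kernfs_notify_list -> B -> A -> KERNFS_NOTIFY_EOL
 *
 * while ->attr.notify_next is NULL for every node that is not queued.
 */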

static struct kernfs_open_file *kernfs_of(struct file *file)
{
        return ((struct seq_file *)file->private_data)->private;
}

/*
 * Determine the kernfs_ops for the given kernfs_node. This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
        if (kn->flags & KERNFS_LOCKDEP)
                lockdep_assert_held(kn);
        return kn->attr.ops;
}

/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure. The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it. As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed or not only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
        struct kernfs_open_file *of = sf->private;
        const struct kernfs_ops *ops = kernfs_ops(of->kn);

        if (ops->seq_stop)
                ops->seq_stop(sf, v);
        kernfs_put_active(of->kn);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
        struct kernfs_open_file *of = sf->private;
        const struct kernfs_ops *ops;

        /*
         * @of->mutex nests outside active ref and is primarily to ensure that
         * the ops aren't called concurrently for the same open file.
         */
        mutex_lock(&of->mutex);
        if (!kernfs_get_active(of->kn))
                return ERR_PTR(-ENODEV);

        ops = kernfs_ops(of->kn);
        if (ops->seq_start) {
                void *next = ops->seq_start(sf, ppos);

                /* see the comment above kernfs_seq_stop_active() */
                if (next == ERR_PTR(-ENODEV))
                        kernfs_seq_stop_active(sf, next);
                return next;
        }
        return single_start(sf, ppos);
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
        struct kernfs_open_file *of = sf->private;
        const struct kernfs_ops *ops = kernfs_ops(of->kn);

        if (ops->seq_next) {
                void *next = ops->seq_next(sf, v, ppos);

                /* see the comment above kernfs_seq_stop_active() */
                if (next == ERR_PTR(-ENODEV))
                        kernfs_seq_stop_active(sf, next);
                return next;
        } else {
                /*
                 * The same behavior and code as single_open(), always
                 * terminate after the initial read.
                 */
                ++*ppos;
                return NULL;
        }
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
        struct kernfs_open_file *of = sf->private;

        if (v != ERR_PTR(-ENODEV))
                kernfs_seq_stop_active(sf, v);
        mutex_unlock(&of->mutex);
}

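/*
 * Snapshot the node's event counter in @of before showing, so that
 * kernfs_generic_poll() can later tell whether a new kernfs_notify()
 * has happened since this read.
 */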
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
        struct kernfs_open_file *of = sf->private;

        of->event = atomic_read(&of->kn->attr.open->event);

        return of->kn->attr.ops->seq_show(sf, v);
}

static const struct seq_operations kernfs_seq_ops = {
        .start = kernfs_seq_start,
        .next = kernfs_seq_next,
        .stop = kernfs_seq_stop,
        .show = kernfs_seq_show,
};

/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in the read(2) call should be passed to the read callback,
 * making it difficult to use seq_file. Implement simplistic custom
 * buffering for bin files.
 */
static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
        ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
        const struct kernfs_ops *ops;
        char *buf;

        buf = of->prealloc_buf;
        if (buf)
                mutex_lock(&of->prealloc_mutex);
        else
                buf = kmalloc(len, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /*
         * @of->mutex nests outside active ref and is used to ensure that
         * the ops aren't called concurrently for the same open file.
         */
        mutex_lock(&of->mutex);
        if (!kernfs_get_active(of->kn)) {
                len = -ENODEV;
                mutex_unlock(&of->mutex);
                goto out_free;
        }

        of->event = atomic_read(&of->kn->attr.open->event);
        ops = kernfs_ops(of->kn);
        if (ops->read)
                len = ops->read(of, buf, len, iocb->ki_pos);
        else
                len = -EINVAL;

        kernfs_put_active(of->kn);
        mutex_unlock(&of->mutex);

        if (len < 0)
                goto out_free;

        if (copy_to_iter(buf, len, iter) != len) {
                len = -EFAULT;
                goto out_free;
        }

        iocb->ki_pos += len;

out_free:
        if (buf == of->prealloc_buf)
                mutex_unlock(&of->prealloc_mutex);
        else
                kfree(buf);
        return len;
}

static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
                return seq_read_iter(iocb, iter);
        return kernfs_file_read_iter(iocb, iter);
}

/*
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them. We expect the entire buffer to come on
 * the first write. Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write the entire buffer
 * back.
 */
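/*
 * For example, a userland read-modify-write of a multi-value attribute
 * (illustrative sketch only):
 *
 *	pread(fd, buf, sizeof(buf), 0);	// read the current content
 *	... modify only the value of interest ...
 *	pwrite(fd, buf, len, 0);	// write everything back in one call
 */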
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
        ssize_t len = iov_iter_count(iter);
        const struct kernfs_ops *ops;
        char *buf;

        if (of->atomic_write_len) {
                if (len > of->atomic_write_len)
                        return -E2BIG;
        } else {
                len = min_t(size_t, len, PAGE_SIZE);
        }

        buf = of->prealloc_buf;
        if (buf)
                mutex_lock(&of->prealloc_mutex);
        else
                buf = kmalloc(len + 1, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (copy_from_iter(buf, len, iter) != len) {
                len = -EFAULT;
                goto out_free;
        }
        buf[len] = '\0';        /* guarantee string termination */

        /*
         * @of->mutex nests outside active ref and is used to ensure that
         * the ops aren't called concurrently for the same open file.
         */
        mutex_lock(&of->mutex);
        if (!kernfs_get_active(of->kn)) {
                mutex_unlock(&of->mutex);
                len = -ENODEV;
                goto out_free;
        }

        ops = kernfs_ops(of->kn);
        if (ops->write)
                len = ops->write(of, buf, len, iocb->ki_pos);
        else
                len = -EINVAL;

        kernfs_put_active(of->kn);
        mutex_unlock(&of->mutex);

        if (len > 0)
                iocb->ki_pos += len;

out_free:
        if (buf == of->prealloc_buf)
                mutex_unlock(&of->prealloc_mutex);
        else
                kfree(buf);
        return len;
}

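/*
 * The vm operation wrappers below all follow the same pattern: forward to
 * the vm_ops installed by the implementation's ->mmap() only if one exists
 * and an active reference on the backing kernfs_node can be acquired;
 * otherwise fall back to a safe default for the operation in question.
 */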
static void kernfs_vma_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);

        if (!of->vm_ops)
                return;

        if (!kernfs_get_active(of->kn))
                return;

        if (of->vm_ops->open)
                of->vm_ops->open(vma);

        kernfs_put_active(of->kn);
}

static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        vm_fault_t ret;

        if (!of->vm_ops)
                return VM_FAULT_SIGBUS;

        if (!kernfs_get_active(of->kn))
                return VM_FAULT_SIGBUS;

        ret = VM_FAULT_SIGBUS;
        if (of->vm_ops->fault)
                ret = of->vm_ops->fault(vmf);

        kernfs_put_active(of->kn);
        return ret;
}

static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
        struct file *file = vmf->vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        vm_fault_t ret;

        if (!of->vm_ops)
                return VM_FAULT_SIGBUS;

        if (!kernfs_get_active(of->kn))
                return VM_FAULT_SIGBUS;

        ret = 0;
        if (of->vm_ops->page_mkwrite)
                ret = of->vm_ops->page_mkwrite(vmf);
        else
                file_update_time(file);

        kernfs_put_active(of->kn);
        return ret;
}

static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
                             void *buf, int len, int write)
{
        struct file *file = vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        int ret;

        if (!of->vm_ops)
                return -EINVAL;

        if (!kernfs_get_active(of->kn))
                return -EINVAL;

        ret = -EINVAL;
        if (of->vm_ops->access)
                ret = of->vm_ops->access(vma, addr, buf, len, write);

        kernfs_put_active(of->kn);
        return ret;
}

#ifdef CONFIG_NUMA
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
                                 struct mempolicy *new)
{
        struct file *file = vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        int ret;

        if (!of->vm_ops)
                return 0;

        if (!kernfs_get_active(of->kn))
                return -EINVAL;

        ret = 0;
        if (of->vm_ops->set_policy)
                ret = of->vm_ops->set_policy(vma, new);

        kernfs_put_active(of->kn);
        return ret;
}

static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
                                               unsigned long addr)
{
        struct file *file = vma->vm_file;
        struct kernfs_open_file *of = kernfs_of(file);
        struct mempolicy *pol;

        if (!of->vm_ops)
                return vma->vm_policy;

        if (!kernfs_get_active(of->kn))
                return vma->vm_policy;

        pol = vma->vm_policy;
        if (of->vm_ops->get_policy)
                pol = of->vm_ops->get_policy(vma, addr);

        kernfs_put_active(of->kn);
        return pol;
}
#endif

static const struct vm_operations_struct kernfs_vm_ops = {
        .open           = kernfs_vma_open,
        .fault          = kernfs_vma_fault,
        .page_mkwrite   = kernfs_vma_page_mkwrite,
        .access         = kernfs_vma_access,
#ifdef CONFIG_NUMA
        .set_policy     = kernfs_vma_set_policy,
        .get_policy     = kernfs_vma_get_policy,
#endif
};

static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct kernfs_open_file *of = kernfs_of(file);
        const struct kernfs_ops *ops;
        int rc;

        /*
         * mmap path and of->mutex are prone to triggering spurious lockdep
         * warnings and we don't want to add a spurious locking dependency
         * between the two. Check whether mmap is actually implemented
         * without grabbing @of->mutex by testing the HAS_MMAP flag. See
         * the comment in kernfs_fop_open() for more details.
         */
        if (!(of->kn->flags & KERNFS_HAS_MMAP))
                return -ENODEV;

        mutex_lock(&of->mutex);

        rc = -ENODEV;
        if (!kernfs_get_active(of->kn))
                goto out_unlock;

        ops = kernfs_ops(of->kn);
        rc = ops->mmap(of, vma);
        if (rc)
                goto out_put;

        /*
         * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
         * to satisfy versions of X which crash if the mmap fails: that
         * substitutes a new vm_file, and we don't then want bin_vm_ops.
         */
        if (vma->vm_file != file)
                goto out_put;

        rc = -EINVAL;
        if (of->mmapped && of->vm_ops != vma->vm_ops)
                goto out_put;

        /*
         * It is not possible to successfully wrap close.
         * So error if someone is trying to use close.
         */
        rc = -EINVAL;
        if (vma->vm_ops && vma->vm_ops->close)
                goto out_put;

        rc = 0;
        of->mmapped = true;
        of->vm_ops = vma->vm_ops;
        vma->vm_ops = &kernfs_vm_ops;
out_put:
        kernfs_put_active(of->kn);
out_unlock:
        mutex_unlock(&of->mutex);

        return rc;
}

/**
 * kernfs_get_open_node - get or create kernfs_open_node
 * @kn: target kernfs_node
 * @of: kernfs_open_file for this instance of open
 *
 * If @kn->attr.open exists, increment its reference count; otherwise,
 * create one. @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
                                struct kernfs_open_file *of)
{
        struct kernfs_open_node *on, *new_on = NULL;

retry:
        mutex_lock(&kernfs_open_file_mutex);
        spin_lock_irq(&kernfs_open_node_lock);

        if (!kn->attr.open && new_on) {
                kn->attr.open = new_on;
                new_on = NULL;
        }

        on = kn->attr.open;
        if (on) {
                atomic_inc(&on->refcnt);
                list_add_tail(&of->list, &on->files);
        }

        spin_unlock_irq(&kernfs_open_node_lock);
        mutex_unlock(&kernfs_open_file_mutex);

        if (on) {
                kfree(new_on);
                return 0;
        }

        /* not there, initialize a new one and retry */
        new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
        if (!new_on)
                return -ENOMEM;

        atomic_set(&new_on->refcnt, 0);
        atomic_set(&new_on->event, 1);
        init_waitqueue_head(&new_on->poll);
        INIT_LIST_HEAD(&new_on->files);
        goto retry;
}

/**
 * kernfs_put_open_node - put kernfs_open_node
 * @kn: target kernfs_node
 * @of: associated kernfs_open_file
 *
 * Put @kn->attr.open and unlink @of from the files list. If the
 * reference count reaches zero, disassociate and free it.
 *
 * LOCKING:
 * None.
 */
static void kernfs_put_open_node(struct kernfs_node *kn,
                                 struct kernfs_open_file *of)
{
        struct kernfs_open_node *on = kn->attr.open;
        unsigned long flags;

        mutex_lock(&kernfs_open_file_mutex);
        spin_lock_irqsave(&kernfs_open_node_lock, flags);

        if (of)
                list_del(&of->list);

        if (atomic_dec_and_test(&on->refcnt))
                kn->attr.open = NULL;
        else
                on = NULL;

        spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
        mutex_unlock(&kernfs_open_file_mutex);

        kfree(on);
}

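/*
 * Open path: check the requested access against the ops the node
 * implements, allocate and set up a kernfs_open_file, attach it via a
 * seq_file, register it on the node's kernfs_open_node and finally call
 * the implementation's ->open(), if any.
 */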
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
        struct kernfs_node *kn = inode->i_private;
        struct kernfs_root *root = kernfs_root(kn);
        const struct kernfs_ops *ops;
        struct kernfs_open_file *of;
        bool has_read, has_write, has_mmap;
        int error = -EACCES;

        if (!kernfs_get_active(kn))
                return -ENODEV;

        ops = kernfs_ops(kn);

        has_read = ops->seq_show || ops->read || ops->mmap;
        has_write = ops->write || ops->mmap;
        has_mmap = ops->mmap;

        /* see the flag definition for details */
        if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
                if ((file->f_mode & FMODE_WRITE) &&
                    (!(inode->i_mode & S_IWUGO) || !has_write))
                        goto err_out;

                if ((file->f_mode & FMODE_READ) &&
                    (!(inode->i_mode & S_IRUGO) || !has_read))
                        goto err_out;
        }

        /* allocate a kernfs_open_file for the file */
        error = -ENOMEM;
        of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
        if (!of)
                goto err_out;

        /*
         * The following is done to give a different lockdep key to
         * @of->mutex for files which implement mmap. This is a rather
         * crude way to avoid false positive lockdep warnings around
         * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
         * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
         * which mm->mmap_lock nests, while holding @of->mutex. As each
         * open file has a separate mutex, it's okay as long as those don't
         * happen on the same file. At this point, we can't easily give
         * each file a separate locking class. Let's differentiate on
         * whether the file has mmap or not for now.
         *
         * Both paths of the branch look the same. They're supposed to
         * look that way and give @of->mutex different static lockdep keys.
         */
        if (has_mmap)
                mutex_init(&of->mutex);
        else
                mutex_init(&of->mutex);

        of->kn = kn;
        of->file = file;

        /*
         * The write path needs to know atomic_write_len outside the
         * active reference. Cache it in open_file. See
         * kernfs_fop_write_iter() for details.
         */
        of->atomic_write_len = ops->atomic_write_len;

        error = -EINVAL;
        /*
         * ->seq_show is incompatible with ->prealloc,
         * as seq_read does its own allocation.
         * ->read must be used instead.
         */
        if (ops->prealloc && ops->seq_show)
                goto err_free;
        if (ops->prealloc) {
                int len = of->atomic_write_len ?: PAGE_SIZE;

                of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
                error = -ENOMEM;
                if (!of->prealloc_buf)
                        goto err_free;
                mutex_init(&of->prealloc_mutex);
        }

        /*
         * Always instantiate seq_file even if read access doesn't use
         * seq_file or is not requested. This unifies private data access
         * and readable regular files are the vast majority anyway.
         */
        if (ops->seq_show)
                error = seq_open(file, &kernfs_seq_ops);
        else
                error = seq_open(file, NULL);
        if (error)
                goto err_free;

        of->seq_file = file->private_data;
        of->seq_file->private = of;

        /* seq_file clears PWRITE unconditionally, restore it if WRITE */
        if (file->f_mode & FMODE_WRITE)
                file->f_mode |= FMODE_PWRITE;

        /* make sure we have the open node struct */
        error = kernfs_get_open_node(kn, of);
        if (error)
                goto err_seq_release;

        if (ops->open) {
                /* nobody has access to @of yet, skip @of->mutex */
                error = ops->open(of);
                if (error)
                        goto err_put_node;
        }

        /* open succeeded, put active references */
        kernfs_put_active(kn);
        return 0;

err_put_node:
        kernfs_put_open_node(kn, of);
err_seq_release:
        seq_release(inode, file);
err_free:
        kfree(of->prealloc_buf);
        kfree(of);
err_out:
        kernfs_put_active(kn);
        return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
                                struct kernfs_open_file *of)
{
        /*
         * @of is guaranteed to have no other file operations in flight and
         * we just want to synchronize release and drain paths.
         * @kernfs_open_file_mutex is enough. @of->mutex can't be used
         * here because the drain path may be called from places which can
         * cause circular dependency.
         */
        lockdep_assert_held(&kernfs_open_file_mutex);

        if (!of->released) {
                /*
                 * A file is never detached without being released and we
                 * need to be able to release files which are deactivated
                 * and being drained. Don't use kernfs_ops().
                 */
                kn->attr.ops->release(of);
                of->released = true;
        }
}

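/*
 * Counterpart to kernfs_fop_open(): runs ->release() if the node has one
 * (at most once, see kernfs_release_file()), drops this file from the
 * open node and frees the seq_file and buffers.
 */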
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
        struct kernfs_node *kn = inode->i_private;
        struct kernfs_open_file *of = kernfs_of(filp);

        if (kn->flags & KERNFS_HAS_RELEASE) {
                mutex_lock(&kernfs_open_file_mutex);
                kernfs_release_file(kn, of);
                mutex_unlock(&kernfs_open_file_mutex);
        }

        kernfs_put_open_node(kn, of);
        seq_release(inode, filp);
        kfree(of->prealloc_buf);
        kfree(of);

        return 0;
}

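/*
 * kernfs_drain_open_files - sever mmaps and release open files of a node
 * @kn: target kernfs_node which is being drained
 *
 * Unmap all mappings and call ->release() on every open file of @kn.
 * Only nodes with KERNFS_HAS_MMAP or KERNFS_HAS_RELEASE need this; all
 * others return immediately.
 */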
void kernfs_drain_open_files(struct kernfs_node *kn)
{
        struct kernfs_open_node *on;
        struct kernfs_open_file *of;

        if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
                return;

        spin_lock_irq(&kernfs_open_node_lock);
        on = kn->attr.open;
        if (on)
                atomic_inc(&on->refcnt);
        spin_unlock_irq(&kernfs_open_node_lock);
        if (!on)
                return;

        mutex_lock(&kernfs_open_file_mutex);

        list_for_each_entry(of, &on->files, list) {
                struct inode *inode = file_inode(of->file);

                if (kn->flags & KERNFS_HAS_MMAP)
                        unmap_mapping_range(inode->i_mapping, 0, 0, 1);

                if (kn->flags & KERNFS_HAS_RELEASE)
                        kernfs_release_file(kn, of);
        }

        mutex_unlock(&kernfs_open_file_mutex);

        kernfs_put_open_node(kn, NULL);
}

/*
 * Kernfs attribute files are pollable. The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change. When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (neither 'poll' nor 'select' returns
 * an appropriate error code). When in doubt, set a suitable timeout value.
 */
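/*
 * Minimal userland sketch of the protocol described above, assuming a
 * pollable attribute file:
 *
 *	int fd = open("/sys/.../attr", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLERR | POLLPRI };
 *
 *	read(fd, buf, sizeof(buf));	// consume the current value
 *	poll(&pfd, 1, timeout);		// wait for a change
 *	lseek(fd, 0, SEEK_SET);		// rewind ...
 *	read(fd, buf, sizeof(buf));	// ... and read the new value
 */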
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
        struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
        struct kernfs_open_node *on = kn->attr.open;

        poll_wait(of->file, &on->poll, wait);

        if (of->event != atomic_read(&on->event))
                return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

        return DEFAULT_POLLMASK;
}

static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
        struct kernfs_open_file *of = kernfs_of(filp);
        struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
        __poll_t ret;

        if (!kernfs_get_active(kn))
                return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

        if (kn->attr.ops->poll)
                ret = kn->attr.ops->poll(of, wait);
        else
                ret = kernfs_generic_poll(of, wait);

        kernfs_put_active(kn);
        return ret;
}

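/*
 * Work item body for kernfs_notify(): pops nodes off kernfs_notify_list
 * one at a time and generates an FS_MODIFY fsnotify event for each node
 * on every superblock of its kernfs root.
 */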
static void kernfs_notify_workfn(struct work_struct *work)
{
        struct kernfs_node *kn;
        struct kernfs_super_info *info;
        struct kernfs_root *root;
repeat:
        /* pop one off the notify_list */
        spin_lock_irq(&kernfs_notify_lock);
        kn = kernfs_notify_list;
        if (kn == KERNFS_NOTIFY_EOL) {
                spin_unlock_irq(&kernfs_notify_lock);
                return;
        }
        kernfs_notify_list = kn->attr.notify_next;
        kn->attr.notify_next = NULL;
        spin_unlock_irq(&kernfs_notify_lock);

        root = kernfs_root(kn);
        /* kick fsnotify */
        down_write(&root->kernfs_rwsem);

        list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
                struct kernfs_node *parent;
                struct inode *p_inode = NULL;
                struct inode *inode;
                struct qstr name;

                /*
                 * We want fsnotify_modify() on @kn but as the
                 * modifications aren't originating from userland, we
                 * don't have the matching @file available. Look up the
                 * inodes and generate the events manually.
                 */
                inode = ilookup(info->sb, kernfs_ino(kn));
                if (!inode)
                        continue;

                name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
                parent = kernfs_get_parent(kn);
                if (parent) {
                        p_inode = ilookup(info->sb, kernfs_ino(parent));
                        if (p_inode) {
                                fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
                                         inode, FSNOTIFY_EVENT_INODE,
                                         p_inode, &name, inode, 0);
                                iput(p_inode);
                        }

                        kernfs_put(parent);
                }

                if (!p_inode)
                        fsnotify_inode(inode, FS_MODIFY);

                iput(inode);
        }

        up_write(&root->kernfs_rwsem);
        kernfs_put(kn);
        goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up. May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
        static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
        unsigned long flags;
        struct kernfs_open_node *on;

        if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
                return;

        /* kick poll immediately */
        spin_lock_irqsave(&kernfs_open_node_lock, flags);
        on = kn->attr.open;
        if (on) {
                atomic_inc(&on->event);
                wake_up_interruptible(&on->poll);
        }
        spin_unlock_irqrestore(&kernfs_open_node_lock, flags);

        /* schedule work to kick fsnotify */
        spin_lock_irqsave(&kernfs_notify_lock, flags);
        if (!kn->attr.notify_next) {
                kernfs_get(kn);
                kn->attr.notify_next = kernfs_notify_list;
                kernfs_notify_list = kn;
                schedule_work(&kernfs_notify_work);
        }
        spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);

const struct file_operations kernfs_file_fops = {
        .read_iter      = kernfs_fop_read_iter,
        .write_iter     = kernfs_fop_write_iter,
        .llseek         = generic_file_llseek,
        .mmap           = kernfs_fop_mmap,
        .open           = kernfs_fop_open,
        .release        = kernfs_fop_release,
        .poll           = kernfs_fop_poll,
        .fsync          = noop_fsync,
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
                                         const char *name,
                                         umode_t mode, kuid_t uid, kgid_t gid,
                                         loff_t size,
                                         const struct kernfs_ops *ops,
                                         void *priv, const void *ns,
                                         struct lock_class_key *key)
{
        struct kernfs_node *kn;
        unsigned flags;
        int rc;

        flags = KERNFS_FILE;

        kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
                             uid, gid, flags);
        if (!kn)
                return ERR_PTR(-ENOMEM);

        kn->attr.ops = ops;
        kn->attr.size = size;
        kn->ns = ns;
        kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (key) {
                lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
                kn->flags |= KERNFS_LOCKDEP;
        }
#endif

        /*
         * kn->attr.ops is accessible only while holding an active ref. We
         * need to know whether some ops are implemented outside the active
         * ref. Cache their existence in flags.
         */
        if (ops->seq_show)
                kn->flags |= KERNFS_HAS_SEQ_SHOW;
        if (ops->mmap)
                kn->flags |= KERNFS_HAS_MMAP;
        if (ops->release)
                kn->flags |= KERNFS_HAS_RELEASE;

        rc = kernfs_add_one(kn);
        if (rc) {
                kernfs_put(kn);
                return ERR_PTR(rc);
        }
        return kn;
}