Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * fs/kernfs/dir.c - kernfs directory implementation
4 : *
5 : * Copyright (c) 2001-3 Patrick Mochel
6 : * Copyright (c) 2007 SUSE Linux Products GmbH
7 : * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
8 : */
9 :
10 : #include <linux/sched.h>
11 : #include <linux/fs.h>
12 : #include <linux/namei.h>
13 : #include <linux/idr.h>
14 : #include <linux/slab.h>
15 : #include <linux/security.h>
16 : #include <linux/hash.h>
17 :
18 : #include "kernfs-internal.h"
19 :
/*
 * File-scope state shared by the helpers below.  kernfs_pr_cont_buf is a
 * single static scratch buffer; the pr_cont_*() helpers and kernfs_walk_ns()
 * in this file take kernfs_rename_lock around every use of it.
 */
20 : static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
21 : static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
22 : static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
23 :
/* Map an rb_node embedded as kernfs_node->rb back to its kernfs_node. */
24 : #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
25 :
26 : static bool kernfs_active(struct kernfs_node *kn)
27 : {
28 15699 : lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
29 31398 : return atomic_read(&kn->active) >= 0;
30 : }
31 :
32 : static bool kernfs_lockdep(struct kernfs_node *kn)
33 : {
34 : #ifdef CONFIG_DEBUG_LOCK_ALLOC
35 : return kn->flags & KERNFS_LOCKDEP;
36 : #else
37 : return false;
38 : #endif
39 : }
40 :
41 0 : static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
42 : {
43 0 : if (!kn)
44 0 : return strlcpy(buf, "(null)", buflen);
45 :
46 0 : return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
47 : }
48 :
49 : /* kernfs_node_depth - compute depth from @from to @to */
50 : static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
51 : {
52 0 : size_t depth = 0;
53 :
54 0 : while (to->parent && to != from) {
55 0 : depth++;
56 0 : to = to->parent;
57 : }
58 : return depth;
59 : }
60 :
61 0 : static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
62 : struct kernfs_node *b)
63 : {
64 : size_t da, db;
65 0 : struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
66 :
67 0 : if (ra != rb)
68 : return NULL;
69 :
70 0 : da = kernfs_depth(ra->kn, a);
71 0 : db = kernfs_depth(rb->kn, b);
72 :
73 0 : while (da > db) {
74 0 : a = a->parent;
75 0 : da--;
76 : }
77 0 : while (db > da) {
78 0 : b = b->parent;
79 0 : db--;
80 : }
81 :
82 : /* worst case b and a will be the same at root */
83 0 : while (b != a) {
84 0 : b = b->parent;
85 0 : a = a->parent;
86 : }
87 :
88 : return a;
89 : }
90 :
91 : /**
92 : * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
93 : * where kn_from is treated as root of the path.
94 : * @kn_from: kernfs node which should be treated as root for the path
95 : * @kn_to: kernfs node to which path is needed
96 : * @buf: buffer to copy the path into
97 : * @buflen: size of @buf
98 : *
99 : * We need to handle couple of scenarios here:
100 : * [1] when @kn_from is an ancestor of @kn_to at some level
101 : * kn_from: /n1/n2/n3
102 : * kn_to: /n1/n2/n3/n4/n5
103 : * result: /n4/n5
104 : *
105 : * [2] when @kn_from is on a different hierarchy and we need to find common
106 : * ancestor between @kn_from and @kn_to.
107 : * kn_from: /n1/n2/n3/n4
108 : * kn_to: /n1/n2/n5
109 : * result: /../../n5
110 : * OR
111 : * kn_from: /n1/n2/n3/n4/n5 [depth=5]
112 : * kn_to: /n1/n2/n3 [depth=3]
113 : * result: /../..
114 : *
115 : * [3] when @kn_to is NULL result will be "(null)"
116 : *
117 : * Returns the length of the full path. If the full length is equal to or
118 : * greater than @buflen, @buf contains the truncated path with the trailing
119 : * '\0'. On error, -errno is returned.
120 : */
121 0 : static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
122 : struct kernfs_node *kn_from,
123 : char *buf, size_t buflen)
124 : {
125 : struct kernfs_node *kn, *common;
126 0 : const char parent_str[] = "/..";
127 0 : size_t depth_from, depth_to, len = 0;
128 : int i, j;
129 :
130 0 : if (!kn_to)
131 0 : return strlcpy(buf, "(null)", buflen);
132 :
133 0 : if (!kn_from)
134 0 : kn_from = kernfs_root(kn_to)->kn;
135 :
136 0 : if (kn_from == kn_to)
137 0 : return strlcpy(buf, "/", buflen);
138 :
139 0 : if (!buf)
140 : return -EINVAL;
141 :
142 0 : common = kernfs_common_ancestor(kn_from, kn_to);
143 0 : if (WARN_ON(!common))
144 : return -EINVAL;
145 :
146 : depth_to = kernfs_depth(common, kn_to);
147 0 : depth_from = kernfs_depth(common, kn_from);
148 :
149 0 : buf[0] = '\0';
150 :
151 0 : for (i = 0; i < depth_from; i++)
152 0 : len += strlcpy(buf + len, parent_str,
153 : len < buflen ? buflen - len : 0);
154 :
155 : /* Calculate how many bytes we need for the rest */
156 0 : for (i = depth_to - 1; i >= 0; i--) {
157 0 : for (kn = kn_to, j = 0; j < i; j++)
158 0 : kn = kn->parent;
159 0 : len += strlcpy(buf + len, "/",
160 : len < buflen ? buflen - len : 0);
161 0 : len += strlcpy(buf + len, kn->name,
162 : len < buflen ? buflen - len : 0);
163 : }
164 :
165 0 : return len;
166 : }
167 :
168 : /**
169 : * kernfs_name - obtain the name of a given node
170 : * @kn: kernfs_node of interest
171 : * @buf: buffer to copy @kn's name into
172 : * @buflen: size of @buf
173 : *
174 : * Copies the name of @kn into @buf of @buflen bytes. The behavior is
175 : * similar to strlcpy(). It returns the length of @kn's name and if @buf
176 : * isn't long enough, it's filled upto @buflen-1 and nul terminated.
177 : *
178 : * Fills buffer with "(null)" if @kn is NULL.
179 : *
180 : * This function can be called from any context.
181 : */
182 0 : int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
183 : {
184 : unsigned long flags;
185 : int ret;
186 :
187 0 : spin_lock_irqsave(&kernfs_rename_lock, flags);
188 0 : ret = kernfs_name_locked(kn, buf, buflen);
189 0 : spin_unlock_irqrestore(&kernfs_rename_lock, flags);
190 0 : return ret;
191 : }
192 :
193 : /**
194 : * kernfs_path_from_node - build path of node @to relative to @from.
195 : * @from: parent kernfs_node relative to which we need to build the path
196 : * @to: kernfs_node of interest
197 : * @buf: buffer to copy @to's path into
198 : * @buflen: size of @buf
199 : *
200 : * Builds @to's path relative to @from in @buf. @from and @to must
201 : * be on the same kernfs-root. If @from is not parent of @to, then a relative
202 : * path (which includes '..'s) as needed to reach from @from to @to is
203 : * returned.
204 : *
205 : * Returns the length of the full path. If the full length is equal to or
206 : * greater than @buflen, @buf contains the truncated path with the trailing
207 : * '\0'. On error, -errno is returned.
208 : */
209 0 : int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
210 : char *buf, size_t buflen)
211 : {
212 : unsigned long flags;
213 : int ret;
214 :
215 0 : spin_lock_irqsave(&kernfs_rename_lock, flags);
216 0 : ret = kernfs_path_from_node_locked(to, from, buf, buflen);
217 0 : spin_unlock_irqrestore(&kernfs_rename_lock, flags);
218 0 : return ret;
219 : }
220 : EXPORT_SYMBOL_GPL(kernfs_path_from_node);
221 :
222 : /**
223 : * pr_cont_kernfs_name - pr_cont name of a kernfs_node
224 : * @kn: kernfs_node of interest
225 : *
226 : * This function can be called from any context.
227 : */
228 0 : void pr_cont_kernfs_name(struct kernfs_node *kn)
229 : {
230 : unsigned long flags;
231 :
232 0 : spin_lock_irqsave(&kernfs_rename_lock, flags);
233 :
234 0 : kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
235 0 : pr_cont("%s", kernfs_pr_cont_buf);
236 :
237 0 : spin_unlock_irqrestore(&kernfs_rename_lock, flags);
238 0 : }
239 :
240 : /**
241 : * pr_cont_kernfs_path - pr_cont path of a kernfs_node
242 : * @kn: kernfs_node of interest
243 : *
244 : * This function can be called from any context.
245 : */
246 0 : void pr_cont_kernfs_path(struct kernfs_node *kn)
247 : {
248 : unsigned long flags;
249 : int sz;
250 :
251 0 : spin_lock_irqsave(&kernfs_rename_lock, flags);
252 :
253 0 : sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
254 : sizeof(kernfs_pr_cont_buf));
255 0 : if (sz < 0) {
256 0 : pr_cont("(error)");
257 0 : goto out;
258 : }
259 :
260 0 : if (sz >= sizeof(kernfs_pr_cont_buf)) {
261 0 : pr_cont("(name too long)");
262 0 : goto out;
263 : }
264 :
265 0 : pr_cont("%s", kernfs_pr_cont_buf);
266 :
267 : out:
268 0 : spin_unlock_irqrestore(&kernfs_rename_lock, flags);
269 0 : }
270 :
271 : /**
272 : * kernfs_get_parent - determine the parent node and pin it
273 : * @kn: kernfs_node of interest
274 : *
275 : * Determines @kn's parent, pins and returns it. This function can be
276 : * called from any context.
277 : */
278 0 : struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
279 : {
280 : struct kernfs_node *parent;
281 : unsigned long flags;
282 :
283 0 : spin_lock_irqsave(&kernfs_rename_lock, flags);
284 0 : parent = kn->parent;
285 0 : kernfs_get(parent);
286 0 : spin_unlock_irqrestore(&kernfs_rename_lock, flags);
287 :
288 0 : return parent;
289 : }
290 :
291 : /**
292 : * kernfs_name_hash
293 : * @name: Null terminated string to hash
294 : * @ns: Namespace tag to hash
295 : *
296 : * Returns 31 bit hash of ns + name (so it fits in an off_t )
297 : */
298 14000 : static unsigned int kernfs_name_hash(const char *name, const void *ns)
299 : {
300 14000 : unsigned long hash = init_name_hash(ns);
301 14000 : unsigned int len = strlen(name);
302 168746 : while (len--)
303 281492 : hash = partial_name_hash(*name++, hash);
304 14000 : hash = end_name_hash(hash);
305 14000 : hash &= 0x7fffffffU;
306 : /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
307 14000 : if (hash < 2)
308 0 : hash += 2;
309 14000 : if (hash >= INT_MAX)
310 0 : hash = INT_MAX - 1;
311 14000 : return hash;
312 : }
313 :
314 43503 : static int kernfs_name_compare(unsigned int hash, const char *name,
315 : const void *ns, const struct kernfs_node *kn)
316 : {
317 43503 : if (hash < kn->hash)
318 : return -1;
319 22231 : if (hash > kn->hash)
320 : return 1;
321 2863 : if (ns < kn->ns)
322 : return -1;
323 2863 : if (ns > kn->ns)
324 : return 1;
325 2863 : return strcmp(name, kn->name);
326 : }
327 :
328 : static int kernfs_sd_compare(const struct kernfs_node *left,
329 : const struct kernfs_node *right)
330 : {
331 35549 : return kernfs_name_compare(left->hash, left->name, left->ns, right);
332 : }
333 :
334 : /**
335 : * kernfs_link_sibling - link kernfs_node into sibling rbtree
336 : * @kn: kernfs_node of interest
337 : *
338 : * Link @kn into its sibling rbtree which starts from
339 : * @kn->parent->dir.children.
340 : *
341 : * Locking:
342 : * kernfs_rwsem held exclusive
343 : *
344 : * RETURNS:
345 : * 0 on susccess -EEXIST on failure.
346 : */
347 11049 : static int kernfs_link_sibling(struct kernfs_node *kn)
348 : {
349 11049 : struct rb_node **node = &kn->parent->dir.children.rb_node;
350 11049 : struct rb_node *parent = NULL;
351 :
352 57647 : while (*node) {
353 : struct kernfs_node *pos;
354 : int result;
355 :
356 35549 : pos = rb_to_kn(*node);
357 35549 : parent = *node;
358 35549 : result = kernfs_sd_compare(kn, pos);
359 35549 : if (result < 0)
360 18838 : node = &pos->rb.rb_left;
361 16711 : else if (result > 0)
362 16711 : node = &pos->rb.rb_right;
363 : else
364 : return -EEXIST;
365 : }
366 :
367 : /* add new node and rebalance the tree */
368 22098 : rb_link_node(&kn->rb, parent, node);
369 11049 : rb_insert_color(&kn->rb, &kn->parent->dir.children);
370 :
371 : /* successfully added, account subdir number */
372 22098 : if (kernfs_type(kn) == KERNFS_DIR)
373 1363 : kn->parent->dir.subdirs++;
374 11049 : kernfs_inc_rev(kn->parent);
375 :
376 11049 : return 0;
377 : }
378 :
379 : /**
380 : * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
381 : * @kn: kernfs_node of interest
382 : *
383 : * Try to unlink @kn from its sibling rbtree which starts from
384 : * kn->parent->dir.children. Returns %true if @kn was actually
385 : * removed, %false if @kn wasn't on the rbtree.
386 : *
387 : * Locking:
388 : * kernfs_rwsem held exclusive
389 : */
390 2325 : static bool kernfs_unlink_sibling(struct kernfs_node *kn)
391 : {
392 2325 : if (RB_EMPTY_NODE(&kn->rb))
393 : return false;
394 :
395 4650 : if (kernfs_type(kn) == KERNFS_DIR)
396 95 : kn->parent->dir.subdirs--;
397 2325 : kernfs_inc_rev(kn->parent);
398 :
399 2325 : rb_erase(&kn->rb, &kn->parent->dir.children);
400 2325 : RB_CLEAR_NODE(&kn->rb);
401 2325 : return true;
402 : }
403 :
404 : /**
405 : * kernfs_get_active - get an active reference to kernfs_node
406 : * @kn: kernfs_node to get an active reference to
407 : *
408 : * Get an active reference of @kn. This function is noop if @kn
409 : * is NULL.
410 : *
411 : * RETURNS:
412 : * Pointer to @kn on success, NULL on failure.
413 : */
414 0 : struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
415 : {
416 0 : if (unlikely(!kn))
417 : return NULL;
418 :
419 0 : if (!atomic_inc_unless_negative(&kn->active))
420 : return NULL;
421 :
422 0 : if (kernfs_lockdep(kn))
423 : rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
424 0 : return kn;
425 : }
426 :
427 : /**
428 : * kernfs_put_active - put an active reference to kernfs_node
429 : * @kn: kernfs_node to put an active reference to
430 : *
431 : * Put an active reference to @kn. This function is noop if @kn
432 : * is NULL.
433 : */
434 0 : void kernfs_put_active(struct kernfs_node *kn)
435 : {
436 : int v;
437 :
438 0 : if (unlikely(!kn))
439 : return;
440 :
441 0 : if (kernfs_lockdep(kn))
442 : rwsem_release(&kn->dep_map, _RET_IP_);
443 0 : v = atomic_dec_return(&kn->active);
444 0 : if (likely(v != KN_DEACTIVATED_BIAS))
445 : return;
446 :
447 0 : wake_up_all(&kernfs_root(kn)->deactivate_waitq);
448 : }
449 :
450 : /**
451 : * kernfs_drain - drain kernfs_node
452 : * @kn: kernfs_node to drain
453 : *
454 : * Drain existing usages and nuke all existing mmaps of @kn. Multiple
455 : * removers may invoke this function concurrently on @kn and all will
456 : * return after draining is complete.
457 : */
/*
 * Entered and left with the root's kernfs_rwsem held for writing; the lock
 * is dropped while waiting and re-acquired before returning.
 */
458 2325 : static void kernfs_drain(struct kernfs_node *kn)
459 : __releases(&kernfs_root(kn)->kernfs_rwsem)
460 : __acquires(&kernfs_root(kn)->kernfs_rwsem)
461 : {
462 2325 : struct kernfs_root *root = kernfs_root(kn);
463 :
464 : lockdep_assert_held_write(&root->kernfs_rwsem);
/* warn once if @kn is still active when draining starts */
465 2325 : WARN_ON_ONCE(kernfs_active(kn));
466 :
/* release the rwsem for the duration of the wait below */
467 2325 : up_write(&root->kernfs_rwsem);
468 :
/* lockdep annotation only: mark the wait as an acquisition of @kn's dep_map */
469 2325 : if (kernfs_lockdep(kn)) {
470 : rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
471 : if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
472 : lock_contended(&kn->dep_map, _RET_IP_);
473 : }
474 :
475 : /* but everyone should wait for draining */
/* sleep until ->active is back at exactly KN_DEACTIVATED_BIAS */
476 4650 : wait_event(root->deactivate_waitq,
477 : atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
478 :
479 2325 : if (kernfs_lockdep(kn)) {
480 : lock_acquired(&kn->dep_map, _RET_IP_);
481 : rwsem_release(&kn->dep_map, _RET_IP_);
482 : }
483 :
484 2325 : kernfs_drain_open_files(kn);
485 :
/* re-take the rwsem so the caller gets it back as it was on entry */
486 2325 : down_write(&root->kernfs_rwsem);
487 2325 : }
488 :
489 : /**
490 : * kernfs_get - get a reference count on a kernfs_node
491 : * @kn: the target kernfs_node
492 : */
493 18727 : void kernfs_get(struct kernfs_node *kn)
494 : {
495 18727 : if (kn) {
496 37454 : WARN_ON(!atomic_read(&kn->count));
497 18727 : atomic_inc(&kn->count);
498 : }
499 18727 : }
500 : EXPORT_SYMBOL_GPL(kernfs_get);
501 :
502 : /**
503 : * kernfs_put - put a reference count on a kernfs_node
504 : * @kn: the target kernfs_node
505 : *
506 : * Put a reference count of @kn and destroy it if it reached zero.
507 : */
508 7662 : void kernfs_put(struct kernfs_node *kn)
509 : {
510 : struct kernfs_node *parent;
511 : struct kernfs_root *root;
512 :
513 15324 : if (!kn || !atomic_dec_and_test(&kn->count))
514 : return;
515 2325 : root = kernfs_root(kn);
516 : repeat:
517 : /*
518 : * Moving/renaming is always done while holding reference.
519 : * kn->parent won't change beneath us.
520 : */
521 2325 : parent = kn->parent;
522 :
523 4650 : WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
524 : "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
525 : parent ? parent->name : "", kn->name, atomic_read(&kn->active));
526 :
527 4650 : if (kernfs_type(kn) == KERNFS_LINK)
528 0 : kernfs_put(kn->symlink.target_kn);
529 :
530 2325 : kfree_const(kn->name);
531 :
532 2325 : if (kn->iattr) {
533 0 : simple_xattrs_free(&kn->iattr->xattrs);
534 0 : kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
535 : }
536 2325 : spin_lock(&kernfs_idr_lock);
537 4650 : idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
538 2325 : spin_unlock(&kernfs_idr_lock);
539 2325 : kmem_cache_free(kernfs_node_cache, kn);
540 :
541 2325 : kn = parent;
542 2325 : if (kn) {
543 4650 : if (atomic_dec_and_test(&kn->count))
544 : goto repeat;
545 : } else {
546 : /* just released the root kn, free @root too */
547 0 : idr_destroy(&root->ino_idr);
548 0 : kfree(root);
549 : }
550 : }
551 : EXPORT_SYMBOL_GPL(kernfs_put);
552 :
553 : /**
554 : * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
555 : * @dentry: the dentry in question
556 : *
557 : * Return the kernfs_node associated with @dentry. If @dentry is not a
558 : * kernfs one, %NULL is returned.
559 : *
560 : * While the returned kernfs_node will stay accessible as long as @dentry
561 : * is accessible, the returned node can be in any state and the caller is
562 : * fully responsible for determining what's accessible.
563 : */
564 0 : struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
565 : {
566 0 : if (dentry->d_sb->s_op == &kernfs_sops)
567 : return kernfs_dentry_node(dentry);
568 : return NULL;
569 : }
570 :
571 11050 : static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
572 : struct kernfs_node *parent,
573 : const char *name, umode_t mode,
574 : kuid_t uid, kgid_t gid,
575 : unsigned flags)
576 : {
577 : struct kernfs_node *kn;
578 : u32 id_highbits;
579 : int ret;
580 :
581 11050 : name = kstrdup_const(name, GFP_KERNEL);
582 11050 : if (!name)
583 : return NULL;
584 :
585 22100 : kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
586 11050 : if (!kn)
587 : goto err_out1;
588 :
589 11050 : idr_preload(GFP_KERNEL);
590 11050 : spin_lock(&kernfs_idr_lock);
591 11050 : ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
592 11050 : if (ret >= 0 && ret < root->last_id_lowbits)
593 0 : root->id_highbits++;
594 11050 : id_highbits = root->id_highbits;
595 11050 : root->last_id_lowbits = ret;
596 11050 : spin_unlock(&kernfs_idr_lock);
597 : idr_preload_end();
598 11050 : if (ret < 0)
599 : goto err_out2;
600 :
601 11050 : kn->id = (u64)id_highbits << 32 | ret;
602 :
603 22100 : atomic_set(&kn->count, 1);
604 22100 : atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
605 11050 : RB_CLEAR_NODE(&kn->rb);
606 :
607 11050 : kn->name = name;
608 11050 : kn->mode = mode;
609 11050 : kn->flags = flags;
610 :
611 11050 : if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
612 0 : struct iattr iattr = {
613 : .ia_valid = ATTR_UID | ATTR_GID,
614 : .ia_uid = uid,
615 : .ia_gid = gid,
616 : };
617 :
618 0 : ret = __kernfs_setattr(kn, &iattr);
619 0 : if (ret < 0)
620 : goto err_out3;
621 : }
622 :
623 : if (parent) {
624 : ret = security_kernfs_init_security(parent, kn);
625 : if (ret)
626 : goto err_out3;
627 : }
628 :
629 : return kn;
630 :
631 : err_out3:
632 0 : idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
633 : err_out2:
634 0 : kmem_cache_free(kernfs_node_cache, kn);
635 : err_out1:
636 0 : kfree_const(name);
637 0 : return NULL;
638 : }
639 :
640 11049 : struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
641 : const char *name, umode_t mode,
642 : kuid_t uid, kgid_t gid,
643 : unsigned flags)
644 : {
645 : struct kernfs_node *kn;
646 :
647 22098 : kn = __kernfs_new_node(kernfs_root(parent), parent,
648 : name, mode, uid, gid, flags);
649 11049 : if (kn) {
650 11049 : kernfs_get(parent);
651 11049 : kn->parent = parent;
652 : }
653 11049 : return kn;
654 : }
655 :
656 : /*
657 : * kernfs_find_and_get_node_by_id - get kernfs_node from node id
658 : * @root: the kernfs root
659 : * @id: the target node id
660 : *
661 : * @id's lower 32bits encode ino and upper gen. If the gen portion is
662 : * zero, all generations are matched.
663 : *
664 : * RETURNS:
665 : * NULL on failure. Return a kernfs node with reference counter incremented
666 : */
667 0 : struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
668 : u64 id)
669 : {
670 : struct kernfs_node *kn;
671 0 : ino_t ino = kernfs_id_ino(id);
672 0 : u32 gen = kernfs_id_gen(id);
673 :
674 0 : spin_lock(&kernfs_idr_lock);
675 :
676 0 : kn = idr_find(&root->ino_idr, (u32)ino);
677 0 : if (!kn)
678 : goto err_unlock;
679 :
680 : if (sizeof(ino_t) >= sizeof(u64)) {
681 : /* we looked up with the low 32bits, compare the whole */
682 0 : if (kernfs_ino(kn) != ino)
683 : goto err_unlock;
684 : } else {
685 : /* 0 matches all generations */
686 : if (unlikely(gen && kernfs_gen(kn) != gen))
687 : goto err_unlock;
688 : }
689 :
690 : /*
691 : * ACTIVATED is protected with kernfs_mutex but it was clear when
692 : * @kn was added to idr and we just wanna see it set. No need to
693 : * grab kernfs_mutex.
694 : */
695 0 : if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
696 : !atomic_inc_not_zero(&kn->count)))
697 : goto err_unlock;
698 :
699 0 : spin_unlock(&kernfs_idr_lock);
700 0 : return kn;
701 : err_unlock:
702 0 : spin_unlock(&kernfs_idr_lock);
703 0 : return NULL;
704 : }
705 :
706 : /**
707 : * kernfs_add_one - add kernfs_node to parent without warning
708 : * @kn: kernfs_node to be added
709 : *
710 : * The caller must already have initialized @kn->parent. This
711 : * function increments nlink of the parent's inode if @kn is a
712 : * directory and link into the children list of the parent.
713 : *
714 : * RETURNS:
715 : * 0 on success, -EEXIST if entry with the given name already
716 : * exists.
717 : */
718 11049 : int kernfs_add_one(struct kernfs_node *kn)
719 : {
720 11049 : struct kernfs_node *parent = kn->parent;
721 11049 : struct kernfs_root *root = kernfs_root(parent);
722 : struct kernfs_iattrs *ps_iattr;
723 : bool has_ns;
724 : int ret;
725 :
726 11049 : down_write(&root->kernfs_rwsem);
727 :
728 11049 : ret = -EINVAL;
729 22098 : has_ns = kernfs_ns_enabled(parent);
730 11049 : if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
731 : has_ns ? "required" : "invalid", parent->name, kn->name))
732 : goto out_unlock;
733 :
734 22098 : if (kernfs_type(parent) != KERNFS_DIR)
735 : goto out_unlock;
736 :
737 11049 : ret = -ENOENT;
738 11049 : if (parent->flags & KERNFS_EMPTY_DIR)
739 : goto out_unlock;
740 :
741 22098 : if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
742 : goto out_unlock;
743 :
744 11049 : kn->hash = kernfs_name_hash(kn->name, kn->ns);
745 :
746 11049 : ret = kernfs_link_sibling(kn);
747 11049 : if (ret)
748 : goto out_unlock;
749 :
750 : /* Update timestamps on the parent */
751 11049 : ps_iattr = parent->iattr;
752 11049 : if (ps_iattr) {
753 0 : ktime_get_real_ts64(&ps_iattr->ia_ctime);
754 0 : ps_iattr->ia_mtime = ps_iattr->ia_ctime;
755 : }
756 :
757 11049 : up_write(&root->kernfs_rwsem);
758 :
759 : /*
760 : * Activate the new node unless CREATE_DEACTIVATED is requested.
761 : * If not activated here, the kernfs user is responsible for
762 : * activating the node with kernfs_activate(). A node which hasn't
763 : * been activated is not visible to userland and its removal won't
764 : * trigger deactivation.
765 : */
766 11049 : if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
767 11049 : kernfs_activate(kn);
768 : return 0;
769 :
770 : out_unlock:
771 0 : up_write(&root->kernfs_rwsem);
772 0 : return ret;
773 : }
774 :
775 : /**
776 : * kernfs_find_ns - find kernfs_node with the given name
777 : * @parent: kernfs_node to search under
778 : * @name: name to look for
779 : * @ns: the namespace tag to use
780 : *
781 : * Look for kernfs_node with name @name under @parent. Returns pointer to
782 : * the found kernfs_node on success, %NULL on failure.
783 : */
784 2951 : static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
785 : const unsigned char *name,
786 : const void *ns)
787 : {
788 2951 : struct rb_node *node = parent->dir.children.rb_node;
789 5902 : bool has_ns = kernfs_ns_enabled(parent);
790 : unsigned int hash;
791 :
792 2951 : lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);
793 :
794 2951 : if (has_ns != (bool)ns) {
795 0 : WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
796 : has_ns ? "required" : "invalid", parent->name, name);
797 0 : return NULL;
798 : }
799 :
800 2951 : hash = kernfs_name_hash(name, ns);
801 10993 : while (node) {
802 : struct kernfs_node *kn;
803 : int result;
804 :
805 7954 : kn = rb_to_kn(node);
806 7954 : result = kernfs_name_compare(hash, name, ns, kn);
807 7954 : if (result < 0)
808 2434 : node = node->rb_left;
809 5520 : else if (result > 0)
810 2657 : node = node->rb_right;
811 : else
812 : return kn;
813 : }
814 : return NULL;
815 : }
816 :
817 0 : static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
818 : const unsigned char *path,
819 : const void *ns)
820 : {
821 : size_t len;
822 : char *p, *name;
823 :
824 0 : lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
825 :
826 : /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
827 0 : spin_lock_irq(&kernfs_rename_lock);
828 :
829 0 : len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
830 :
831 0 : if (len >= sizeof(kernfs_pr_cont_buf)) {
832 0 : spin_unlock_irq(&kernfs_rename_lock);
833 0 : return NULL;
834 : }
835 :
836 0 : p = kernfs_pr_cont_buf;
837 :
838 0 : while ((name = strsep(&p, "/")) && parent) {
839 0 : if (*name == '\0')
840 0 : continue;
841 0 : parent = kernfs_find_ns(parent, name, ns);
842 : }
843 :
844 0 : spin_unlock_irq(&kernfs_rename_lock);
845 :
846 0 : return parent;
847 : }
848 :
849 : /**
850 : * kernfs_find_and_get_ns - find and get kernfs_node with the given name
851 : * @parent: kernfs_node to search under
852 : * @name: name to look for
853 : * @ns: the namespace tag to use
854 : *
855 : * Look for kernfs_node with name @name under @parent and get a reference
856 : * if found. This function may sleep and returns pointer to the found
857 : * kernfs_node on success, %NULL on failure.
858 : */
859 633 : struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
860 : const char *name, const void *ns)
861 : {
862 : struct kernfs_node *kn;
863 633 : struct kernfs_root *root = kernfs_root(parent);
864 :
865 633 : down_read(&root->kernfs_rwsem);
866 633 : kn = kernfs_find_ns(parent, name, ns);
867 633 : kernfs_get(kn);
868 633 : up_read(&root->kernfs_rwsem);
869 :
870 633 : return kn;
871 : }
872 : EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
873 :
874 : /**
875 : * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
876 : * @parent: kernfs_node to search under
877 : * @path: path to look for
878 : * @ns: the namespace tag to use
879 : *
880 : * Look for kernfs_node with path @path under @parent and get a reference
881 : * if found. This function may sleep and returns pointer to the found
882 : * kernfs_node on success, %NULL on failure.
883 : */
884 0 : struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
885 : const char *path, const void *ns)
886 : {
887 : struct kernfs_node *kn;
888 0 : struct kernfs_root *root = kernfs_root(parent);
889 :
890 0 : down_read(&root->kernfs_rwsem);
891 0 : kn = kernfs_walk_ns(parent, path, ns);
892 0 : kernfs_get(kn);
893 0 : up_read(&root->kernfs_rwsem);
894 :
895 0 : return kn;
896 : }
897 :
898 : /**
899 : * kernfs_create_root - create a new kernfs hierarchy
900 : * @scops: optional syscall operations for the hierarchy
901 : * @flags: KERNFS_ROOT_* flags
902 : * @priv: opaque data associated with the new directory
903 : *
904 : * Returns the root of the new hierarchy on success, ERR_PTR() value on
905 : * failure.
906 : */
907 1 : struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
908 : unsigned int flags, void *priv)
909 : {
910 : struct kernfs_root *root;
911 : struct kernfs_node *kn;
912 :
913 1 : root = kzalloc(sizeof(*root), GFP_KERNEL);
914 1 : if (!root)
915 : return ERR_PTR(-ENOMEM);
916 :
917 2 : idr_init(&root->ino_idr);
918 1 : init_rwsem(&root->kernfs_rwsem);
919 2 : INIT_LIST_HEAD(&root->supers);
920 :
921 : /*
922 : * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
923 : * High bits generation. The starting value for both ino and
924 : * genenration is 1. Initialize upper 32bit allocation
925 : * accordingly.
926 : */
927 : if (sizeof(ino_t) >= sizeof(u64))
928 1 : root->id_highbits = 0;
929 : else
930 : root->id_highbits = 1;
931 :
932 1 : kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
933 1 : GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
934 : KERNFS_DIR);
935 1 : if (!kn) {
936 0 : idr_destroy(&root->ino_idr);
937 0 : kfree(root);
938 0 : return ERR_PTR(-ENOMEM);
939 : }
940 :
941 1 : kn->priv = priv;
942 1 : kn->dir.root = root;
943 :
944 1 : root->syscall_ops = scops;
945 1 : root->flags = flags;
946 1 : root->kn = kn;
947 1 : init_waitqueue_head(&root->deactivate_waitq);
948 :
949 1 : if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
950 1 : kernfs_activate(kn);
951 :
952 : return root;
953 : }
954 :
955 : /**
956 : * kernfs_destroy_root - destroy a kernfs hierarchy
957 : * @root: root of the hierarchy to destroy
958 : *
959 : * Destroy the hierarchy anchored at @root by removing all existing
960 : * directories and destroying @root.
961 : */
962 0 : void kernfs_destroy_root(struct kernfs_root *root)
963 : {
964 : /*
965 : * kernfs_remove holds kernfs_rwsem from the root so the root
966 : * shouldn't be freed during the operation.
967 : */
968 0 : kernfs_get(root->kn);
969 0 : kernfs_remove(root->kn);
970 0 : kernfs_put(root->kn); /* will also free @root */
971 0 : }
972 :
973 : /**
974 : * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root
975 : * @root: root to use to lookup
976 : */
977 1 : struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
978 : {
979 1 : return root->kn;
980 : }
981 :
982 : /**
983 : * kernfs_create_dir_ns - create a directory
984 : * @parent: parent in which to create a new directory
985 : * @name: name of the new directory
986 : * @mode: mode of the new directory
987 : * @uid: uid of the new directory
988 : * @gid: gid of the new directory
989 : * @priv: opaque data associated with the new directory
990 : * @ns: optional namespace tag of the directory
991 : *
992 : * Returns the created node on success, ERR_PTR() value on failure.
993 : */
994 1363 : struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
995 : const char *name, umode_t mode,
996 : kuid_t uid, kgid_t gid,
997 : void *priv, const void *ns)
998 : {
999 : struct kernfs_node *kn;
1000 : int rc;
1001 :
1002 : /* allocate */
1003 1363 : kn = kernfs_new_node(parent, name, mode | S_IFDIR,
1004 : uid, gid, KERNFS_DIR);
1005 1363 : if (!kn)
1006 : return ERR_PTR(-ENOMEM);
1007 :
1008 1363 : kn->dir.root = parent->dir.root;
1009 1363 : kn->ns = ns;
1010 1363 : kn->priv = priv;
1011 :
1012 : /* link in */
1013 1363 : rc = kernfs_add_one(kn);
1014 1363 : if (!rc)
1015 : return kn;
1016 :
1017 0 : kernfs_put(kn);
1018 0 : return ERR_PTR(rc);
1019 : }
1020 :
1021 : /**
1022 : * kernfs_create_empty_dir - create an always empty directory
1023 : * @parent: parent in which to create a new directory
1024 : * @name: name of the new directory
1025 : *
1026 : * Returns the created node on success, ERR_PTR() value on failure.
1027 : */
1028 0 : struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
1029 : const char *name)
1030 : {
1031 : struct kernfs_node *kn;
1032 : int rc;
1033 :
1034 : /* allocate */
1035 0 : kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
1036 0 : GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
1037 0 : if (!kn)
1038 : return ERR_PTR(-ENOMEM);
1039 :
1040 0 : kn->flags |= KERNFS_EMPTY_DIR;
1041 0 : kn->dir.root = parent->dir.root;
1042 0 : kn->ns = NULL;
1043 0 : kn->priv = NULL;
1044 :
1045 : /* link in */
1046 0 : rc = kernfs_add_one(kn);
1047 0 : if (!rc)
1048 : return kn;
1049 :
1050 0 : kernfs_put(kn);
1051 0 : return ERR_PTR(rc);
1052 : }
1053 :
1054 0 : static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
1055 : {
1056 : struct kernfs_node *kn;
1057 : struct kernfs_root *root;
1058 :
1059 0 : if (flags & LOOKUP_RCU)
1060 : return -ECHILD;
1061 :
1062 : /* Negative hashed dentry? */
1063 0 : if (d_really_is_negative(dentry)) {
1064 : struct kernfs_node *parent;
1065 :
1066 : /* If the kernfs parent node has changed discard and
1067 : * proceed to ->lookup.
1068 : */
1069 0 : spin_lock(&dentry->d_lock);
1070 0 : parent = kernfs_dentry_node(dentry->d_parent);
1071 0 : if (parent) {
1072 0 : spin_unlock(&dentry->d_lock);
1073 0 : root = kernfs_root(parent);
1074 0 : down_read(&root->kernfs_rwsem);
1075 0 : if (kernfs_dir_changed(parent, dentry)) {
1076 0 : up_read(&root->kernfs_rwsem);
1077 0 : return 0;
1078 : }
1079 0 : up_read(&root->kernfs_rwsem);
1080 : } else
1081 0 : spin_unlock(&dentry->d_lock);
1082 :
1083 : /* The kernfs parent node hasn't changed, leave the
1084 : * dentry negative and return success.
1085 : */
1086 : return 1;
1087 : }
1088 :
1089 0 : kn = kernfs_dentry_node(dentry);
1090 0 : root = kernfs_root(kn);
1091 0 : down_read(&root->kernfs_rwsem);
1092 :
1093 : /* The kernfs node has been deactivated */
1094 0 : if (!kernfs_active(kn))
1095 : goto out_bad;
1096 :
1097 : /* The kernfs node has been moved? */
1098 0 : if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
1099 : goto out_bad;
1100 :
1101 : /* The kernfs node has been renamed */
1102 0 : if (strcmp(dentry->d_name.name, kn->name) != 0)
1103 : goto out_bad;
1104 :
1105 : /* The kernfs node has been moved to a different namespace */
1106 0 : if (kn->parent && kernfs_ns_enabled(kn->parent) &&
1107 0 : kernfs_info(dentry->d_sb)->ns != kn->ns)
1108 : goto out_bad;
1109 :
1110 0 : up_read(&root->kernfs_rwsem);
1111 0 : return 1;
1112 : out_bad:
1113 0 : up_read(&root->kernfs_rwsem);
1114 0 : return 0;
1115 : }
1116 :
1117 : const struct dentry_operations kernfs_dops = {
1118 : .d_revalidate = kernfs_dop_revalidate,
1119 : };
1120 :
1121 0 : static struct dentry *kernfs_iop_lookup(struct inode *dir,
1122 : struct dentry *dentry,
1123 : unsigned int flags)
1124 : {
1125 0 : struct kernfs_node *parent = dir->i_private;
1126 : struct kernfs_node *kn;
1127 : struct kernfs_root *root;
1128 0 : struct inode *inode = NULL;
1129 0 : const void *ns = NULL;
1130 :
1131 0 : root = kernfs_root(parent);
1132 0 : down_read(&root->kernfs_rwsem);
1133 0 : if (kernfs_ns_enabled(parent))
1134 0 : ns = kernfs_info(dir->i_sb)->ns;
1135 :
1136 0 : kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1137 : /* attach dentry and inode */
1138 0 : if (kn) {
1139 : /* Inactive nodes are invisible to the VFS so don't
1140 : * create a negative.
1141 : */
1142 0 : if (!kernfs_active(kn)) {
1143 0 : up_read(&root->kernfs_rwsem);
1144 0 : return NULL;
1145 : }
1146 0 : inode = kernfs_get_inode(dir->i_sb, kn);
1147 0 : if (!inode)
1148 0 : inode = ERR_PTR(-ENOMEM);
1149 : }
1150 : /*
1151 : * Needed for negative dentry validation.
1152 : * The negative dentry can be created in kernfs_iop_lookup()
1153 : * or transformed from a positive dentry in dentry_unlink_inode()
1154 : * called from vfs_rmdir().
1155 : */
1156 0 : if (!IS_ERR(inode))
1157 0 : kernfs_set_rev(parent, dentry);
1158 0 : up_read(&root->kernfs_rwsem);
1159 :
1160 : /* instantiate and hash (possibly negative) dentry */
1161 0 : return d_splice_alias(inode, dentry);
1162 : }
1163 :
1164 0 : static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
1165 : struct inode *dir, struct dentry *dentry,
1166 : umode_t mode)
1167 : {
1168 0 : struct kernfs_node *parent = dir->i_private;
1169 0 : struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1170 : int ret;
1171 :
1172 0 : if (!scops || !scops->mkdir)
1173 : return -EPERM;
1174 :
1175 0 : if (!kernfs_get_active(parent))
1176 : return -ENODEV;
1177 :
1178 0 : ret = scops->mkdir(parent, dentry->d_name.name, mode);
1179 :
1180 0 : kernfs_put_active(parent);
1181 0 : return ret;
1182 : }
1183 :
1184 0 : static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1185 : {
1186 0 : struct kernfs_node *kn = kernfs_dentry_node(dentry);
1187 0 : struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1188 : int ret;
1189 :
1190 0 : if (!scops || !scops->rmdir)
1191 : return -EPERM;
1192 :
1193 0 : if (!kernfs_get_active(kn))
1194 : return -ENODEV;
1195 :
1196 0 : ret = scops->rmdir(kn);
1197 :
1198 0 : kernfs_put_active(kn);
1199 0 : return ret;
1200 : }
1201 :
1202 0 : static int kernfs_iop_rename(struct user_namespace *mnt_userns,
1203 : struct inode *old_dir, struct dentry *old_dentry,
1204 : struct inode *new_dir, struct dentry *new_dentry,
1205 : unsigned int flags)
1206 : {
1207 0 : struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
1208 0 : struct kernfs_node *new_parent = new_dir->i_private;
1209 0 : struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1210 : int ret;
1211 :
1212 0 : if (flags)
1213 : return -EINVAL;
1214 :
1215 0 : if (!scops || !scops->rename)
1216 : return -EPERM;
1217 :
1218 0 : if (!kernfs_get_active(kn))
1219 : return -ENODEV;
1220 :
1221 0 : if (!kernfs_get_active(new_parent)) {
1222 0 : kernfs_put_active(kn);
1223 0 : return -ENODEV;
1224 : }
1225 :
1226 0 : ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1227 :
1228 0 : kernfs_put_active(new_parent);
1229 0 : kernfs_put_active(kn);
1230 0 : return ret;
1231 : }
1232 :
1233 : const struct inode_operations kernfs_dir_iops = {
1234 : .lookup = kernfs_iop_lookup,
1235 : .permission = kernfs_iop_permission,
1236 : .setattr = kernfs_iop_setattr,
1237 : .getattr = kernfs_iop_getattr,
1238 : .listxattr = kernfs_iop_listxattr,
1239 :
1240 : .mkdir = kernfs_iop_mkdir,
1241 : .rmdir = kernfs_iop_rmdir,
1242 : .rename = kernfs_iop_rename,
1243 : };
1244 :
1245 : static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1246 : {
1247 : struct kernfs_node *last;
1248 :
1249 : while (true) {
1250 : struct rb_node *rbn;
1251 :
1252 15700 : last = pos;
1253 :
1254 15700 : if (kernfs_type(pos) != KERNFS_DIR)
1255 : break;
1256 :
1257 1554 : rbn = rb_first(&pos->dir.children);
1258 1554 : if (!rbn)
1259 : break;
1260 :
1261 0 : pos = rb_to_kn(rbn);
1262 : }
1263 :
1264 : return last;
1265 : }
1266 :
1267 : /**
1268 : * kernfs_next_descendant_post - find the next descendant for post-order walk
1269 : * @pos: the current position (%NULL to initiate traversal)
1270 : * @root: kernfs_node whose descendants to walk
1271 : *
1272 : * Find the next descendant to visit for post-order traversal of @root's
1273 : * descendants. @root is included in the iteration and the last node to be
1274 : * visited.
1275 : */
1276 26750 : static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1277 : struct kernfs_node *root)
1278 : {
1279 : struct rb_node *rbn;
1280 :
1281 26750 : lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);
1282 :
1283 : /* if first iteration, visit leftmost descendant which may be root */
1284 26750 : if (!pos)
1285 : return kernfs_leftmost_descendant(root);
1286 :
1287 : /* if we visited @root, we're done */
1288 13375 : if (pos == root)
1289 : return NULL;
1290 :
1291 : /* if there's an unvisited sibling, visit its leftmost descendant */
1292 0 : rbn = rb_next(&pos->rb);
1293 0 : if (rbn)
1294 0 : return kernfs_leftmost_descendant(rb_to_kn(rbn));
1295 :
1296 : /* no sibling left, visit parent */
1297 0 : return pos->parent;
1298 : }
1299 :
1300 : /**
1301 : * kernfs_activate - activate a node which started deactivated
1302 : * @kn: kernfs_node whose subtree is to be activated
1303 : *
1304 : * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
1305 : * needs to be explicitly activated. A node which hasn't been activated
1306 : * isn't visible to userland and deactivation is skipped during its
1307 : * removal. This is useful to construct atomic init sequences where
1308 : * creation of multiple nodes should either succeed or fail atomically.
1309 : *
1310 : * The caller is responsible for ensuring that this function is not called
1311 : * after kernfs_remove*() is invoked on @kn.
1312 : */
1313 11050 : void kernfs_activate(struct kernfs_node *kn)
1314 : {
1315 : struct kernfs_node *pos;
1316 11050 : struct kernfs_root *root = kernfs_root(kn);
1317 :
1318 11050 : down_write(&root->kernfs_rwsem);
1319 :
1320 11050 : pos = NULL;
1321 33150 : while ((pos = kernfs_next_descendant_post(pos, kn))) {
1322 11050 : if (pos->flags & KERNFS_ACTIVATED)
1323 0 : continue;
1324 :
1325 11050 : WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1326 22100 : WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1327 :
1328 22100 : atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1329 11050 : pos->flags |= KERNFS_ACTIVATED;
1330 : }
1331 :
1332 11050 : up_write(&root->kernfs_rwsem);
1333 11050 : }
1334 :
1335 2325 : static void __kernfs_remove(struct kernfs_node *kn)
1336 : {
1337 : struct kernfs_node *pos;
1338 :
1339 2325 : lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
1340 :
1341 : /*
1342 : * Short-circuit if non-root @kn has already finished removal.
1343 : * This is for kernfs_remove_self() which plays with active ref
1344 : * after removal.
1345 : */
1346 2325 : if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1347 : return;
1348 :
1349 : pr_debug("kernfs %s: removing\n", kn->name);
1350 :
1351 : /* prevent any new usage under @kn by deactivating all nodes */
1352 : pos = NULL;
1353 4650 : while ((pos = kernfs_next_descendant_post(pos, kn)))
1354 2325 : if (kernfs_active(pos))
1355 2325 : atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1356 :
1357 : /* deactivate and unlink the subtree node-by-node */
1358 : do {
1359 2325 : pos = kernfs_leftmost_descendant(kn);
1360 :
1361 : /*
1362 : * kernfs_drain() drops kernfs_rwsem temporarily and @pos's
1363 : * base ref could have been put by someone else by the time
1364 : * the function returns. Make sure it doesn't go away
1365 : * underneath us.
1366 : */
1367 2325 : kernfs_get(pos);
1368 :
1369 : /*
1370 : * Drain iff @kn was activated. This avoids draining and
1371 : * its lockdep annotations for nodes which have never been
1372 : * activated and allows embedding kernfs_remove() in create
1373 : * error paths without worrying about draining.
1374 : */
1375 2325 : if (kn->flags & KERNFS_ACTIVATED)
1376 2325 : kernfs_drain(pos);
1377 : else
1378 0 : WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1379 :
1380 : /*
1381 : * kernfs_unlink_sibling() succeeds once per node. Use it
1382 : * to decide who's responsible for cleanups.
1383 : */
1384 2325 : if (!pos->parent || kernfs_unlink_sibling(pos)) {
1385 2325 : struct kernfs_iattrs *ps_iattr =
1386 2325 : pos->parent ? pos->parent->iattr : NULL;
1387 :
1388 : /* update timestamps on the parent */
1389 2325 : if (ps_iattr) {
1390 0 : ktime_get_real_ts64(&ps_iattr->ia_ctime);
1391 0 : ps_iattr->ia_mtime = ps_iattr->ia_ctime;
1392 : }
1393 :
1394 2325 : kernfs_put(pos);
1395 : }
1396 :
1397 2325 : kernfs_put(pos);
1398 2325 : } while (pos != kn);
1399 : }
1400 :
1401 : /**
1402 : * kernfs_remove - remove a kernfs_node recursively
1403 : * @kn: the kernfs_node to remove
1404 : *
1405 : * Remove @kn along with all its subdirectories and files.
1406 : */
1407 95 : void kernfs_remove(struct kernfs_node *kn)
1408 : {
1409 : struct kernfs_root *root;
1410 :
1411 95 : if (!kn)
1412 : return;
1413 :
1414 95 : root = kernfs_root(kn);
1415 :
1416 95 : down_write(&root->kernfs_rwsem);
1417 95 : __kernfs_remove(kn);
1418 95 : up_write(&root->kernfs_rwsem);
1419 : }
1420 :
1421 : /**
1422 : * kernfs_break_active_protection - break out of active protection
1423 : * @kn: the self kernfs_node
1424 : *
1425 : * The caller must be running off of a kernfs operation which is invoked
1426 : * with an active reference - e.g. one of kernfs_ops. Each invocation of
1427 : * this function must also be matched with an invocation of
1428 : * kernfs_unbreak_active_protection().
1429 : *
1430 : * This function releases the active reference of @kn the caller is
1431 : * holding. Once this function is called, @kn may be removed at any point
1432 : * and the caller is solely responsible for ensuring that the objects it
1433 : * dereferences are accessible.
1434 : */
1435 0 : void kernfs_break_active_protection(struct kernfs_node *kn)
1436 : {
1437 : /*
1438 : * Take ourselves out of the active ref dependency chain. If
1439 : * we're called without an active ref, lockdep will complain.
1440 : */
1441 0 : kernfs_put_active(kn);
1442 0 : }
1443 :
1444 : /**
1445 : * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1446 : * @kn: the self kernfs_node
1447 : *
1448 : * If kernfs_break_active_protection() was called, this function must be
1449 : * invoked before finishing the kernfs operation. Note that while this
1450 : * function restores the active reference, it doesn't and can't actually
1451 : * restore the active protection - @kn may already or be in the process of
1452 : * being removed. Once kernfs_break_active_protection() is invoked, that
1453 : * protection is irreversibly gone for the kernfs operation instance.
1454 : *
1455 : * While this function may be called at any point after
1456 : * kernfs_break_active_protection() is invoked, its most useful location
1457 : * would be right before the enclosing kernfs operation returns.
1458 : */
1459 0 : void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1460 : {
1461 : /*
1462 : * @kn->active could be in any state; however, the increment we do
1463 : * here will be undone as soon as the enclosing kernfs operation
1464 : * finishes and this temporary bump can't break anything. If @kn
1465 : * is alive, nothing changes. If @kn is being deactivated, the
1466 : * soon-to-follow put will either finish deactivation or restore
1467 : * deactivated state. If @kn is already removed, the temporary
1468 : * bump is guaranteed to be gone before @kn is released.
1469 : */
1470 0 : atomic_inc(&kn->active);
1471 0 : if (kernfs_lockdep(kn))
1472 : rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1473 0 : }
1474 :
1475 : /**
1476 : * kernfs_remove_self - remove a kernfs_node from its own method
1477 : * @kn: the self kernfs_node to remove
1478 : *
1479 : * The caller must be running off of a kernfs operation which is invoked
1480 : * with an active reference - e.g. one of kernfs_ops. This can be used to
1481 : * implement a file operation which deletes itself.
1482 : *
1483 : * For example, the "delete" file for a sysfs device directory can be
1484 : * implemented by invoking kernfs_remove_self() on the "delete" file
1485 : * itself. This function breaks the circular dependency of trying to
1486 : * deactivate self while holding an active ref itself. It isn't necessary
1487 : * to modify the usual removal path to use kernfs_remove_self(). The
1488 : * "delete" implementation can simply invoke kernfs_remove_self() on self
1489 : * before proceeding with the usual removal path. kernfs will ignore later
1490 : * kernfs_remove() on self.
1491 : *
1492 : * kernfs_remove_self() can be called multiple times concurrently on the
1493 : * same kernfs_node. Only the first one actually performs removal and
1494 : * returns %true. All others will wait until the kernfs operation which
1495 : * won self-removal finishes and return %false. Note that the losers wait
1496 : * for the completion of not only the winning kernfs_remove_self() but also
1497 : * the whole kernfs_ops which won the arbitration. This can be used to
1498 : * guarantee, for example, all concurrent writes to a "delete" file to
1499 : * finish only after the whole operation is complete.
1500 : */
1501 0 : bool kernfs_remove_self(struct kernfs_node *kn)
1502 : {
1503 : bool ret;
1504 0 : struct kernfs_root *root = kernfs_root(kn);
1505 :
1506 0 : down_write(&root->kernfs_rwsem);
1507 0 : kernfs_break_active_protection(kn);
1508 :
1509 : /*
1510 : * SUICIDAL is used to arbitrate among competing invocations. Only
1511 : * the first one will actually perform removal. When the removal
1512 : * is complete, SUICIDED is set and the active ref is restored
1513 : * while kernfs_rwsem is held exclusive. The ones which lost
1514 : * arbitration wait for SUICIDED && drained which can happen only
1515 : * after the enclosing kernfs operation which executed the winning
1516 : * instance of kernfs_remove_self() finished.
1517 : */
1518 0 : if (!(kn->flags & KERNFS_SUICIDAL)) {
1519 0 : kn->flags |= KERNFS_SUICIDAL;
1520 0 : __kernfs_remove(kn);
1521 0 : kn->flags |= KERNFS_SUICIDED;
1522 0 : ret = true;
1523 : } else {
1524 0 : wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1525 0 : DEFINE_WAIT(wait);
1526 :
1527 : while (true) {
1528 0 : prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1529 :
1530 0 : if ((kn->flags & KERNFS_SUICIDED) &&
1531 0 : atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1532 : break;
1533 :
1534 0 : up_write(&root->kernfs_rwsem);
1535 0 : schedule();
1536 0 : down_write(&root->kernfs_rwsem);
1537 : }
1538 0 : finish_wait(waitq, &wait);
1539 0 : WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1540 0 : ret = false;
1541 : }
1542 :
1543 : /*
1544 : * This must be done while kernfs_rwsem is held exclusive; otherwise,
1545 : * waiting for SUICIDED && deactivated could finish prematurely.
1546 : */
1547 0 : kernfs_unbreak_active_protection(kn);
1548 :
1549 0 : up_write(&root->kernfs_rwsem);
1550 0 : return ret;
1551 : }
1552 :
1553 : /**
1554 : * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1555 : * @parent: parent of the target
1556 : * @name: name of the kernfs_node to remove
1557 : * @ns: namespace tag of the kernfs_node to remove
1558 : *
1559 : * Look for the kernfs_node with @name and @ns under @parent and remove it.
1560 : * Returns 0 on success, -ENOENT if such entry doesn't exist.
1561 : */
1562 2318 : int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1563 : const void *ns)
1564 : {
1565 : struct kernfs_node *kn;
1566 : struct kernfs_root *root;
1567 :
1568 2318 : if (!parent) {
1569 0 : WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1570 : name);
1571 0 : return -ENOENT;
1572 : }
1573 :
1574 2318 : root = kernfs_root(parent);
1575 2318 : down_write(&root->kernfs_rwsem);
1576 :
1577 2318 : kn = kernfs_find_ns(parent, name, ns);
1578 2318 : if (kn)
1579 2230 : __kernfs_remove(kn);
1580 :
1581 2318 : up_write(&root->kernfs_rwsem);
1582 :
1583 2318 : if (kn)
1584 : return 0;
1585 : else
1586 88 : return -ENOENT;
1587 : }
1588 :
1589 : /**
1590 : * kernfs_rename_ns - move and rename a kernfs_node
1591 : * @kn: target node
1592 : * @new_parent: new parent to put @kn under
1593 : * @new_name: new name
1594 : * @new_ns: new namespace tag
1595 : */
1596 0 : int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1597 : const char *new_name, const void *new_ns)
1598 : {
1599 : struct kernfs_node *old_parent;
1600 : struct kernfs_root *root;
1601 0 : const char *old_name = NULL;
1602 : int error;
1603 :
1604 : /* can't move or rename root */
1605 0 : if (!kn->parent)
1606 : return -EINVAL;
1607 :
1608 0 : root = kernfs_root(kn);
1609 0 : down_write(&root->kernfs_rwsem);
1610 :
1611 0 : error = -ENOENT;
1612 0 : if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1613 0 : (new_parent->flags & KERNFS_EMPTY_DIR))
1614 : goto out;
1615 :
1616 0 : error = 0;
1617 0 : if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1618 0 : (strcmp(kn->name, new_name) == 0))
1619 : goto out; /* nothing to rename */
1620 :
1621 0 : error = -EEXIST;
1622 0 : if (kernfs_find_ns(new_parent, new_name, new_ns))
1623 : goto out;
1624 :
1625 : /* rename kernfs_node */
1626 0 : if (strcmp(kn->name, new_name) != 0) {
1627 0 : error = -ENOMEM;
1628 0 : new_name = kstrdup_const(new_name, GFP_KERNEL);
1629 0 : if (!new_name)
1630 : goto out;
1631 : } else {
1632 : new_name = NULL;
1633 : }
1634 :
1635 : /*
1636 : * Move to the appropriate place in the appropriate directories rbtree.
1637 : */
1638 0 : kernfs_unlink_sibling(kn);
1639 0 : kernfs_get(new_parent);
1640 :
1641 : /* rename_lock protects ->parent and ->name accessors */
1642 0 : spin_lock_irq(&kernfs_rename_lock);
1643 :
1644 0 : old_parent = kn->parent;
1645 0 : kn->parent = new_parent;
1646 :
1647 0 : kn->ns = new_ns;
1648 0 : if (new_name) {
1649 0 : old_name = kn->name;
1650 0 : kn->name = new_name;
1651 : }
1652 :
1653 0 : spin_unlock_irq(&kernfs_rename_lock);
1654 :
1655 0 : kn->hash = kernfs_name_hash(kn->name, kn->ns);
1656 0 : kernfs_link_sibling(kn);
1657 :
1658 0 : kernfs_put(old_parent);
1659 0 : kfree_const(old_name);
1660 :
1661 0 : error = 0;
1662 : out:
1663 0 : up_write(&root->kernfs_rwsem);
1664 0 : return error;
1665 : }
1666 :
1667 : /* Relationship between mode and the DT_xxx types */
1668 : static inline unsigned char dt_type(struct kernfs_node *kn)
1669 : {
1670 0 : return (kn->mode >> 12) & 15;
1671 : }
1672 :
1673 0 : static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1674 : {
1675 0 : kernfs_put(filp->private_data);
1676 0 : return 0;
1677 : }
1678 :
1679 0 : static struct kernfs_node *kernfs_dir_pos(const void *ns,
1680 : struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1681 : {
1682 0 : if (pos) {
1683 0 : int valid = kernfs_active(pos) &&
1684 0 : pos->parent == parent && hash == pos->hash;
1685 0 : kernfs_put(pos);
1686 0 : if (!valid)
1687 0 : pos = NULL;
1688 : }
1689 0 : if (!pos && (hash > 1) && (hash < INT_MAX)) {
1690 0 : struct rb_node *node = parent->dir.children.rb_node;
1691 0 : while (node) {
1692 0 : pos = rb_to_kn(node);
1693 :
1694 0 : if (hash < pos->hash)
1695 0 : node = node->rb_left;
1696 0 : else if (hash > pos->hash)
1697 0 : node = node->rb_right;
1698 : else
1699 : break;
1700 : }
1701 : }
1702 : /* Skip over entries which are dying/dead or in the wrong namespace */
1703 0 : while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1704 0 : struct rb_node *node = rb_next(&pos->rb);
1705 0 : if (!node)
1706 : pos = NULL;
1707 : else
1708 0 : pos = rb_to_kn(node);
1709 : }
1710 0 : return pos;
1711 : }
1712 :
1713 0 : static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1714 : struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1715 : {
1716 0 : pos = kernfs_dir_pos(ns, parent, ino, pos);
1717 0 : if (pos) {
1718 : do {
1719 0 : struct rb_node *node = rb_next(&pos->rb);
1720 0 : if (!node)
1721 : pos = NULL;
1722 : else
1723 0 : pos = rb_to_kn(node);
1724 0 : } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1725 : }
1726 0 : return pos;
1727 : }
1728 :
1729 0 : static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1730 : {
1731 0 : struct dentry *dentry = file->f_path.dentry;
1732 0 : struct kernfs_node *parent = kernfs_dentry_node(dentry);
1733 0 : struct kernfs_node *pos = file->private_data;
1734 : struct kernfs_root *root;
1735 0 : const void *ns = NULL;
1736 :
1737 0 : if (!dir_emit_dots(file, ctx))
1738 : return 0;
1739 :
1740 0 : root = kernfs_root(parent);
1741 0 : down_read(&root->kernfs_rwsem);
1742 :
1743 0 : if (kernfs_ns_enabled(parent))
1744 0 : ns = kernfs_info(dentry->d_sb)->ns;
1745 :
1746 0 : for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1747 : pos;
1748 0 : pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1749 0 : const char *name = pos->name;
1750 0 : unsigned int type = dt_type(pos);
1751 0 : int len = strlen(name);
1752 0 : ino_t ino = kernfs_ino(pos);
1753 :
1754 0 : ctx->pos = pos->hash;
1755 0 : file->private_data = pos;
1756 0 : kernfs_get(pos);
1757 :
1758 0 : up_read(&root->kernfs_rwsem);
1759 0 : if (!dir_emit(ctx, name, len, ino, type))
1760 : return 0;
1761 0 : down_read(&root->kernfs_rwsem);
1762 : }
1763 0 : up_read(&root->kernfs_rwsem);
1764 0 : file->private_data = NULL;
1765 0 : ctx->pos = INT_MAX;
1766 0 : return 0;
1767 : }
1768 :
1769 : const struct file_operations kernfs_dir_fops = {
1770 : .read = generic_read_dir,
1771 : .iterate_shared = kernfs_fop_readdir,
1772 : .release = kernfs_dir_fop_release,
1773 : .llseek = generic_file_llseek,
1774 : };
|