xref: /linux/fs/tracefs/event_inode.c (revision d642ef71)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
27 /*
28  * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
29  * to the ei->dentry must be done under this mutex and after checking
30  * if ei->is_freed is not set. When ei->is_freed is set, the dentry
31  * is on its way to being freed after the last dput() is made on it.
32  */
33 static DEFINE_MUTEX(eventfs_mutex);
34 
35 /*
36  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
37  * its parent's list and will have is_freed set (under eventfs_mutex).
38  * After the SRCU grace period is over and the last dput() is called
39  * the ei is freed.
40  */
41 DEFINE_STATIC_SRCU(eventfs_srcu);
42 
/*
 * Mode is unsigned short, use the upper bits for flags.
 *
 * These flags live in eventfs_attr::mode next to the mode bits and record
 * which attributes were explicitly set (see update_attr()), so that they
 * can be applied again when an inode is created (see update_inode_attr()).
 */
enum {
	EVENTFS_SAVE_MODE	= BIT(16),	/* the low 16 bits hold a saved mode */
	EVENTFS_SAVE_UID	= BIT(17),	/* eventfs_attr::uid holds a saved owner */
	EVENTFS_SAVE_GID	= BIT(18),	/* eventfs_attr::gid holds a saved group */
};

/* Selects the mode bits (everything below the EVENTFS_SAVE_* flags) */
#define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)
51 
52 static struct dentry *eventfs_root_lookup(struct inode *dir,
53 					  struct dentry *dentry,
54 					  unsigned int flags);
55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
57 static int eventfs_release(struct inode *inode, struct file *file);
58 
59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60 {
61 	unsigned int ia_valid = iattr->ia_valid;
62 
63 	if (ia_valid & ATTR_MODE) {
64 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
66 			EVENTFS_SAVE_MODE;
67 	}
68 	if (ia_valid & ATTR_UID) {
69 		attr->mode |= EVENTFS_SAVE_UID;
70 		attr->uid = iattr->ia_uid;
71 	}
72 	if (ia_valid & ATTR_GID) {
73 		attr->mode |= EVENTFS_SAVE_GID;
74 		attr->gid = iattr->ia_gid;
75 	}
76 }
77 
78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 			    struct iattr *iattr)
80 {
81 	const struct eventfs_entry *entry;
82 	struct eventfs_inode *ei;
83 	const char *name;
84 	int ret;
85 
86 	mutex_lock(&eventfs_mutex);
87 	ei = dentry->d_fsdata;
88 	if (ei->is_freed) {
89 		/* Do not allow changes if the event is about to be removed. */
90 		mutex_unlock(&eventfs_mutex);
91 		return -ENODEV;
92 	}
93 
94 	/* Preallocate the children mode array if necessary */
95 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 		if (!ei->entry_attrs) {
97 			ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 						  GFP_NOFS);
99 			if (!ei->entry_attrs) {
100 				ret = -ENOMEM;
101 				goto out;
102 			}
103 		}
104 	}
105 
106 	ret = simple_setattr(idmap, dentry, iattr);
107 	if (ret < 0)
108 		goto out;
109 
110 	/*
111 	 * If this is a dir, then update the ei cache, only the file
112 	 * mode is saved in the ei->m_children, and the ownership is
113 	 * determined by the parent directory.
114 	 */
115 	if (dentry->d_inode->i_mode & S_IFDIR) {
116 		update_attr(&ei->attr, iattr);
117 
118 	} else {
119 		name = dentry->d_name.name;
120 
121 		for (int i = 0; i < ei->nr_entries; i++) {
122 			entry = &ei->entries[i];
123 			if (strcmp(name, entry->name) == 0) {
124 				update_attr(&ei->entry_attrs[i], iattr);
125 				break;
126 			}
127 		}
128 	}
129  out:
130 	mutex_unlock(&eventfs_mutex);
131 	return ret;
132 }
133 
/*
 * Inode operations installed on eventfs directory inodes (see create_dir()
 * and eventfs_create_events_dir()).
 */
static const struct inode_operations eventfs_root_dir_inode_operations = {
	.lookup		= eventfs_root_lookup,
	.setattr	= eventfs_set_attr,
};
138 
/* Inode operations installed on eventfs file inodes (see create_file()). */
static const struct inode_operations eventfs_file_inode_operations = {
	.setattr	= eventfs_set_attr,
};
142 
/*
 * File operations installed on eventfs directory inodes (see create_dir()
 * and eventfs_create_events_dir()). The open, readdir and release handlers
 * are wrappers that manage a struct dentry_list in file->private_data.
 */
static const struct file_operations eventfs_file_operations = {
	.open		= dcache_dir_open_wrapper,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir_wrapper,
	.llseek		= generic_file_llseek,
	.release	= eventfs_release,
};
150 
151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
152 {
153 	if (!attr) {
154 		inode->i_mode = mode;
155 		return;
156 	}
157 
158 	if (attr->mode & EVENTFS_SAVE_MODE)
159 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
160 	else
161 		inode->i_mode = mode;
162 
163 	if (attr->mode & EVENTFS_SAVE_UID)
164 		inode->i_uid = attr->uid;
165 
166 	if (attr->mode & EVENTFS_SAVE_GID)
167 		inode->i_gid = attr->gid;
168 }
169 
170 /**
171  * create_file - create a file in the tracefs filesystem
172  * @name: the name of the file to create.
173  * @mode: the permission that the file should have.
174  * @attr: saved attributes changed by user
175  * @parent: parent dentry for this file.
176  * @data: something that the caller will want to get to later on.
177  * @fop: struct file_operations that should be used for this file.
178  *
179  * This function creates a dentry that represents a file in the eventsfs_inode
180  * directory. The inode.i_private pointer will point to @data in the open()
181  * call.
182  */
183 static struct dentry *create_file(const char *name, umode_t mode,
184 				  struct eventfs_attr *attr,
185 				  struct dentry *parent, void *data,
186 				  const struct file_operations *fop)
187 {
188 	struct tracefs_inode *ti;
189 	struct dentry *dentry;
190 	struct inode *inode;
191 
192 	if (!(mode & S_IFMT))
193 		mode |= S_IFREG;
194 
195 	if (WARN_ON_ONCE(!S_ISREG(mode)))
196 		return NULL;
197 
198 	WARN_ON_ONCE(!parent);
199 	dentry = eventfs_start_creating(name, parent);
200 
201 	if (IS_ERR(dentry))
202 		return dentry;
203 
204 	inode = tracefs_get_inode(dentry->d_sb);
205 	if (unlikely(!inode))
206 		return eventfs_failed_creating(dentry);
207 
208 	/* If the user updated the directory's attributes, use them */
209 	update_inode_attr(inode, attr, mode);
210 
211 	inode->i_op = &eventfs_file_inode_operations;
212 	inode->i_fop = fop;
213 	inode->i_private = data;
214 
215 	ti = get_tracefs(inode);
216 	ti->flags |= TRACEFS_EVENT_INODE;
217 	d_instantiate(dentry, inode);
218 	fsnotify_create(dentry->d_parent->d_inode, dentry);
219 	return eventfs_end_creating(dentry);
220 };
221 
222 /**
223  * create_dir - create a dir in the tracefs filesystem
224  * @ei: the eventfs_inode that represents the directory to create
225  * @parent: parent dentry for this file.
226  *
227  * This function will create a dentry for a directory represented by
228  * a eventfs_inode.
229  */
230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
231 {
232 	struct tracefs_inode *ti;
233 	struct dentry *dentry;
234 	struct inode *inode;
235 
236 	dentry = eventfs_start_creating(ei->name, parent);
237 	if (IS_ERR(dentry))
238 		return dentry;
239 
240 	inode = tracefs_get_inode(dentry->d_sb);
241 	if (unlikely(!inode))
242 		return eventfs_failed_creating(dentry);
243 
244 	/* If the user updated the directory's attributes, use them */
245 	update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
246 
247 	inode->i_op = &eventfs_root_dir_inode_operations;
248 	inode->i_fop = &eventfs_file_operations;
249 
250 	ti = get_tracefs(inode);
251 	ti->flags |= TRACEFS_EVENT_INODE;
252 
253 	inc_nlink(inode);
254 	d_instantiate(dentry, inode);
255 	inc_nlink(dentry->d_parent->d_inode);
256 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
257 	return eventfs_end_creating(dentry);
258 }
259 
260 static void free_ei(struct eventfs_inode *ei)
261 {
262 	kfree_const(ei->name);
263 	kfree(ei->d_children);
264 	kfree(ei->entry_attrs);
265 	kfree(ei);
266 }
267 
268 /**
269  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
270  * @ti: the tracefs_inode of the dentry
271  * @dentry: dentry which has the reference to remove.
272  *
273  * Remove the association between a dentry from an eventfs_inode.
274  */
275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
276 {
277 	struct eventfs_inode *ei;
278 	int i;
279 
280 	mutex_lock(&eventfs_mutex);
281 
282 	ei = dentry->d_fsdata;
283 	if (!ei)
284 		goto out;
285 
286 	/* This could belong to one of the files of the ei */
287 	if (ei->dentry != dentry) {
288 		for (i = 0; i < ei->nr_entries; i++) {
289 			if (ei->d_children[i] == dentry)
290 				break;
291 		}
292 		if (WARN_ON_ONCE(i == ei->nr_entries))
293 			goto out;
294 		ei->d_children[i] = NULL;
295 	} else if (ei->is_freed) {
296 		free_ei(ei);
297 	} else {
298 		ei->dentry = NULL;
299 	}
300 
301 	dentry->d_fsdata = NULL;
302  out:
303 	mutex_unlock(&eventfs_mutex);
304 }
305 
306 /**
307  * create_file_dentry - create a dentry for a file of an eventfs_inode
308  * @ei: the eventfs_inode that the file will be created under
309  * @idx: the index into the d_children[] of the @ei
310  * @parent: The parent dentry of the created file.
311  * @name: The name of the file to create
312  * @mode: The mode of the file.
313  * @data: The data to use to set the inode of the file with on open()
314  * @fops: The fops of the file to be created.
315  * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
316  *
317  * Create a dentry for a file of an eventfs_inode @ei and place it into the
318  * address located at @e_dentry. If the @e_dentry already has a dentry, then
319  * just do a dget() on it and return. Otherwise create the dentry and attach it.
320  */
321 static struct dentry *
322 create_file_dentry(struct eventfs_inode *ei, int idx,
323 		   struct dentry *parent, const char *name, umode_t mode, void *data,
324 		   const struct file_operations *fops, bool lookup)
325 {
326 	struct eventfs_attr *attr = NULL;
327 	struct dentry **e_dentry = &ei->d_children[idx];
328 	struct dentry *dentry;
329 
330 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
331 
332 	mutex_lock(&eventfs_mutex);
333 	if (ei->is_freed) {
334 		mutex_unlock(&eventfs_mutex);
335 		return NULL;
336 	}
337 	/* If the e_dentry already has a dentry, use it */
338 	if (*e_dentry) {
339 		/* lookup does not need to up the ref count */
340 		if (!lookup)
341 			dget(*e_dentry);
342 		mutex_unlock(&eventfs_mutex);
343 		return *e_dentry;
344 	}
345 
346 	/* ei->entry_attrs are protected by SRCU */
347 	if (ei->entry_attrs)
348 		attr = &ei->entry_attrs[idx];
349 
350 	mutex_unlock(&eventfs_mutex);
351 
352 	dentry = create_file(name, mode, attr, parent, data, fops);
353 
354 	mutex_lock(&eventfs_mutex);
355 
356 	if (IS_ERR_OR_NULL(dentry)) {
357 		/*
358 		 * When the mutex was released, something else could have
359 		 * created the dentry for this e_dentry. In which case
360 		 * use that one.
361 		 *
362 		 * If ei->is_freed is set, the e_dentry is currently on its
363 		 * way to being freed, don't return it. If e_dentry is NULL
364 		 * it means it was already freed.
365 		 */
366 		if (ei->is_freed)
367 			dentry = NULL;
368 		else
369 			dentry = *e_dentry;
370 		/* The lookup does not need to up the dentry refcount */
371 		if (dentry && !lookup)
372 			dget(dentry);
373 		mutex_unlock(&eventfs_mutex);
374 		return dentry;
375 	}
376 
377 	if (!*e_dentry && !ei->is_freed) {
378 		*e_dentry = dentry;
379 		dentry->d_fsdata = ei;
380 	} else {
381 		/*
382 		 * Should never happen unless we get here due to being freed.
383 		 * Otherwise it means two dentries exist with the same name.
384 		 */
385 		WARN_ON_ONCE(!ei->is_freed);
386 		dentry = NULL;
387 	}
388 	mutex_unlock(&eventfs_mutex);
389 
390 	if (lookup)
391 		dput(dentry);
392 
393 	return dentry;
394 }
395 
396 /**
397  * eventfs_post_create_dir - post create dir routine
398  * @ei: eventfs_inode of recently created dir
399  *
400  * Map the meta-data of files within an eventfs dir to their parent dentry
401  */
402 static void eventfs_post_create_dir(struct eventfs_inode *ei)
403 {
404 	struct eventfs_inode *ei_child;
405 	struct tracefs_inode *ti;
406 
407 	lockdep_assert_held(&eventfs_mutex);
408 
409 	/* srcu lock already held */
410 	/* fill parent-child relation */
411 	list_for_each_entry_srcu(ei_child, &ei->children, list,
412 				 srcu_read_lock_held(&eventfs_srcu)) {
413 		ei_child->d_parent = ei->dentry;
414 	}
415 
416 	ti = get_tracefs(ei->dentry->d_inode);
417 	ti->private = ei;
418 }
419 
/**
 * create_dir_dentry - Create a directory dentry for the eventfs_inode
 * @pei: The eventfs_inode parent of ei.
 * @ei: The eventfs_inode to create the directory for
 * @parent: The dentry of the parent of this directory
 * @lookup: True if this is called by the lookup code
 *
 * This creates and attaches a directory dentry to the eventfs_inode @ei.
 * If @ei already has a dentry, that one is returned instead, with an extra
 * reference taken unless @lookup is true.
 *
 * The inode of @parent is expected to be locked by the caller.
 *
 * Returns the dentry, or NULL if @pei or @ei is being freed or no dentry
 * could be created or found.
 */
static struct dentry *
create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
		  struct dentry *parent, bool lookup)
{
	struct dentry *dentry = NULL;

	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));

	mutex_lock(&eventfs_mutex);
	/* Do not create anything under a directory that is being removed */
	if (pei->is_freed || ei->is_freed) {
		mutex_unlock(&eventfs_mutex);
		return NULL;
	}
	if (ei->dentry) {
		/* If the ei already has a dentry, use it */
		dentry = ei->dentry;
		/* lookup does not need to up the ref count */
		if (!lookup)
			dget(dentry);
		mutex_unlock(&eventfs_mutex);
		return dentry;
	}
	/* The mutex is not held while the dentry and inode are created */
	mutex_unlock(&eventfs_mutex);

	dentry = create_dir(ei, parent);

	mutex_lock(&eventfs_mutex);

	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
		/*
		 * When the mutex was released, something else could have
		 * created the dentry for this ei. In which case
		 * use that one.
		 *
		 * If ei->is_freed is set, the dentry is currently on its
		 * way to being freed; that case falls through to the
		 * else branch below, which returns NULL.
		 */
		dentry = ei->dentry;
		if (dentry && !lookup)
			dget(dentry);
		mutex_unlock(&eventfs_mutex);
		return dentry;
	}

	/* Recheck the state, it may have changed while the mutex was dropped */
	if (!ei->dentry && !ei->is_freed) {
		ei->dentry = dentry;
		eventfs_post_create_dir(ei);
		dentry->d_fsdata = ei;
	} else {
		/*
		 * Should never happen unless we get here due to being freed.
		 * Otherwise it means two dentries exist with the same name.
		 *
		 * NOTE(review): a dentry returned by create_dir() is dropped
		 * here without a dput() - confirm whether a reference is
		 * leaked on this path.
		 */
		WARN_ON_ONCE(!ei->is_freed);
		dentry = NULL;
	}
	mutex_unlock(&eventfs_mutex);

	/* The lookup path does not keep a reference of its own */
	if (lookup)
		dput(dentry);

	return dentry;
}
492 
/**
 * eventfs_root_lookup - lookup routine to create file/dir
 * @dir: in which a lookup is being done
 * @dentry: file/dir dentry
 * @flags: Just passed to simple_lookup()
 *
 * Used to create dynamic file/dir with-in @dir, search with-in @ei
 * list, if @dentry found go ahead and create the file/dir
 *
 * Returns the result of simple_lookup() when a matching directory or file
 * entry was found, otherwise NULL.
 */
static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags)
{
	const struct file_operations *fops;
	const struct eventfs_entry *entry;
	struct eventfs_inode *ei_child;
	struct tracefs_inode *ti;
	struct eventfs_inode *ei;
	struct dentry *ei_dentry = NULL;
	struct dentry *ret = NULL;
	const char *name = dentry->d_name.name;
	bool created = false;
	umode_t mode;
	void *data;
	int idx;
	int i;
	int r;

	ti = get_tracefs(dir);
	if (!(ti->flags & TRACEFS_EVENT_INODE))
		return NULL;

	/* Grab srcu to prevent the ei from going away */
	idx = srcu_read_lock(&eventfs_srcu);

	/*
	 * Grab the eventfs_mutex to get a consistent value from ti->private
	 * and, if the ei is not being freed, from ei->dentry.
	 */
	mutex_lock(&eventfs_mutex);
	ei = READ_ONCE(ti->private);
	if (ei && !ei->is_freed)
		ei_dentry = READ_ONCE(ei->dentry);
	mutex_unlock(&eventfs_mutex);

	/* ei_dentry stays NULL when the ei is being freed */
	if (!ei || !ei_dentry)
		goto out;

	data = ei->data;

	/* First see if the name matches one of the sub directories */
	list_for_each_entry_srcu(ei_child, &ei->children, list,
				 srcu_read_lock_held(&eventfs_srcu)) {
		if (strcmp(ei_child->name, name) != 0)
			continue;
		ret = simple_lookup(dir, dentry, flags);
		create_dir_dentry(ei, ei_child, ei_dentry, true);
		created = true;
		break;
	}

	if (created)
		goto out;

	/* Otherwise see if the name matches one of the files of this ei */
	for (i = 0; i < ei->nr_entries; i++) {
		entry = &ei->entries[i];
		if (strcmp(name, entry->name) == 0) {
			/* The callback may replace its private copy of data */
			void *cdata = data;
			mutex_lock(&eventfs_mutex);
			/* If ei->is_freed, then the event itself may be too */
			if (!ei->is_freed)
				r = entry->callback(name, &mode, &cdata, &fops);
			else
				r = -1;
			mutex_unlock(&eventfs_mutex);
			/* Zero or negative means no file is created for this entry */
			if (r <= 0)
				continue;
			ret = simple_lookup(dir, dentry, flags);
			create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
					   fops, true);
			break;
		}
	}
 out:
	srcu_read_unlock(&eventfs_srcu, idx);
	return ret;
}
580 
/*
 * Stored in file->private_data of an open eventfs directory (see
 * dcache_dir_open_wrapper()).
 */
struct dentry_list {
	/* The private_data that dcache_dir_open() set up for this file */
	void			*cursor;
	/* NULL terminated array of dentries to dput() on release, or NULL */
	struct dentry		**dentries;
};
585 
586 /**
587  * eventfs_release - called to release eventfs file/dir
588  * @inode: inode to be released
589  * @file: file to be released (not used)
590  */
591 static int eventfs_release(struct inode *inode, struct file *file)
592 {
593 	struct tracefs_inode *ti;
594 	struct dentry_list *dlist = file->private_data;
595 	void *cursor;
596 	int i;
597 
598 	ti = get_tracefs(inode);
599 	if (!(ti->flags & TRACEFS_EVENT_INODE))
600 		return -EINVAL;
601 
602 	if (WARN_ON_ONCE(!dlist))
603 		return -EINVAL;
604 
605 	for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
606 		dput(dlist->dentries[i]);
607 	}
608 
609 	cursor = dlist->cursor;
610 	kfree(dlist->dentries);
611 	kfree(dlist);
612 	file->private_data = cursor;
613 	return dcache_dir_close(inode, file);
614 }
615 
616 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
617 {
618 	struct dentry **tmp;
619 
620 	tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
621 	if (!tmp)
622 		return -1;
623 	tmp[cnt] = d;
624 	tmp[cnt + 1] = NULL;
625 	*dentries = tmp;
626 	return 0;
627 }
628 
629 /**
630  * dcache_dir_open_wrapper - eventfs open wrapper
631  * @inode: not used
632  * @file: dir to be opened (to create it's children)
633  *
634  * Used to dynamic create file/dir with-in @file, all the
635  * file/dir will be created. If already created then references
636  * will be increased
637  */
638 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
639 {
640 	const struct file_operations *fops;
641 	const struct eventfs_entry *entry;
642 	struct eventfs_inode *ei_child;
643 	struct tracefs_inode *ti;
644 	struct eventfs_inode *ei;
645 	struct dentry_list *dlist;
646 	struct dentry **dentries = NULL;
647 	struct dentry *parent = file_dentry(file);
648 	struct dentry *d;
649 	struct inode *f_inode = file_inode(file);
650 	const char *name = parent->d_name.name;
651 	umode_t mode;
652 	void *data;
653 	int cnt = 0;
654 	int idx;
655 	int ret;
656 	int i;
657 	int r;
658 
659 	ti = get_tracefs(f_inode);
660 	if (!(ti->flags & TRACEFS_EVENT_INODE))
661 		return -EINVAL;
662 
663 	if (WARN_ON_ONCE(file->private_data))
664 		return -EINVAL;
665 
666 	idx = srcu_read_lock(&eventfs_srcu);
667 
668 	mutex_lock(&eventfs_mutex);
669 	ei = READ_ONCE(ti->private);
670 	mutex_unlock(&eventfs_mutex);
671 
672 	if (!ei) {
673 		srcu_read_unlock(&eventfs_srcu, idx);
674 		return -EINVAL;
675 	}
676 
677 
678 	data = ei->data;
679 
680 	dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
681 	if (!dlist) {
682 		srcu_read_unlock(&eventfs_srcu, idx);
683 		return -ENOMEM;
684 	}
685 
686 	inode_lock(parent->d_inode);
687 	list_for_each_entry_srcu(ei_child, &ei->children, list,
688 				 srcu_read_lock_held(&eventfs_srcu)) {
689 		d = create_dir_dentry(ei, ei_child, parent, false);
690 		if (d) {
691 			ret = add_dentries(&dentries, d, cnt);
692 			if (ret < 0)
693 				break;
694 			cnt++;
695 		}
696 	}
697 
698 	for (i = 0; i < ei->nr_entries; i++) {
699 		void *cdata = data;
700 		entry = &ei->entries[i];
701 		name = entry->name;
702 		mutex_lock(&eventfs_mutex);
703 		/* If ei->is_freed, then the event itself may be too */
704 		if (!ei->is_freed)
705 			r = entry->callback(name, &mode, &cdata, &fops);
706 		else
707 			r = -1;
708 		mutex_unlock(&eventfs_mutex);
709 		if (r <= 0)
710 			continue;
711 		d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
712 		if (d) {
713 			ret = add_dentries(&dentries, d, cnt);
714 			if (ret < 0)
715 				break;
716 			cnt++;
717 		}
718 	}
719 	inode_unlock(parent->d_inode);
720 	srcu_read_unlock(&eventfs_srcu, idx);
721 	ret = dcache_dir_open(inode, file);
722 
723 	/*
724 	 * dcache_dir_open() sets file->private_data to a dentry cursor.
725 	 * Need to save that but also save all the dentries that were
726 	 * opened by this function.
727 	 */
728 	dlist->cursor = file->private_data;
729 	dlist->dentries = dentries;
730 	file->private_data = dlist;
731 	return ret;
732 }
733 
734 /*
735  * This just sets the file->private_data back to the cursor and back.
736  */
737 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
738 {
739 	struct dentry_list *dlist = file->private_data;
740 	int ret;
741 
742 	file->private_data = dlist->cursor;
743 	ret = dcache_readdir(file, ctx);
744 	dlist->cursor = file->private_data;
745 	file->private_data = dlist;
746 	return ret;
747 }
748 
749 /**
750  * eventfs_create_dir - Create the eventfs_inode for this directory
751  * @name: The name of the directory to create.
752  * @parent: The eventfs_inode of the parent directory.
753  * @entries: A list of entries that represent the files under this directory
754  * @size: The number of @entries
755  * @data: The default data to pass to the files (an entry may override it).
756  *
757  * This function creates the descriptor to represent a directory in the
758  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
759  * used to create other children underneath.
760  *
761  * The @entries is an array of eventfs_entry structures which has:
762  *	const char		 *name
763  *	eventfs_callback	callback;
764  *
765  * The name is the name of the file, and the callback is a pointer to a function
766  * that will be called when the file is reference (either by lookup or by
767  * reading a directory). The callback is of the prototype:
768  *
769  *    int callback(const char *name, umode_t *mode, void **data,
770  *		   const struct file_operations **fops);
771  *
772  * When a file needs to be created, this callback will be called with
773  *   name = the name of the file being created (so that the same callback
774  *          may be used for multiple files).
775  *   mode = a place to set the file's mode
776  *   data = A pointer to @data, and the callback may replace it, which will
777  *         cause the file created to pass the new data to the open() call.
778  *   fops = the fops to use for the created file.
779  *
780  * NB. @callback is called while holding internal locks of the eventfs
781  *     system. The callback must not call any code that might also call into
782  *     the tracefs or eventfs system or it will risk creating a deadlock.
783  */
784 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
785 					 const struct eventfs_entry *entries,
786 					 int size, void *data)
787 {
788 	struct eventfs_inode *ei;
789 
790 	if (!parent)
791 		return ERR_PTR(-EINVAL);
792 
793 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
794 	if (!ei)
795 		return ERR_PTR(-ENOMEM);
796 
797 	ei->name = kstrdup_const(name, GFP_KERNEL);
798 	if (!ei->name) {
799 		kfree(ei);
800 		return ERR_PTR(-ENOMEM);
801 	}
802 
803 	if (size) {
804 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
805 		if (!ei->d_children) {
806 			kfree_const(ei->name);
807 			kfree(ei);
808 			return ERR_PTR(-ENOMEM);
809 		}
810 	}
811 
812 	ei->entries = entries;
813 	ei->nr_entries = size;
814 	ei->data = data;
815 	INIT_LIST_HEAD(&ei->children);
816 	INIT_LIST_HEAD(&ei->list);
817 
818 	mutex_lock(&eventfs_mutex);
819 	if (!parent->is_freed) {
820 		list_add_tail(&ei->list, &parent->children);
821 		ei->d_parent = parent->dentry;
822 	}
823 	mutex_unlock(&eventfs_mutex);
824 
825 	/* Was the parent freed? */
826 	if (list_empty(&ei->list)) {
827 		free_ei(ei);
828 		ei = NULL;
829 	}
830 	return ei;
831 }
832 
833 /**
834  * eventfs_create_events_dir - create the top level events directory
835  * @name: The name of the top level directory to create.
836  * @parent: Parent dentry for this file in the tracefs directory.
837  * @entries: A list of entries that represent the files under this directory
838  * @size: The number of @entries
839  * @data: The default data to pass to the files (an entry may override it).
840  *
841  * This function creates the top of the trace event directory.
842  *
843  * See eventfs_create_dir() for use of @entries.
844  */
845 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
846 						const struct eventfs_entry *entries,
847 						int size, void *data)
848 {
849 	struct dentry *dentry = tracefs_start_creating(name, parent);
850 	struct eventfs_inode *ei;
851 	struct tracefs_inode *ti;
852 	struct inode *inode;
853 
854 	if (security_locked_down(LOCKDOWN_TRACEFS))
855 		return NULL;
856 
857 	if (IS_ERR(dentry))
858 		return ERR_CAST(dentry);
859 
860 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
861 	if (!ei)
862 		goto fail_ei;
863 
864 	inode = tracefs_get_inode(dentry->d_sb);
865 	if (unlikely(!inode))
866 		goto fail;
867 
868 	if (size) {
869 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
870 		if (!ei->d_children)
871 			goto fail;
872 	}
873 
874 	ei->dentry = dentry;
875 	ei->entries = entries;
876 	ei->nr_entries = size;
877 	ei->data = data;
878 	ei->name = kstrdup_const(name, GFP_KERNEL);
879 	if (!ei->name)
880 		goto fail;
881 
882 	INIT_LIST_HEAD(&ei->children);
883 	INIT_LIST_HEAD(&ei->list);
884 
885 	ti = get_tracefs(inode);
886 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
887 	ti->private = ei;
888 
889 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
890 	inode->i_op = &eventfs_root_dir_inode_operations;
891 	inode->i_fop = &eventfs_file_operations;
892 
893 	dentry->d_fsdata = ei;
894 
895 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
896 	inc_nlink(inode);
897 	d_instantiate(dentry, inode);
898 	inc_nlink(dentry->d_parent->d_inode);
899 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
900 	tracefs_end_creating(dentry);
901 
902 	return ei;
903 
904  fail:
905 	kfree(ei->d_children);
906 	kfree(ei);
907  fail_ei:
908 	tracefs_failed_creating(dentry);
909 	return ERR_PTR(-ENOMEM);
910 }
911 
912 static LLIST_HEAD(free_list);
913 
914 static void eventfs_workfn(struct work_struct *work)
915 {
916         struct eventfs_inode *ei, *tmp;
917         struct llist_node *llnode;
918 
919 	llnode = llist_del_all(&free_list);
920         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
921 		/* This dput() matches the dget() from unhook_dentry() */
922 		for (int i = 0; i < ei->nr_entries; i++) {
923 			if (ei->d_children[i])
924 				dput(ei->d_children[i]);
925 		}
926 		/* This should only get here if it had a dentry */
927 		if (!WARN_ON_ONCE(!ei->dentry))
928 			dput(ei->dentry);
929         }
930 }
931 
932 static DECLARE_WORK(eventfs_work, eventfs_workfn);
933 
934 static void free_rcu_ei(struct rcu_head *head)
935 {
936 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
937 
938 	if (ei->dentry) {
939 		/* Do not free the ei until all references of dentry are gone */
940 		if (llist_add(&ei->llist, &free_list))
941 			queue_work(system_unbound_wq, &eventfs_work);
942 		return;
943 	}
944 
945 	/* If the ei doesn't have a dentry, neither should its children */
946 	for (int i = 0; i < ei->nr_entries; i++) {
947 		WARN_ON_ONCE(ei->d_children[i]);
948 	}
949 
950 	free_ei(ei);
951 }
952 
/*
 * Take the two references on @dentry that the removal path will drop
 * later. A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * simple_recursive_removal() expects a reference, as it
		 * will include a dput().
		 */
		dget(dentry);

		/*
		 * The second one is for the dput() in eventfs_workfn(),
		 * which will free the ei after the SRCU grace period
		 * is over.
		 */
		dget(dentry);
	}
}
970 
971 /**
972  * eventfs_remove_rec - remove eventfs dir or file from list
973  * @ei: eventfs_inode to be removed.
974  * @level: prevent recursion from going more than 3 levels deep.
975  *
976  * This function recursively removes eventfs_inodes which
977  * contains info of files and/or directories.
978  */
979 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
980 {
981 	struct eventfs_inode *ei_child;
982 
983 	if (!ei)
984 		return;
985 	/*
986 	 * Check recursion depth. It should never be greater than 3:
987 	 * 0 - events/
988 	 * 1 - events/group/
989 	 * 2 - events/group/event/
990 	 * 3 - events/group/event/file
991 	 */
992 	if (WARN_ON_ONCE(level > 3))
993 		return;
994 
995 	/* search for nested folders or files */
996 	list_for_each_entry_srcu(ei_child, &ei->children, list,
997 				 lockdep_is_held(&eventfs_mutex)) {
998 		/* Children only have dentry if parent does */
999 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1000 		eventfs_remove_rec(ei_child, level + 1);
1001 	}
1002 
1003 
1004 	ei->is_freed = 1;
1005 
1006 	for (int i = 0; i < ei->nr_entries; i++) {
1007 		if (ei->d_children[i]) {
1008 			/* Children only have dentry if parent does */
1009 			WARN_ON_ONCE(!ei->dentry);
1010 			unhook_dentry(ei->d_children[i]);
1011 		}
1012 	}
1013 
1014 	unhook_dentry(ei->dentry);
1015 
1016 	list_del_rcu(&ei->list);
1017 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1018 }
1019 
1020 /**
1021  * eventfs_remove_dir - remove eventfs dir or file from list
1022  * @ei: eventfs_inode to be removed.
1023  *
1024  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1025  */
1026 void eventfs_remove_dir(struct eventfs_inode *ei)
1027 {
1028 	struct dentry *dentry;
1029 
1030 	if (!ei)
1031 		return;
1032 
1033 	mutex_lock(&eventfs_mutex);
1034 	dentry = ei->dentry;
1035 	eventfs_remove_rec(ei, 0);
1036 	mutex_unlock(&eventfs_mutex);
1037 
1038 	/*
1039 	 * If any of the ei children has a dentry, then the ei itself
1040 	 * must have a dentry.
1041 	 */
1042 	if (dentry)
1043 		simple_recursive_removal(dentry, NULL);
1044 }
1045 
1046 /**
1047  * eventfs_remove_events_dir - remove the top level eventfs directory
1048  * @ei: the event_inode returned by eventfs_create_events_dir().
1049  *
1050  * This function removes the events main directory
1051  */
1052 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1053 {
1054 	struct dentry *dentry;
1055 
1056 	dentry = ei->dentry;
1057 	eventfs_remove_dir(ei);
1058 
1059 	/*
1060 	 * Matches the dget() done by tracefs_start_creating()
1061 	 * in eventfs_create_events_dir() when it the dentry was
1062 	 * created. In other words, it's a normal dentry that
1063 	 * sticks around while the other ei->dentry are created
1064 	 * and destroyed dynamically.
1065 	 */
1066 	dput(dentry);
1067 }
1068