1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 * is on its way to being freed after the last dput() is made on it. 32 */ 33 static DEFINE_MUTEX(eventfs_mutex); 34 35 /* 36 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 37 * its parent's list and will have is_freed set (under eventfs_mutex). 38 * After the SRCU grace period is over and the last dput() is called 39 * the ei is freed. 40 */ 41 DEFINE_STATIC_SRCU(eventfs_srcu); 42 43 /* Mode is unsigned short, use the upper bits for flags */ 44 enum { 45 EVENTFS_SAVE_MODE = BIT(16), 46 EVENTFS_SAVE_UID = BIT(17), 47 EVENTFS_SAVE_GID = BIT(18), 48 }; 49 50 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 51 52 static struct dentry *eventfs_root_lookup(struct inode *dir, 53 struct dentry *dentry, 54 unsigned int flags); 55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); 56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); 57 static int eventfs_release(struct inode *inode, struct file *file); 58 59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 60 { 61 unsigned int ia_valid = iattr->ia_valid; 62 63 if (ia_valid & ATTR_MODE) { 64 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 65 (iattr->ia_mode & EVENTFS_MODE_MASK) | 66 EVENTFS_SAVE_MODE; 67 } 68 if (ia_valid & ATTR_UID) { 69 attr->mode |= EVENTFS_SAVE_UID; 70 attr->uid = iattr->ia_uid; 71 } 72 if (ia_valid & ATTR_GID) { 73 attr->mode |= EVENTFS_SAVE_GID; 74 attr->gid = iattr->ia_gid; 75 } 76 } 77 78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 79 struct iattr *iattr) 80 { 81 const struct eventfs_entry *entry; 82 struct eventfs_inode *ei; 83 const char *name; 84 int ret; 85 86 mutex_lock(&eventfs_mutex); 87 ei = dentry->d_fsdata; 88 if (ei->is_freed) { 89 /* Do not allow changes if the event is about to be removed. */ 90 mutex_unlock(&eventfs_mutex); 91 return -ENODEV; 92 } 93 94 /* Preallocate the children mode array if necessary */ 95 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 96 if (!ei->entry_attrs) { 97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, 98 GFP_NOFS); 99 if (!ei->entry_attrs) { 100 ret = -ENOMEM; 101 goto out; 102 } 103 } 104 } 105 106 ret = simple_setattr(idmap, dentry, iattr); 107 if (ret < 0) 108 goto out; 109 110 /* 111 * If this is a dir, then update the ei cache, only the file 112 * mode is saved in the ei->m_children, and the ownership is 113 * determined by the parent directory. 114 */ 115 if (dentry->d_inode->i_mode & S_IFDIR) { 116 update_attr(&ei->attr, iattr); 117 118 } else { 119 name = dentry->d_name.name; 120 121 for (int i = 0; i < ei->nr_entries; i++) { 122 entry = &ei->entries[i]; 123 if (strcmp(name, entry->name) == 0) { 124 update_attr(&ei->entry_attrs[i], iattr); 125 break; 126 } 127 } 128 } 129 out: 130 mutex_unlock(&eventfs_mutex); 131 return ret; 132 } 133 134 static const struct inode_operations eventfs_root_dir_inode_operations = { 135 .lookup = eventfs_root_lookup, 136 .setattr = eventfs_set_attr, 137 }; 138 139 static const struct inode_operations eventfs_file_inode_operations = { 140 .setattr = eventfs_set_attr, 141 }; 142 143 static const struct file_operations eventfs_file_operations = { 144 .open = dcache_dir_open_wrapper, 145 .read = generic_read_dir, 146 .iterate_shared = dcache_readdir_wrapper, 147 .llseek = generic_file_llseek, 148 .release = eventfs_release, 149 }; 150 151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode) 152 { 153 if (!attr) { 154 inode->i_mode = mode; 155 return; 156 } 157 158 if (attr->mode & EVENTFS_SAVE_MODE) 159 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 160 else 161 inode->i_mode = mode; 162 163 if (attr->mode & EVENTFS_SAVE_UID) 164 inode->i_uid = attr->uid; 165 166 if (attr->mode & EVENTFS_SAVE_GID) 167 inode->i_gid = attr->gid; 168 } 169 170 /** 171 * create_file - create a file in the tracefs filesystem 172 * @name: the name of the file to create. 173 * @mode: the permission that the file should have. 174 * @attr: saved attributes changed by user 175 * @parent: parent dentry for this file. 176 * @data: something that the caller will want to get to later on. 177 * @fop: struct file_operations that should be used for this file. 178 * 179 * This function creates a dentry that represents a file in the eventsfs_inode 180 * directory. The inode.i_private pointer will point to @data in the open() 181 * call. 182 */ 183 static struct dentry *create_file(const char *name, umode_t mode, 184 struct eventfs_attr *attr, 185 struct dentry *parent, void *data, 186 const struct file_operations *fop) 187 { 188 struct tracefs_inode *ti; 189 struct dentry *dentry; 190 struct inode *inode; 191 192 if (!(mode & S_IFMT)) 193 mode |= S_IFREG; 194 195 if (WARN_ON_ONCE(!S_ISREG(mode))) 196 return NULL; 197 198 WARN_ON_ONCE(!parent); 199 dentry = eventfs_start_creating(name, parent); 200 201 if (IS_ERR(dentry)) 202 return dentry; 203 204 inode = tracefs_get_inode(dentry->d_sb); 205 if (unlikely(!inode)) 206 return eventfs_failed_creating(dentry); 207 208 /* If the user updated the directory's attributes, use them */ 209 update_inode_attr(inode, attr, mode); 210 211 inode->i_op = &eventfs_file_inode_operations; 212 inode->i_fop = fop; 213 inode->i_private = data; 214 215 ti = get_tracefs(inode); 216 ti->flags |= TRACEFS_EVENT_INODE; 217 d_instantiate(dentry, inode); 218 fsnotify_create(dentry->d_parent->d_inode, dentry); 219 return eventfs_end_creating(dentry); 220 }; 221 222 /** 223 * create_dir - create a dir in the tracefs filesystem 224 * @ei: the eventfs_inode that represents the directory to create 225 * @parent: parent dentry for this file. 226 * 227 * This function will create a dentry for a directory represented by 228 * a eventfs_inode. 229 */ 230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 231 { 232 struct tracefs_inode *ti; 233 struct dentry *dentry; 234 struct inode *inode; 235 236 dentry = eventfs_start_creating(ei->name, parent); 237 if (IS_ERR(dentry)) 238 return dentry; 239 240 inode = tracefs_get_inode(dentry->d_sb); 241 if (unlikely(!inode)) 242 return eventfs_failed_creating(dentry); 243 244 /* If the user updated the directory's attributes, use them */ 245 update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 246 247 inode->i_op = &eventfs_root_dir_inode_operations; 248 inode->i_fop = &eventfs_file_operations; 249 250 ti = get_tracefs(inode); 251 ti->flags |= TRACEFS_EVENT_INODE; 252 253 inc_nlink(inode); 254 d_instantiate(dentry, inode); 255 inc_nlink(dentry->d_parent->d_inode); 256 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 257 return eventfs_end_creating(dentry); 258 } 259 260 static void free_ei(struct eventfs_inode *ei) 261 { 262 kfree_const(ei->name); 263 kfree(ei->d_children); 264 kfree(ei->entry_attrs); 265 kfree(ei); 266 } 267 268 /** 269 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 270 * @ti: the tracefs_inode of the dentry 271 * @dentry: dentry which has the reference to remove. 272 * 273 * Remove the association between a dentry from an eventfs_inode. 274 */ 275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 276 { 277 struct eventfs_inode *ei; 278 int i; 279 280 mutex_lock(&eventfs_mutex); 281 282 ei = dentry->d_fsdata; 283 if (!ei) 284 goto out; 285 286 /* This could belong to one of the files of the ei */ 287 if (ei->dentry != dentry) { 288 for (i = 0; i < ei->nr_entries; i++) { 289 if (ei->d_children[i] == dentry) 290 break; 291 } 292 if (WARN_ON_ONCE(i == ei->nr_entries)) 293 goto out; 294 ei->d_children[i] = NULL; 295 } else if (ei->is_freed) { 296 free_ei(ei); 297 } else { 298 ei->dentry = NULL; 299 } 300 301 dentry->d_fsdata = NULL; 302 out: 303 mutex_unlock(&eventfs_mutex); 304 } 305 306 /** 307 * create_file_dentry - create a dentry for a file of an eventfs_inode 308 * @ei: the eventfs_inode that the file will be created under 309 * @idx: the index into the d_children[] of the @ei 310 * @parent: The parent dentry of the created file. 311 * @name: The name of the file to create 312 * @mode: The mode of the file. 313 * @data: The data to use to set the inode of the file with on open() 314 * @fops: The fops of the file to be created. 315 * @lookup: If called by the lookup routine, in which case, dput() the created dentry. 316 * 317 * Create a dentry for a file of an eventfs_inode @ei and place it into the 318 * address located at @e_dentry. If the @e_dentry already has a dentry, then 319 * just do a dget() on it and return. Otherwise create the dentry and attach it. 320 */ 321 static struct dentry * 322 create_file_dentry(struct eventfs_inode *ei, int idx, 323 struct dentry *parent, const char *name, umode_t mode, void *data, 324 const struct file_operations *fops, bool lookup) 325 { 326 struct eventfs_attr *attr = NULL; 327 struct dentry **e_dentry = &ei->d_children[idx]; 328 struct dentry *dentry; 329 330 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 331 332 mutex_lock(&eventfs_mutex); 333 if (ei->is_freed) { 334 mutex_unlock(&eventfs_mutex); 335 return NULL; 336 } 337 /* If the e_dentry already has a dentry, use it */ 338 if (*e_dentry) { 339 /* lookup does not need to up the ref count */ 340 if (!lookup) 341 dget(*e_dentry); 342 mutex_unlock(&eventfs_mutex); 343 return *e_dentry; 344 } 345 346 /* ei->entry_attrs are protected by SRCU */ 347 if (ei->entry_attrs) 348 attr = &ei->entry_attrs[idx]; 349 350 mutex_unlock(&eventfs_mutex); 351 352 dentry = create_file(name, mode, attr, parent, data, fops); 353 354 mutex_lock(&eventfs_mutex); 355 356 if (IS_ERR_OR_NULL(dentry)) { 357 /* 358 * When the mutex was released, something else could have 359 * created the dentry for this e_dentry. In which case 360 * use that one. 361 * 362 * If ei->is_freed is set, the e_dentry is currently on its 363 * way to being freed, don't return it. If e_dentry is NULL 364 * it means it was already freed. 365 */ 366 if (ei->is_freed) 367 dentry = NULL; 368 else 369 dentry = *e_dentry; 370 /* The lookup does not need to up the dentry refcount */ 371 if (dentry && !lookup) 372 dget(dentry); 373 mutex_unlock(&eventfs_mutex); 374 return dentry; 375 } 376 377 if (!*e_dentry && !ei->is_freed) { 378 *e_dentry = dentry; 379 dentry->d_fsdata = ei; 380 } else { 381 /* 382 * Should never happen unless we get here due to being freed. 383 * Otherwise it means two dentries exist with the same name. 384 */ 385 WARN_ON_ONCE(!ei->is_freed); 386 dentry = NULL; 387 } 388 mutex_unlock(&eventfs_mutex); 389 390 if (lookup) 391 dput(dentry); 392 393 return dentry; 394 } 395 396 /** 397 * eventfs_post_create_dir - post create dir routine 398 * @ei: eventfs_inode of recently created dir 399 * 400 * Map the meta-data of files within an eventfs dir to their parent dentry 401 */ 402 static void eventfs_post_create_dir(struct eventfs_inode *ei) 403 { 404 struct eventfs_inode *ei_child; 405 struct tracefs_inode *ti; 406 407 lockdep_assert_held(&eventfs_mutex); 408 409 /* srcu lock already held */ 410 /* fill parent-child relation */ 411 list_for_each_entry_srcu(ei_child, &ei->children, list, 412 srcu_read_lock_held(&eventfs_srcu)) { 413 ei_child->d_parent = ei->dentry; 414 } 415 416 ti = get_tracefs(ei->dentry->d_inode); 417 ti->private = ei; 418 } 419 420 /** 421 * create_dir_dentry - Create a directory dentry for the eventfs_inode 422 * @pei: The eventfs_inode parent of ei. 423 * @ei: The eventfs_inode to create the directory for 424 * @parent: The dentry of the parent of this directory 425 * @lookup: True if this is called by the lookup code 426 * 427 * This creates and attaches a directory dentry to the eventfs_inode @ei. 428 */ 429 static struct dentry * 430 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 431 struct dentry *parent, bool lookup) 432 { 433 struct dentry *dentry = NULL; 434 435 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 436 437 mutex_lock(&eventfs_mutex); 438 if (pei->is_freed || ei->is_freed) { 439 mutex_unlock(&eventfs_mutex); 440 return NULL; 441 } 442 if (ei->dentry) { 443 /* If the dentry already has a dentry, use it */ 444 dentry = ei->dentry; 445 /* lookup does not need to up the ref count */ 446 if (!lookup) 447 dget(dentry); 448 mutex_unlock(&eventfs_mutex); 449 return dentry; 450 } 451 mutex_unlock(&eventfs_mutex); 452 453 dentry = create_dir(ei, parent); 454 455 mutex_lock(&eventfs_mutex); 456 457 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 458 /* 459 * When the mutex was released, something else could have 460 * created the dentry for this e_dentry. In which case 461 * use that one. 462 * 463 * If ei->is_freed is set, the e_dentry is currently on its 464 * way to being freed. 465 */ 466 dentry = ei->dentry; 467 if (dentry && !lookup) 468 dget(dentry); 469 mutex_unlock(&eventfs_mutex); 470 return dentry; 471 } 472 473 if (!ei->dentry && !ei->is_freed) { 474 ei->dentry = dentry; 475 eventfs_post_create_dir(ei); 476 dentry->d_fsdata = ei; 477 } else { 478 /* 479 * Should never happen unless we get here due to being freed. 480 * Otherwise it means two dentries exist with the same name. 481 */ 482 WARN_ON_ONCE(!ei->is_freed); 483 dentry = NULL; 484 } 485 mutex_unlock(&eventfs_mutex); 486 487 if (lookup) 488 dput(dentry); 489 490 return dentry; 491 } 492 493 /** 494 * eventfs_root_lookup - lookup routine to create file/dir 495 * @dir: in which a lookup is being done 496 * @dentry: file/dir dentry 497 * @flags: Just passed to simple_lookup() 498 * 499 * Used to create dynamic file/dir with-in @dir, search with-in @ei 500 * list, if @dentry found go ahead and create the file/dir 501 */ 502 503 static struct dentry *eventfs_root_lookup(struct inode *dir, 504 struct dentry *dentry, 505 unsigned int flags) 506 { 507 const struct file_operations *fops; 508 const struct eventfs_entry *entry; 509 struct eventfs_inode *ei_child; 510 struct tracefs_inode *ti; 511 struct eventfs_inode *ei; 512 struct dentry *ei_dentry = NULL; 513 struct dentry *ret = NULL; 514 const char *name = dentry->d_name.name; 515 bool created = false; 516 umode_t mode; 517 void *data; 518 int idx; 519 int i; 520 int r; 521 522 ti = get_tracefs(dir); 523 if (!(ti->flags & TRACEFS_EVENT_INODE)) 524 return NULL; 525 526 /* Grab srcu to prevent the ei from going away */ 527 idx = srcu_read_lock(&eventfs_srcu); 528 529 /* 530 * Grab the eventfs_mutex to consistent value from ti->private. 531 * This s 532 */ 533 mutex_lock(&eventfs_mutex); 534 ei = READ_ONCE(ti->private); 535 if (ei && !ei->is_freed) 536 ei_dentry = READ_ONCE(ei->dentry); 537 mutex_unlock(&eventfs_mutex); 538 539 if (!ei || !ei_dentry) 540 goto out; 541 542 data = ei->data; 543 544 list_for_each_entry_srcu(ei_child, &ei->children, list, 545 srcu_read_lock_held(&eventfs_srcu)) { 546 if (strcmp(ei_child->name, name) != 0) 547 continue; 548 ret = simple_lookup(dir, dentry, flags); 549 create_dir_dentry(ei, ei_child, ei_dentry, true); 550 created = true; 551 break; 552 } 553 554 if (created) 555 goto out; 556 557 for (i = 0; i < ei->nr_entries; i++) { 558 entry = &ei->entries[i]; 559 if (strcmp(name, entry->name) == 0) { 560 void *cdata = data; 561 mutex_lock(&eventfs_mutex); 562 /* If ei->is_freed, then the event itself may be too */ 563 if (!ei->is_freed) 564 r = entry->callback(name, &mode, &cdata, &fops); 565 else 566 r = -1; 567 mutex_unlock(&eventfs_mutex); 568 if (r <= 0) 569 continue; 570 ret = simple_lookup(dir, dentry, flags); 571 create_file_dentry(ei, i, ei_dentry, name, mode, cdata, 572 fops, true); 573 break; 574 } 575 } 576 out: 577 srcu_read_unlock(&eventfs_srcu, idx); 578 return ret; 579 } 580 581 struct dentry_list { 582 void *cursor; 583 struct dentry **dentries; 584 }; 585 586 /** 587 * eventfs_release - called to release eventfs file/dir 588 * @inode: inode to be released 589 * @file: file to be released (not used) 590 */ 591 static int eventfs_release(struct inode *inode, struct file *file) 592 { 593 struct tracefs_inode *ti; 594 struct dentry_list *dlist = file->private_data; 595 void *cursor; 596 int i; 597 598 ti = get_tracefs(inode); 599 if (!(ti->flags & TRACEFS_EVENT_INODE)) 600 return -EINVAL; 601 602 if (WARN_ON_ONCE(!dlist)) 603 return -EINVAL; 604 605 for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { 606 dput(dlist->dentries[i]); 607 } 608 609 cursor = dlist->cursor; 610 kfree(dlist->dentries); 611 kfree(dlist); 612 file->private_data = cursor; 613 return dcache_dir_close(inode, file); 614 } 615 616 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) 617 { 618 struct dentry **tmp; 619 620 tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); 621 if (!tmp) 622 return -1; 623 tmp[cnt] = d; 624 tmp[cnt + 1] = NULL; 625 *dentries = tmp; 626 return 0; 627 } 628 629 /** 630 * dcache_dir_open_wrapper - eventfs open wrapper 631 * @inode: not used 632 * @file: dir to be opened (to create it's children) 633 * 634 * Used to dynamic create file/dir with-in @file, all the 635 * file/dir will be created. If already created then references 636 * will be increased 637 */ 638 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) 639 { 640 const struct file_operations *fops; 641 const struct eventfs_entry *entry; 642 struct eventfs_inode *ei_child; 643 struct tracefs_inode *ti; 644 struct eventfs_inode *ei; 645 struct dentry_list *dlist; 646 struct dentry **dentries = NULL; 647 struct dentry *parent = file_dentry(file); 648 struct dentry *d; 649 struct inode *f_inode = file_inode(file); 650 const char *name = parent->d_name.name; 651 umode_t mode; 652 void *data; 653 int cnt = 0; 654 int idx; 655 int ret; 656 int i; 657 int r; 658 659 ti = get_tracefs(f_inode); 660 if (!(ti->flags & TRACEFS_EVENT_INODE)) 661 return -EINVAL; 662 663 if (WARN_ON_ONCE(file->private_data)) 664 return -EINVAL; 665 666 idx = srcu_read_lock(&eventfs_srcu); 667 668 mutex_lock(&eventfs_mutex); 669 ei = READ_ONCE(ti->private); 670 mutex_unlock(&eventfs_mutex); 671 672 if (!ei) { 673 srcu_read_unlock(&eventfs_srcu, idx); 674 return -EINVAL; 675 } 676 677 678 data = ei->data; 679 680 dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); 681 if (!dlist) { 682 srcu_read_unlock(&eventfs_srcu, idx); 683 return -ENOMEM; 684 } 685 686 inode_lock(parent->d_inode); 687 list_for_each_entry_srcu(ei_child, &ei->children, list, 688 srcu_read_lock_held(&eventfs_srcu)) { 689 d = create_dir_dentry(ei, ei_child, parent, false); 690 if (d) { 691 ret = add_dentries(&dentries, d, cnt); 692 if (ret < 0) 693 break; 694 cnt++; 695 } 696 } 697 698 for (i = 0; i < ei->nr_entries; i++) { 699 void *cdata = data; 700 entry = &ei->entries[i]; 701 name = entry->name; 702 mutex_lock(&eventfs_mutex); 703 /* If ei->is_freed, then the event itself may be too */ 704 if (!ei->is_freed) 705 r = entry->callback(name, &mode, &cdata, &fops); 706 else 707 r = -1; 708 mutex_unlock(&eventfs_mutex); 709 if (r <= 0) 710 continue; 711 d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); 712 if (d) { 713 ret = add_dentries(&dentries, d, cnt); 714 if (ret < 0) 715 break; 716 cnt++; 717 } 718 } 719 inode_unlock(parent->d_inode); 720 srcu_read_unlock(&eventfs_srcu, idx); 721 ret = dcache_dir_open(inode, file); 722 723 /* 724 * dcache_dir_open() sets file->private_data to a dentry cursor. 725 * Need to save that but also save all the dentries that were 726 * opened by this function. 727 */ 728 dlist->cursor = file->private_data; 729 dlist->dentries = dentries; 730 file->private_data = dlist; 731 return ret; 732 } 733 734 /* 735 * This just sets the file->private_data back to the cursor and back. 736 */ 737 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) 738 { 739 struct dentry_list *dlist = file->private_data; 740 int ret; 741 742 file->private_data = dlist->cursor; 743 ret = dcache_readdir(file, ctx); 744 dlist->cursor = file->private_data; 745 file->private_data = dlist; 746 return ret; 747 } 748 749 /** 750 * eventfs_create_dir - Create the eventfs_inode for this directory 751 * @name: The name of the directory to create. 752 * @parent: The eventfs_inode of the parent directory. 753 * @entries: A list of entries that represent the files under this directory 754 * @size: The number of @entries 755 * @data: The default data to pass to the files (an entry may override it). 756 * 757 * This function creates the descriptor to represent a directory in the 758 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 759 * used to create other children underneath. 760 * 761 * The @entries is an array of eventfs_entry structures which has: 762 * const char *name 763 * eventfs_callback callback; 764 * 765 * The name is the name of the file, and the callback is a pointer to a function 766 * that will be called when the file is reference (either by lookup or by 767 * reading a directory). The callback is of the prototype: 768 * 769 * int callback(const char *name, umode_t *mode, void **data, 770 * const struct file_operations **fops); 771 * 772 * When a file needs to be created, this callback will be called with 773 * name = the name of the file being created (so that the same callback 774 * may be used for multiple files). 775 * mode = a place to set the file's mode 776 * data = A pointer to @data, and the callback may replace it, which will 777 * cause the file created to pass the new data to the open() call. 778 * fops = the fops to use for the created file. 779 * 780 * NB. @callback is called while holding internal locks of the eventfs 781 * system. The callback must not call any code that might also call into 782 * the tracefs or eventfs system or it will risk creating a deadlock. 783 */ 784 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 785 const struct eventfs_entry *entries, 786 int size, void *data) 787 { 788 struct eventfs_inode *ei; 789 790 if (!parent) 791 return ERR_PTR(-EINVAL); 792 793 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 794 if (!ei) 795 return ERR_PTR(-ENOMEM); 796 797 ei->name = kstrdup_const(name, GFP_KERNEL); 798 if (!ei->name) { 799 kfree(ei); 800 return ERR_PTR(-ENOMEM); 801 } 802 803 if (size) { 804 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 805 if (!ei->d_children) { 806 kfree_const(ei->name); 807 kfree(ei); 808 return ERR_PTR(-ENOMEM); 809 } 810 } 811 812 ei->entries = entries; 813 ei->nr_entries = size; 814 ei->data = data; 815 INIT_LIST_HEAD(&ei->children); 816 INIT_LIST_HEAD(&ei->list); 817 818 mutex_lock(&eventfs_mutex); 819 if (!parent->is_freed) { 820 list_add_tail(&ei->list, &parent->children); 821 ei->d_parent = parent->dentry; 822 } 823 mutex_unlock(&eventfs_mutex); 824 825 /* Was the parent freed? */ 826 if (list_empty(&ei->list)) { 827 free_ei(ei); 828 ei = NULL; 829 } 830 return ei; 831 } 832 833 /** 834 * eventfs_create_events_dir - create the top level events directory 835 * @name: The name of the top level directory to create. 836 * @parent: Parent dentry for this file in the tracefs directory. 837 * @entries: A list of entries that represent the files under this directory 838 * @size: The number of @entries 839 * @data: The default data to pass to the files (an entry may override it). 840 * 841 * This function creates the top of the trace event directory. 842 * 843 * See eventfs_create_dir() for use of @entries. 844 */ 845 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 846 const struct eventfs_entry *entries, 847 int size, void *data) 848 { 849 struct dentry *dentry = tracefs_start_creating(name, parent); 850 struct eventfs_inode *ei; 851 struct tracefs_inode *ti; 852 struct inode *inode; 853 854 if (security_locked_down(LOCKDOWN_TRACEFS)) 855 return NULL; 856 857 if (IS_ERR(dentry)) 858 return ERR_CAST(dentry); 859 860 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 861 if (!ei) 862 goto fail_ei; 863 864 inode = tracefs_get_inode(dentry->d_sb); 865 if (unlikely(!inode)) 866 goto fail; 867 868 if (size) { 869 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 870 if (!ei->d_children) 871 goto fail; 872 } 873 874 ei->dentry = dentry; 875 ei->entries = entries; 876 ei->nr_entries = size; 877 ei->data = data; 878 ei->name = kstrdup_const(name, GFP_KERNEL); 879 if (!ei->name) 880 goto fail; 881 882 INIT_LIST_HEAD(&ei->children); 883 INIT_LIST_HEAD(&ei->list); 884 885 ti = get_tracefs(inode); 886 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 887 ti->private = ei; 888 889 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 890 inode->i_op = &eventfs_root_dir_inode_operations; 891 inode->i_fop = &eventfs_file_operations; 892 893 dentry->d_fsdata = ei; 894 895 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 896 inc_nlink(inode); 897 d_instantiate(dentry, inode); 898 inc_nlink(dentry->d_parent->d_inode); 899 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 900 tracefs_end_creating(dentry); 901 902 return ei; 903 904 fail: 905 kfree(ei->d_children); 906 kfree(ei); 907 fail_ei: 908 tracefs_failed_creating(dentry); 909 return ERR_PTR(-ENOMEM); 910 } 911 912 static LLIST_HEAD(free_list); 913 914 static void eventfs_workfn(struct work_struct *work) 915 { 916 struct eventfs_inode *ei, *tmp; 917 struct llist_node *llnode; 918 919 llnode = llist_del_all(&free_list); 920 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 921 /* This dput() matches the dget() from unhook_dentry() */ 922 for (int i = 0; i < ei->nr_entries; i++) { 923 if (ei->d_children[i]) 924 dput(ei->d_children[i]); 925 } 926 /* This should only get here if it had a dentry */ 927 if (!WARN_ON_ONCE(!ei->dentry)) 928 dput(ei->dentry); 929 } 930 } 931 932 static DECLARE_WORK(eventfs_work, eventfs_workfn); 933 934 static void free_rcu_ei(struct rcu_head *head) 935 { 936 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 937 938 if (ei->dentry) { 939 /* Do not free the ei until all references of dentry are gone */ 940 if (llist_add(&ei->llist, &free_list)) 941 queue_work(system_unbound_wq, &eventfs_work); 942 return; 943 } 944 945 /* If the ei doesn't have a dentry, neither should its children */ 946 for (int i = 0; i < ei->nr_entries; i++) { 947 WARN_ON_ONCE(ei->d_children[i]); 948 } 949 950 free_ei(ei); 951 } 952 953 static void unhook_dentry(struct dentry *dentry) 954 { 955 if (!dentry) 956 return; 957 /* 958 * Need to add a reference to the dentry that is expected by 959 * simple_recursive_removal(), which will include a dput(). 960 */ 961 dget(dentry); 962 963 /* 964 * Also add a reference for the dput() in eventfs_workfn(). 965 * That is required as that dput() will free the ei after 966 * the SRCU grace period is over. 967 */ 968 dget(dentry); 969 } 970 971 /** 972 * eventfs_remove_rec - remove eventfs dir or file from list 973 * @ei: eventfs_inode to be removed. 974 * @level: prevent recursion from going more than 3 levels deep. 975 * 976 * This function recursively removes eventfs_inodes which 977 * contains info of files and/or directories. 978 */ 979 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 980 { 981 struct eventfs_inode *ei_child; 982 983 if (!ei) 984 return; 985 /* 986 * Check recursion depth. It should never be greater than 3: 987 * 0 - events/ 988 * 1 - events/group/ 989 * 2 - events/group/event/ 990 * 3 - events/group/event/file 991 */ 992 if (WARN_ON_ONCE(level > 3)) 993 return; 994 995 /* search for nested folders or files */ 996 list_for_each_entry_srcu(ei_child, &ei->children, list, 997 lockdep_is_held(&eventfs_mutex)) { 998 /* Children only have dentry if parent does */ 999 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1000 eventfs_remove_rec(ei_child, level + 1); 1001 } 1002 1003 1004 ei->is_freed = 1; 1005 1006 for (int i = 0; i < ei->nr_entries; i++) { 1007 if (ei->d_children[i]) { 1008 /* Children only have dentry if parent does */ 1009 WARN_ON_ONCE(!ei->dentry); 1010 unhook_dentry(ei->d_children[i]); 1011 } 1012 } 1013 1014 unhook_dentry(ei->dentry); 1015 1016 list_del_rcu(&ei->list); 1017 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1018 } 1019 1020 /** 1021 * eventfs_remove_dir - remove eventfs dir or file from list 1022 * @ei: eventfs_inode to be removed. 1023 * 1024 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1025 */ 1026 void eventfs_remove_dir(struct eventfs_inode *ei) 1027 { 1028 struct dentry *dentry; 1029 1030 if (!ei) 1031 return; 1032 1033 mutex_lock(&eventfs_mutex); 1034 dentry = ei->dentry; 1035 eventfs_remove_rec(ei, 0); 1036 mutex_unlock(&eventfs_mutex); 1037 1038 /* 1039 * If any of the ei children has a dentry, then the ei itself 1040 * must have a dentry. 1041 */ 1042 if (dentry) 1043 simple_recursive_removal(dentry, NULL); 1044 } 1045 1046 /** 1047 * eventfs_remove_events_dir - remove the top level eventfs directory 1048 * @ei: the event_inode returned by eventfs_create_events_dir(). 1049 * 1050 * This function removes the events main directory 1051 */ 1052 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1053 { 1054 struct dentry *dentry; 1055 1056 dentry = ei->dentry; 1057 eventfs_remove_dir(ei); 1058 1059 /* 1060 * Matches the dget() done by tracefs_start_creating() 1061 * in eventfs_create_events_dir() when it the dentry was 1062 * created. In other words, it's a normal dentry that 1063 * sticks around while the other ei->dentry are created 1064 * and destroyed dynamically. 1065 */ 1066 dput(dentry); 1067 } 1068