// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

/*
 * fsnotify inode mark locking, lifetime and refcounting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list. It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark is
 * dropped. Thus having a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove the mark from g_list before dropping the
 * mark reference associated with that, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
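 *
 * For illustration, tying the LOCKING rules above together: a path that
 * needs all three locks must take them strictly in the documented order
 * (a minimal sketch, not a real call site):
 *
 *	fsnotify_group_lock(group);	(takes group->mark_mutex)
 *	spin_lock(&mark->lock);
 *	spin_lock(&mark->connector->lock);
 *	...
 *	spin_unlock(&mark->connector->lock);
 *	spin_unlock(&mark->lock);
 *	fsnotify_group_unlock(group);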
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/ratelimit.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	WARN_ON_ONCE(!refcount_read(&mark->refcnt));
	refcount_inc(&mark->refcnt);
}

static fsnotify_connp_t *fsnotify_object_connp(void *obj,
					       enum fsnotify_obj_type obj_type)
{
	switch (obj_type) {
	case FSNOTIFY_OBJ_TYPE_INODE:
		return &((struct inode *)obj)->i_fsnotify_marks;
	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
		return &real_mount(obj)->mnt_fsnotify_marks;
	case FSNOTIFY_OBJ_TYPE_SB:
		return fsnotify_sb_marks(obj);
	case FSNOTIFY_OBJ_TYPE_MNTNS:
		return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
	default:
		return NULL;
	}
}

static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
		return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
		return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
	return NULL;
}

__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
{
	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
		return 0;

	return READ_ONCE(*fsnotify_conn_mask_p(conn));
}

static void fsnotify_get_sb_watched_objects(struct super_block *sb)
{
	atomic_long_inc(fsnotify_sb_watched_objects(sb));
}

static void fsnotify_put_sb_watched_objects(struct super_block *sb)
{
	atomic_long_t *watched_objects = fsnotify_sb_watched_objects(sb);

	/* the superblock can go away after this decrement */
	if (atomic_long_dec_and_test(watched_objects))
		wake_up_var(watched_objects);
}

static void fsnotify_get_inode_ref(struct inode *inode)
{
	ihold(inode);
	fsnotify_get_sb_watched_objects(inode->i_sb);
}

static void fsnotify_put_inode_ref(struct inode *inode)
{
	/* read ->i_sb before the inode can go away */
	struct super_block *sb = inode->i_sb;

	iput(inode);
	fsnotify_put_sb_watched_objects(sb);
}

/*
 * Grab or drop watched objects reference depending on whether the connector
 * is attached and has any marks attached.
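 *
 * Marks in the connector list are sorted by descending group priority, so
 * the first mark determines the highest watching priority. A worked example
 * (derived from the loops below, not a separate code path): if conn->prio
 * was 0 and the new first mark's group has priority 2, counters [1] and [2]
 * are incremented and conn->prio becomes 2; counter [0], the total, is
 * maintained separately via the IS_WATCHED flag.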
 */
static void fsnotify_update_sb_watchers(struct super_block *sb,
					struct fsnotify_mark_connector *conn)
{
	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
	bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED;
	struct fsnotify_mark *first_mark = NULL;
	unsigned int highest_prio = 0;

	if (conn->obj)
		first_mark = hlist_entry_safe(conn->list.first,
					      struct fsnotify_mark, obj_list);
	if (first_mark)
		highest_prio = first_mark->group->priority;
	if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM))
		highest_prio = 0;

	/*
	 * If the highest priority of group watching this object is prio,
	 * then watched object has a reference on counters [0..prio].
	 * Update priority >= 1 watched objects counters.
	 */
	for (unsigned int p = conn->prio + 1; p <= highest_prio; p++)
		atomic_long_inc(&sbinfo->watched_objects[p]);
	for (unsigned int p = conn->prio; p > highest_prio; p--)
		atomic_long_dec(&sbinfo->watched_objects[p]);
	conn->prio = highest_prio;

	/* Update priority >= 0 (a.k.a total) watched objects counter */
	BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0);
	if (first_mark && !is_watched) {
		conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED;
		fsnotify_get_sb_watched_objects(sb);
	} else if (!first_mark && is_watched) {
		conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED;
		fsnotify_put_sb_watched_objects(sb);
	}
}

/*
 * Grab or drop inode reference for the connector if needed.
 *
 * When it's time to drop the reference, we only clear the HAS_IREF flag and
 * return the inode object. fsnotify_drop_object() will be responsible for
 * doing iput() outside of spinlocks. This happens when the last mark that
 * wanted iref is detached.
 */
static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
					  bool want_iref)
{
	bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
	struct inode *inode = NULL;

	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
	    want_iref == has_iref)
		return NULL;

	if (want_iref) {
		/* Pin inode if any mark wants inode refcount held */
		fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
		conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
	} else {
		/* Unpin inode after detach of last mark that wanted iref */
		inode = fsnotify_conn_inode(conn);
		conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
	}

	return inode;
}

static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	u32 new_mask = 0;
	bool want_iref = false;
	struct fsnotify_mark *mark;

	assert_spin_locked(&conn->lock);
	/* We can get detached connector here when inode is getting unlinked. */
	if (!fsnotify_valid_obj_type(conn->type))
		return NULL;
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
			continue;
		new_mask |= fsnotify_calc_mask(mark);
		if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
		    !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
			want_iref = true;
	}
	/*
	 * We use WRITE_ONCE() to prevent silly compiler optimizations from
	 * confusing readers not holding conn->lock with partial updates.
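	 *
	 * Lockless readers pair with this store via READ_ONCE(), as
	 * fsnotify_conn_mask() above does:
	 *
	 *	mask = READ_ONCE(*fsnotify_conn_mask_p(conn));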
	 */
	WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask);

	return fsnotify_update_iref(conn, want_iref);
}

static bool fsnotify_conn_watches_children(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return false;

	return fsnotify_inode_watches_children(fsnotify_conn_inode(conn));
}

static void fsnotify_conn_set_children_dentry_flags(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return;

	fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn));
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->obj cannot disappear under us. Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	bool update_children;

	if (!conn)
		return;

	spin_lock(&conn->lock);
	update_children = !fsnotify_conn_watches_children(conn);
	__fsnotify_recalc_mask(conn);
	update_children &= fsnotify_conn_watches_children(conn);
	spin_unlock(&conn->lock);
	/*
	 * Set children's PARENT_WATCHED flags only if parent started watching.
	 * When parent stops watching, we clear false positive PARENT_WATCHED
	 * flags lazily in __fsnotify_parent().
	 */
	if (update_children)
		fsnotify_conn_set_children_dentry_flags(conn);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark_connector *conn, *free;

	spin_lock(&destroy_lock);
	conn = connector_destroy_list;
	connector_destroy_list = NULL;
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);
	while (conn) {
		free = conn;
		conn = conn->destroy_next;
		kmem_cache_free(fsnotify_mark_connector_cachep, free);
	}
}

static void *fsnotify_detach_connector_from_object(
					struct fsnotify_mark_connector *conn,
					unsigned int *type)
{
	fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type);
	struct super_block *sb = fsnotify_connector_sb(conn);
	struct inode *inode = NULL;

	*type = conn->type;
	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
		return NULL;

	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
		inode = fsnotify_conn_inode(conn);
		inode->i_fsnotify_mask = 0;

		/* Unpin inode when detaching from connector */
		if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
			inode = NULL;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
		fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
	}

	rcu_assign_pointer(*connp, NULL);
	conn->obj = NULL;
	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
	if (sb)
		fsnotify_update_sb_watchers(sb, conn);

	return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	if (WARN_ON_ONCE(!group))
		return;
	group->ops->free_mark(mark);
	fsnotify_put_group(group);
}

/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
	if (!objp)
		return;
	/* Currently only inode references are passed to be dropped */
	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
		return;
	fsnotify_put_inode_ref(objp);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
	void *objp = NULL;
	unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
	bool free_conn = false;

	/* Catch marks that were actually never attached to object */
	if (!conn) {
		if (refcount_dec_and_test(&mark->refcnt))
			fsnotify_final_mark_destroy(mark);
		return;
	}

	/*
	 * We have to be careful so that traversals of obj_list under lock can
	 * safely grab mark reference.
	 */
	if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
		return;

	hlist_del_init_rcu(&mark->obj_list);
	if (hlist_empty(&conn->list)) {
		objp = fsnotify_detach_connector_from_object(conn, &type);
		free_conn = true;
	} else {
		struct super_block *sb = fsnotify_connector_sb(conn);

		/* Update watched objects after detaching mark */
		if (sb)
			fsnotify_update_sb_watchers(sb, conn);
		objp = __fsnotify_recalc_mask(conn);
		type = conn->type;
	}
	WRITE_ONCE(mark->connector, NULL);
	spin_unlock(&conn->lock);

	fsnotify_drop_object(type, objp);

	if (free_conn) {
		spin_lock(&destroy_lock);
		conn->destroy_next = connector_destroy_list;
		connector_destroy_list = conn;
		spin_unlock(&destroy_lock);
		queue_work(system_unbound_wq, &connector_reaper_work);
	}
	/*
	 * Note that we didn't update flags telling whether inode cares about
	 * what's happening with children. We update these flags from
	 * __fsnotify_parent() lazily when next event happens on one of our
	 * children.
	 */
	spin_lock(&destroy_lock);
	list_add(&mark->g_list, &destroy_list);
	spin_unlock(&destroy_lock);
	queue_delayed_work(system_unbound_wq, &reaper_work,
			   FSNOTIFY_REAPER_DELAY);
}
EXPORT_SYMBOL_GPL(fsnotify_put_mark);

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
	if (!mark)
		return true;

	if (refcount_inc_not_zero(&mark->refcnt)) {
		spin_lock(&mark->lock);
		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
			/* mark is attached, group is still alive then */
			atomic_inc(&mark->group->user_waits);
			spin_unlock(&mark->lock);
			return true;
		}
		spin_unlock(&mark->lock);
		fsnotify_put_mark(mark);
	}
	return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
	if (mark) {
		struct fsnotify_group *group = mark->group;

		fsnotify_put_mark(mark);
		/*
		 * We abuse notification_waitq on group shutdown for waiting for
		 * all marks pinned when waiting for userspace.
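		 *
		 * The matching increment is done in fsnotify_get_mark_safe().
		 * A shutdown path can thus drain all userspace waiters with,
		 * e.g. (a sketch of what group teardown can do):
		 *
		 *	wait_event(group->notification_waitq,
		 *		   !atomic_read(&group->user_waits));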
		 */
		if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
			wake_up(&group->notification_waitq);
	}
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
	__releases(&fsnotify_mark_srcu)
{
	int type;

	fsnotify_foreach_iter_type(type) {
		/* This can fail if mark is being removed */
		if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
			__release(&fsnotify_mark_srcu);
			goto fail;
		}
	}

	/*
	 * Now that both marks are pinned by refcount in the inode / vfsmount
	 * lists, we can drop SRCU lock, and safely resume the list iteration
	 * once userspace returns.
	 */
	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

	return true;

fail:
	for (type--; type >= 0; type--)
		fsnotify_put_mark_wake(iter_info->marks[type]);
	return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
	__acquires(&fsnotify_mark_srcu)
{
	int type;

	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
	fsnotify_foreach_iter_type(type)
		fsnotify_put_mark_wake(iter_info->marks[type]);
}

/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
	fsnotify_group_assert_locked(mark->group);
	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
		     refcount_read(&mark->refcnt) < 1 +
			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
	fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing happens only once the last reference to the mark is dropped, from
 * a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	spin_unlock(&mark->lock);

	/*
	 * Some groups like to know that marks are being freed. This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
	fsnotify_group_lock(group);
	fsnotify_detach_mark(mark);
	fsnotify_group_unlock(group);
	fsnotify_free_mark(mark);
}
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	if (a == b)
		return 0;
	if (!a)
		return 1;
	if (!b)
		return -1;
	if (a->priority < b->priority)
		return 1;
	if (a->priority > b->priority)
		return -1;
	if (a < b)
		return 1;
	return -1;
}

static int fsnotify_attach_info_to_sb(struct super_block *sb)
{
	struct fsnotify_sb_info *sbinfo;

	/* sb info is freed on fsnotify_sb_delete() */
	sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;

	/*
	 * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
	 * will observe an initialized structure
	 */
	if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
		/* Someone else created sbinfo for us */
		kfree(sbinfo);
	}
	return 0;
}

static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
					       void *obj, unsigned int obj_type)
{
	struct fsnotify_mark_connector *conn;

	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
	if (!conn)
		return -ENOMEM;
	spin_lock_init(&conn->lock);
	INIT_HLIST_HEAD(&conn->list);
	conn->flags = 0;
	conn->prio = 0;
	conn->type = obj_type;
	conn->obj = obj;

	/*
	 * cmpxchg() provides the barrier so that readers of *connp can see
	 * only initialized structure
	 */
	if (cmpxchg(connp, NULL, conn)) {
		/* Someone else created list structure for us */
		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
	}
	return 0;
}

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get connector pointer from inode or mount. Users that
 * hold reference to a mark on the list may directly lock connector->lock as
 * they are sure list cannot go away under them.
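 *
 * For illustration, a typical lookup therefore follows this shape (a sketch
 * mirroring fsnotify_find_mark() below):
 *
 *	conn = fsnotify_grab_connector(connp);
 *	if (!conn)
 *		return NULL;
 *	...walk conn->list under conn->lock...
 *	spin_unlock(&conn->lock);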
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
						fsnotify_connp_t *connp)
{
	struct fsnotify_mark_connector *conn;
	int idx;

	idx = srcu_read_lock(&fsnotify_mark_srcu);
	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
	if (!conn)
		goto out;
	spin_lock(&conn->lock);
	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) {
		spin_unlock(&conn->lock);
		srcu_read_unlock(&fsnotify_mark_srcu, idx);
		return NULL;
	}
out:
	srcu_read_unlock(&fsnotify_mark_srcu, idx);
	return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
				  unsigned int obj_type, int add_flags)
{
	struct super_block *sb = fsnotify_object_sb(obj, obj_type);
	struct fsnotify_mark *lmark, *last = NULL;
	struct fsnotify_mark_connector *conn;
	fsnotify_connp_t *connp;
	int cmp;
	int err = 0;

	if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
		return -EINVAL;

	/*
	 * Attach the sb info before attaching a connector to any object on sb.
	 * The sb info will remain attached as long as sb lives.
	 */
	if (sb && !fsnotify_sb_info(sb)) {
		err = fsnotify_attach_info_to_sb(sb);
		if (err)
			return err;
	}

	connp = fsnotify_object_connp(obj, obj_type);
restart:
	spin_lock(&mark->lock);
	conn = fsnotify_grab_connector(connp);
	if (!conn) {
		spin_unlock(&mark->lock);
		err = fsnotify_attach_connector_to_object(connp, obj, obj_type);
		if (err)
			return err;
		goto restart;
	}

	/* is mark the first mark? */
	if (hlist_empty(&conn->list)) {
		hlist_add_head_rcu(&mark->obj_list, &conn->list);
		goto added;
	}

	/* should mark be in the middle of the current list? */
	hlist_for_each_entry(lmark, &conn->list, obj_list) {
		last = lmark;

		if ((lmark->group == mark->group) &&
		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
		    !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
			err = -EEXIST;
			goto out_err;
		}

		cmp = fsnotify_compare_groups(lmark->group, mark->group);
		if (cmp >= 0) {
			hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
			goto added;
		}
	}

	BUG_ON(last == NULL);
	/* mark should be the last entry. last is the current last entry */
	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
	if (sb)
		fsnotify_update_sb_watchers(sb, conn);
	/*
	 * Since connector is attached to object using cmpxchg() we are
	 * guaranteed that connector initialization is fully visible by anyone
	 * seeing mark->connector set.
	 */
	WRITE_ONCE(mark->connector, conn);
out_err:
	spin_unlock(&conn->lock);
	spin_unlock(&mark->lock);
	return err;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
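 *
 * For illustration, a backend typically attaches a mark roughly like this
 * (a sketch with error handling trimmed; the creation reference can be
 * dropped once the g_list reference taken below keeps the mark alive):
 *
 *	fsnotify_init_mark(mark, group);
 *	mark->mask = FS_MODIFY;
 *	ret = fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, 0);
 *	fsnotify_put_mark(mark);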
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
			     void *obj, unsigned int obj_type,
			     int add_flags)
{
	struct fsnotify_group *group = mark->group;
	int ret = 0;

	fsnotify_group_assert_locked(group);

	/*
	 * LOCKING ORDER!!!!
	 * group->mark_mutex
	 * mark->lock
	 * mark->connector->lock
	 */
	spin_lock(&mark->lock);
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

	list_add(&mark->g_list, &group->marks_list);
	fsnotify_get_mark(mark); /* for g_list */
	spin_unlock(&mark->lock);

	ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags);
	if (ret)
		goto err;

	fsnotify_recalc_mask(mark->connector);

	return ret;
err:
	spin_lock(&mark->lock);
	mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
			 FSNOTIFY_MARK_FLAG_ATTACHED);
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	fsnotify_put_mark(mark);
	return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj,
		      unsigned int obj_type, int add_flags)
{
	int ret;
	struct fsnotify_group *group = mark->group;

	fsnotify_group_lock(group);
	ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags);
	fsnotify_group_unlock(group);
	return ret;
}
EXPORT_SYMBOL_GPL(fsnotify_add_mark);

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type,
					 struct fsnotify_group *group)
{
	fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type);
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark;

	if (!connp)
		return NULL;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return NULL;

	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->group == group &&
		    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
			fsnotify_get_mark(mark);
			spin_unlock(&conn->lock);
			return mark;
		}
	}
	spin_unlock(&conn->lock);
	return NULL;
}
EXPORT_SYMBOL_GPL(fsnotify_find_mark);

/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
				   unsigned int obj_type)
{
	struct fsnotify_mark *lmark, *mark;
	LIST_HEAD(to_free);
	struct list_head *head = &to_free;

	/* Skip selection step if we want to clear all marks. */
	if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) {
		head = &group->marks_list;
		goto clear;
	}
	/*
	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
	 * fsnotify_clear_marks_by_inode() can come and free marks, even ones
	 * on our to_free list, so we have to hold mark_mutex even when
	 * accessing that list. And freeing a mark requires us to drop
	 * mark_mutex. So we can reliably free only the first mark in the
	 * list. That's why we first move the marks to free to the to_free
	 * list in one go and then free marks in the to_free list one by one.
	 */
	fsnotify_group_lock(group);
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
		if (mark->connector->type == obj_type)
			list_move(&mark->g_list, &to_free);
	}
	fsnotify_group_unlock(group);

clear:
	while (1) {
		fsnotify_group_lock(group);
		if (list_empty(head)) {
			fsnotify_group_unlock(group);
			break;
		}
		mark = list_first_entry(head, struct fsnotify_mark, g_list);
		fsnotify_get_mark(mark);
		fsnotify_detach_mark(mark);
		fsnotify_group_unlock(group);
		fsnotify_free_mark(mark);
		fsnotify_put_mark(mark);
	}
}

/* Destroy all marks attached to an object via connector */
void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark, *old_mark = NULL;
	void *objp;
	unsigned int type;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return;
	/*
	 * We have to be careful since we can race with e.g.
	 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
	 * list can get modified. However we are holding mark reference and
	 * thus our mark cannot be removed from obj_list so we can continue
	 * iteration after regaining conn->lock.
	 */
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		fsnotify_get_mark(mark);
		spin_unlock(&conn->lock);
		if (old_mark)
			fsnotify_put_mark(old_mark);
		old_mark = mark;
		fsnotify_destroy_mark(mark, mark->group);
		spin_lock(&conn->lock);
	}
	/*
	 * Detach list from object now so that we don't pin inode until all
	 * mark references get dropped. It would lead to strange results such
	 * as delaying inode deletion or blocking unmount.
	 */
	objp = fsnotify_detach_connector_from_object(conn, &type);
	spin_unlock(&conn->lock);
	if (old_mark)
		fsnotify_put_mark(old_mark);
	fsnotify_drop_object(type, objp);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			struct fsnotify_group *group)
{
	memset(mark, 0, sizeof(*mark));
	spin_lock_init(&mark->lock);
	refcount_set(&mark->refcnt, 1);
	fsnotify_get_group(group);
	mark->group = group;
	WRITE_ONCE(mark->connector, NULL);
}
EXPORT_SYMBOL_GPL(fsnotify_init_mark);

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark *mark, *next;
	struct list_head private_destroy_list;

	spin_lock(&destroy_lock);
	/* exchange the list head */
	list_replace_init(&destroy_list, &private_destroy_list);
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);

	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
		list_del_init(&mark->g_list);
		fsnotify_final_mark_destroy(mark);
	}
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
	flush_delayed_work(&reaper_work);
}
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
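
/*
 * For illustration, the teardown side mirrors fsnotify_add_mark() above
 * (a sketch): fsnotify_find_mark() returns a referenced mark, so a caller
 * removing a group's mark from an inode does
 *
 *	mark = fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group);
 *	if (mark) {
 *		fsnotify_destroy_mark(mark, group);
 *		fsnotify_put_mark(mark);
 *	}
 *
 * and may then call fsnotify_wait_marks_destroyed() to wait until the reaper
 * workqueue has actually freed the marks.
 */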