1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/fanotify.h> 3 #include <linux/fsnotify_backend.h> 4 #include <linux/init.h> 5 #include <linux/jiffies.h> 6 #include <linux/kernel.h> /* UINT_MAX */ 7 #include <linux/mount.h> 8 #include <linux/sched.h> 9 #include <linux/sched/user.h> 10 #include <linux/sched/signal.h> 11 #include <linux/types.h> 12 #include <linux/wait.h> 13 #include <linux/audit.h> 14 #include <linux/sched/mm.h> 15 #include <linux/statfs.h> 16 #include <linux/stringhash.h> 17 18 #include "fanotify.h" 19 20 static bool fanotify_path_equal(const struct path *p1, const struct path *p2) 21 { 22 return p1->mnt == p2->mnt && p1->dentry == p2->dentry; 23 } 24 25 static unsigned int fanotify_hash_path(const struct path *path) 26 { 27 return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^ 28 hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS); 29 } 30 31 static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) 32 { 33 return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ 34 hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS); 35 } 36 37 static bool fanotify_fh_equal(struct fanotify_fh *fh1, 38 struct fanotify_fh *fh2) 39 { 40 if (fh1->type != fh2->type || fh1->len != fh2->len) 41 return false; 42 43 return !fh1->len || 44 !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); 45 } 46 47 static unsigned int fanotify_hash_fh(struct fanotify_fh *fh) 48 { 49 long salt = (long)fh->type | (long)fh->len << 8; 50 51 /* 52 * full_name_hash() works long by long, so it handles fh buf optimally. 53 */ 54 return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len); 55 } 56 57 static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, 58 struct fanotify_fid_event *ffe2) 59 { 60 /* Do not merge fid events without object fh */ 61 if (!ffe1->object_fh.len) 62 return false; 63 64 return fanotify_fsid_equal(&ffe1->fsid, &ffe2->fsid) && 65 fanotify_fh_equal(&ffe1->object_fh, &ffe2->object_fh); 66 } 67 68 static bool fanotify_info_equal(struct fanotify_info *info1, 69 struct fanotify_info *info2) 70 { 71 if (info1->dir_fh_totlen != info2->dir_fh_totlen || 72 info1->dir2_fh_totlen != info2->dir2_fh_totlen || 73 info1->file_fh_totlen != info2->file_fh_totlen || 74 info1->name_len != info2->name_len || 75 info1->name2_len != info2->name2_len) 76 return false; 77 78 if (info1->dir_fh_totlen && 79 !fanotify_fh_equal(fanotify_info_dir_fh(info1), 80 fanotify_info_dir_fh(info2))) 81 return false; 82 83 if (info1->dir2_fh_totlen && 84 !fanotify_fh_equal(fanotify_info_dir2_fh(info1), 85 fanotify_info_dir2_fh(info2))) 86 return false; 87 88 if (info1->file_fh_totlen && 89 !fanotify_fh_equal(fanotify_info_file_fh(info1), 90 fanotify_info_file_fh(info2))) 91 return false; 92 93 if (info1->name_len && 94 memcmp(fanotify_info_name(info1), fanotify_info_name(info2), 95 info1->name_len)) 96 return false; 97 98 return !info1->name2_len || 99 !memcmp(fanotify_info_name2(info1), fanotify_info_name2(info2), 100 info1->name2_len); 101 } 102 103 static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, 104 struct fanotify_name_event *fne2) 105 { 106 struct fanotify_info *info1 = &fne1->info; 107 struct fanotify_info *info2 = &fne2->info; 108 109 /* Do not merge name events without dir fh */ 110 if (!info1->dir_fh_totlen) 111 return false; 112 113 if (!fanotify_fsid_equal(&fne1->fsid, &fne2->fsid)) 114 return false; 115 116 return fanotify_info_equal(info1, info2); 117 } 118 119 static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, 120 struct fanotify_error_event *fee2) 121 { 122 /* Error events against the same file system are always merged. */ 123 if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid)) 124 return false; 125 126 return true; 127 } 128 129 static bool fanotify_should_merge(struct fanotify_event *old, 130 struct fanotify_event *new) 131 { 132 pr_debug("%s: old=%p new=%p\n", __func__, old, new); 133 134 if (old->hash != new->hash || 135 old->type != new->type || old->pid != new->pid) 136 return false; 137 138 /* 139 * We want to merge many dirent events in the same dir (i.e. 140 * creates/unlinks/renames), but we do not want to merge dirent 141 * events referring to subdirs with dirent events referring to 142 * non subdirs, otherwise, user won't be able to tell from a 143 * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ 144 * unlink pair or rmdir+create pair of events. 145 */ 146 if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) 147 return false; 148 149 /* 150 * FAN_RENAME event is reported with special info record types, 151 * so we cannot merge it with other events. 152 */ 153 if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) 154 return false; 155 156 switch (old->type) { 157 case FANOTIFY_EVENT_TYPE_PATH: 158 return fanotify_path_equal(fanotify_event_path(old), 159 fanotify_event_path(new)); 160 case FANOTIFY_EVENT_TYPE_FID: 161 return fanotify_fid_event_equal(FANOTIFY_FE(old), 162 FANOTIFY_FE(new)); 163 case FANOTIFY_EVENT_TYPE_FID_NAME: 164 return fanotify_name_event_equal(FANOTIFY_NE(old), 165 FANOTIFY_NE(new)); 166 case FANOTIFY_EVENT_TYPE_FS_ERROR: 167 return fanotify_error_event_equal(FANOTIFY_EE(old), 168 FANOTIFY_EE(new)); 169 case FANOTIFY_EVENT_TYPE_MNT: 170 return false; 171 default: 172 WARN_ON_ONCE(1); 173 } 174 175 return false; 176 } 177 178 /* Limit event merges to limit CPU overhead per event */ 179 #define FANOTIFY_MAX_MERGE_EVENTS 128 180 181 /* and the list better be locked by something too! */ 182 static int fanotify_merge(struct fsnotify_group *group, 183 struct fsnotify_event *event) 184 { 185 struct fanotify_event *old, *new = FANOTIFY_E(event); 186 unsigned int bucket = fanotify_event_hash_bucket(group, new); 187 struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; 188 int i = 0; 189 190 pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, 191 group, event, bucket); 192 193 /* 194 * Don't merge a permission event with any other event so that we know 195 * the event structure we have created in fanotify_handle_event() is the 196 * one we should check for permission response. 197 */ 198 if (fanotify_is_perm_event(new->mask)) 199 return 0; 200 201 hlist_for_each_entry(old, hlist, merge_list) { 202 if (++i > FANOTIFY_MAX_MERGE_EVENTS) 203 break; 204 if (fanotify_should_merge(old, new)) { 205 old->mask |= new->mask; 206 207 if (fanotify_is_error_event(old->mask)) 208 FANOTIFY_EE(old)->err_count++; 209 210 return 1; 211 } 212 } 213 214 return 0; 215 } 216 217 /* 218 * Wait for response to permission event. The function also takes care of 219 * freeing the permission event (or offloads that in case the wait is canceled 220 * by a signal). The function returns 0 in case access got allowed by userspace, 221 * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case 222 * the wait got interrupted by a signal. 223 */ 224 static int fanotify_get_response(struct fsnotify_group *group, 225 struct fanotify_perm_event *event, 226 struct fsnotify_iter_info *iter_info) 227 { 228 int ret, errno; 229 230 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 231 232 ret = wait_event_state(group->fanotify_data.access_waitq, 233 event->state == FAN_EVENT_ANSWERED, 234 (TASK_KILLABLE|TASK_FREEZABLE)); 235 236 /* Signal pending? */ 237 if (ret < 0) { 238 spin_lock(&group->notification_lock); 239 /* Event reported to userspace and no answer yet? */ 240 if (event->state == FAN_EVENT_REPORTED) { 241 /* Event will get freed once userspace answers to it */ 242 event->state = FAN_EVENT_CANCELED; 243 spin_unlock(&group->notification_lock); 244 return ret; 245 } 246 /* Event not yet reported? Just remove it. */ 247 if (event->state == FAN_EVENT_INIT) { 248 fsnotify_remove_queued_event(group, &event->fae.fse); 249 /* Permission events are not supposed to be hashed */ 250 WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); 251 } 252 /* 253 * Event may be also answered in case signal delivery raced 254 * with wakeup. In that case we have nothing to do besides 255 * freeing the event and reporting error. 256 */ 257 spin_unlock(&group->notification_lock); 258 goto out; 259 } 260 261 /* userspace responded, convert to something usable */ 262 switch (event->response & FANOTIFY_RESPONSE_ACCESS) { 263 case FAN_ALLOW: 264 ret = 0; 265 break; 266 case FAN_DENY: 267 /* Check custom errno from pre-content events */ 268 errno = fanotify_get_response_errno(event->response); 269 if (errno) { 270 ret = -errno; 271 break; 272 } 273 fallthrough; 274 default: 275 ret = -EPERM; 276 } 277 278 /* Check if the response should be audited */ 279 if (event->response & FAN_AUDIT) { 280 u32 response = event->response & 281 (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS); 282 audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule); 283 } 284 285 pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, 286 group, event, ret); 287 out: 288 fsnotify_destroy_event(group, &event->fae.fse); 289 290 return ret; 291 } 292 293 /* 294 * This function returns a mask for an event that only contains the flags 295 * that have been specifically requested by the user. Flags that may have 296 * been included within the event mask, but have not been explicitly 297 * requested by the user, will not be present in the returned mask. 298 */ 299 static u32 fanotify_group_event_mask(struct fsnotify_group *group, 300 struct fsnotify_iter_info *iter_info, 301 u32 *match_mask, u32 event_mask, 302 const void *data, int data_type, 303 struct inode *dir) 304 { 305 __u32 marks_mask = 0, marks_ignore_mask = 0; 306 __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS | 307 FANOTIFY_EVENT_FLAGS; 308 const struct path *path = fsnotify_data_path(data, data_type); 309 unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); 310 struct fsnotify_mark *mark; 311 bool ondir = event_mask & FAN_ONDIR; 312 int type; 313 314 pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", 315 __func__, iter_info->report_mask, event_mask, data, data_type); 316 317 if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { 318 if (data_type != FSNOTIFY_EVENT_MNT) 319 return 0; 320 } else if (!fid_mode) { 321 /* Do we have path to open a file descriptor? */ 322 if (!path) 323 return 0; 324 /* Path type events are only relevant for files and dirs */ 325 if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) 326 return 0; 327 } else if (!(fid_mode & FAN_REPORT_FID)) { 328 /* Do we have a directory inode to report? */ 329 if (!dir && !ondir) 330 return 0; 331 } 332 333 fsnotify_foreach_iter_mark_type(iter_info, mark, type) { 334 /* 335 * Apply ignore mask depending on event flags in ignore mask. 336 */ 337 marks_ignore_mask |= 338 fsnotify_effective_ignore_mask(mark, ondir, type); 339 340 /* 341 * Send the event depending on event flags in mark mask. 342 */ 343 if (!fsnotify_mask_applicable(mark->mask, ondir, type)) 344 continue; 345 346 marks_mask |= mark->mask; 347 348 /* Record the mark types of this group that matched the event */ 349 *match_mask |= 1U << type; 350 } 351 352 test_mask = event_mask & marks_mask & ~marks_ignore_mask; 353 354 /* 355 * For dirent modification events (create/delete/move) that do not carry 356 * the child entry name information, we report FAN_ONDIR for mkdir/rmdir 357 * so user can differentiate them from creat/unlink. 358 * 359 * For backward compatibility and consistency, do not report FAN_ONDIR 360 * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR 361 * to user in fid mode for all event types. 362 * 363 * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to 364 * fanotify_alloc_event() when group is reporting fid as indication 365 * that event happened on child. 366 */ 367 if (fid_mode) { 368 /* Do not report event flags without any event */ 369 if (!(test_mask & ~FANOTIFY_EVENT_FLAGS)) 370 return 0; 371 } else { 372 user_mask &= ~FANOTIFY_EVENT_FLAGS; 373 } 374 375 return test_mask & user_mask; 376 } 377 378 /* 379 * Check size needed to encode fanotify_fh. 380 * 381 * Return size of encoded fh without fanotify_fh header. 382 * Return 0 on failure to encode. 383 */ 384 static int fanotify_encode_fh_len(struct inode *inode) 385 { 386 int dwords = 0; 387 int fh_len; 388 389 if (!inode) 390 return 0; 391 392 exportfs_encode_fid(inode, NULL, &dwords); 393 fh_len = dwords << 2; 394 395 /* 396 * struct fanotify_error_event might be preallocated and is 397 * limited to MAX_HANDLE_SZ. This should never happen, but 398 * safeguard by forcing an invalid file handle. 399 */ 400 if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) 401 return 0; 402 403 return fh_len; 404 } 405 406 /* 407 * Encode fanotify_fh. 408 * 409 * Return total size of encoded fh including fanotify_fh header. 410 * Return 0 on failure to encode. 411 */ 412 static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, 413 unsigned int fh_len, unsigned int *hash, 414 gfp_t gfp) 415 { 416 int dwords, type = 0; 417 char *ext_buf = NULL; 418 void *buf = fh + 1; 419 int err; 420 421 fh->type = FILEID_ROOT; 422 fh->len = 0; 423 fh->flags = 0; 424 425 /* 426 * Invalid FHs are used by FAN_FS_ERROR for errors not 427 * linked to any inode. The f_handle won't be reported 428 * back to userspace. 429 */ 430 if (!inode) 431 goto out; 432 433 /* 434 * !gpf means preallocated variable size fh, but fh_len could 435 * be zero in that case if encoding fh len failed. 436 */ 437 err = -ENOENT; 438 if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) 439 goto out_err; 440 441 /* No external buffer in a variable size allocated fh */ 442 if (gfp && fh_len > FANOTIFY_INLINE_FH_LEN) { 443 /* Treat failure to allocate fh as failure to encode fh */ 444 err = -ENOMEM; 445 ext_buf = kmalloc(fh_len, gfp); 446 if (!ext_buf) 447 goto out_err; 448 449 *fanotify_fh_ext_buf_ptr(fh) = ext_buf; 450 buf = ext_buf; 451 fh->flags |= FANOTIFY_FH_FLAG_EXT_BUF; 452 } 453 454 dwords = fh_len >> 2; 455 type = exportfs_encode_fid(inode, buf, &dwords); 456 err = -EINVAL; 457 if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) 458 goto out_err; 459 460 fh->type = type; 461 fh->len = fh_len; 462 463 out: 464 /* 465 * Mix fh into event merge key. Hash might be NULL in case of 466 * unhashed FID events (i.e. FAN_FS_ERROR). 467 */ 468 if (hash) 469 *hash ^= fanotify_hash_fh(fh); 470 471 return FANOTIFY_FH_HDR_LEN + fh_len; 472 473 out_err: 474 pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n", 475 type, fh_len, err); 476 kfree(ext_buf); 477 *fanotify_fh_ext_buf_ptr(fh) = NULL; 478 /* Report the event without a file identifier on encode error */ 479 fh->type = FILEID_INVALID; 480 fh->len = 0; 481 return 0; 482 } 483 484 /* 485 * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for 486 * some events and the fid of the parent for create/delete/move events. 487 * 488 * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported 489 * also in create/delete/move events in addition to the fid of the parent 490 * and the name of the child. 491 */ 492 static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) 493 { 494 if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) 495 return (fid_mode & FAN_REPORT_TARGET_FID); 496 497 return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); 498 } 499 500 /* 501 * The inode to use as identifier when reporting fid depends on the event 502 * and the group flags. 503 * 504 * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. 505 * 506 * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory 507 * fid on dirent events and the child fid otherwise. 508 * 509 * For example: 510 * FS_ATTRIB reports the child fid even if reported on a watched parent. 511 * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. 512 * and reports the created child fid with FAN_REPORT_TARGET_FID. 513 */ 514 static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, 515 int data_type, struct inode *dir, 516 unsigned int fid_mode) 517 { 518 if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && 519 !(fid_mode & FAN_REPORT_TARGET_FID)) 520 return dir; 521 522 return fsnotify_data_inode(data, data_type); 523 } 524 525 /* 526 * The inode to use as identifier when reporting dir fid depends on the event. 527 * Report the modified directory inode on dirent modification events. 528 * Report the "victim" inode if "victim" is a directory. 529 * Report the parent inode if "victim" is not a directory and event is 530 * reported to parent. 531 * Otherwise, do not report dir fid. 532 */ 533 static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, 534 int data_type, struct inode *dir) 535 { 536 struct inode *inode = fsnotify_data_inode(data, data_type); 537 538 if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) 539 return dir; 540 541 if (inode && S_ISDIR(inode->i_mode)) 542 return inode; 543 544 return dir; 545 } 546 547 static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, 548 unsigned int *hash, 549 gfp_t gfp) 550 { 551 struct fanotify_path_event *pevent; 552 553 pevent = kmem_cache_alloc(fanotify_path_event_cachep, gfp); 554 if (!pevent) 555 return NULL; 556 557 pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; 558 pevent->path = *path; 559 *hash ^= fanotify_hash_path(path); 560 path_get(path); 561 562 return &pevent->fae; 563 } 564 565 static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) 566 { 567 struct fanotify_mnt_event *pevent; 568 569 pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); 570 if (!pevent) 571 return NULL; 572 573 pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; 574 pevent->mnt_id = mnt_id; 575 576 return &pevent->fae; 577 } 578 579 static struct fanotify_event *fanotify_alloc_perm_event(const void *data, 580 int data_type, 581 gfp_t gfp) 582 { 583 const struct path *path = fsnotify_data_path(data, data_type); 584 const struct file_range *range = 585 fsnotify_data_file_range(data, data_type); 586 struct fanotify_perm_event *pevent; 587 588 pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); 589 if (!pevent) 590 return NULL; 591 592 pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH_PERM; 593 pevent->response = 0; 594 pevent->hdr.type = FAN_RESPONSE_INFO_NONE; 595 pevent->hdr.pad = 0; 596 pevent->hdr.len = 0; 597 pevent->state = FAN_EVENT_INIT; 598 pevent->path = *path; 599 /* NULL ppos means no range info */ 600 pevent->ppos = range ? &range->pos : NULL; 601 pevent->count = range ? range->count : 0; 602 path_get(path); 603 604 return &pevent->fae; 605 } 606 607 static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, 608 __kernel_fsid_t *fsid, 609 unsigned int *hash, 610 gfp_t gfp) 611 { 612 struct fanotify_fid_event *ffe; 613 614 ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp); 615 if (!ffe) 616 return NULL; 617 618 ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; 619 ffe->fsid = *fsid; 620 *hash ^= fanotify_hash_fsid(fsid); 621 fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), 622 hash, gfp); 623 624 return &ffe->fae; 625 } 626 627 static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, 628 __kernel_fsid_t *fsid, 629 const struct qstr *name, 630 struct inode *child, 631 struct dentry *moved, 632 unsigned int *hash, 633 gfp_t gfp) 634 { 635 struct fanotify_name_event *fne; 636 struct fanotify_info *info; 637 struct fanotify_fh *dfh, *ffh; 638 struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL; 639 const struct qstr *name2 = moved ? &moved->d_name : NULL; 640 unsigned int dir_fh_len = fanotify_encode_fh_len(dir); 641 unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2); 642 unsigned int child_fh_len = fanotify_encode_fh_len(child); 643 unsigned long name_len = name ? name->len : 0; 644 unsigned long name2_len = name2 ? name2->len : 0; 645 unsigned int len, size; 646 647 /* Reserve terminating null byte even for empty name */ 648 size = sizeof(*fne) + name_len + name2_len + 2; 649 if (dir_fh_len) 650 size += FANOTIFY_FH_HDR_LEN + dir_fh_len; 651 if (dir2_fh_len) 652 size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; 653 if (child_fh_len) 654 size += FANOTIFY_FH_HDR_LEN + child_fh_len; 655 fne = kmalloc(size, gfp); 656 if (!fne) 657 return NULL; 658 659 fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; 660 fne->fsid = *fsid; 661 *hash ^= fanotify_hash_fsid(fsid); 662 info = &fne->info; 663 fanotify_info_init(info); 664 if (dir_fh_len) { 665 dfh = fanotify_info_dir_fh(info); 666 len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0); 667 fanotify_info_set_dir_fh(info, len); 668 } 669 if (dir2_fh_len) { 670 dfh = fanotify_info_dir2_fh(info); 671 len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0); 672 fanotify_info_set_dir2_fh(info, len); 673 } 674 if (child_fh_len) { 675 ffh = fanotify_info_file_fh(info); 676 len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0); 677 fanotify_info_set_file_fh(info, len); 678 } 679 if (name_len) { 680 fanotify_info_copy_name(info, name); 681 *hash ^= full_name_hash((void *)name_len, name->name, name_len); 682 } 683 if (name2_len) { 684 fanotify_info_copy_name2(info, name2); 685 *hash ^= full_name_hash((void *)name2_len, name2->name, 686 name2_len); 687 } 688 689 pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", 690 __func__, size, dir_fh_len, child_fh_len, 691 info->name_len, info->name_len, fanotify_info_name(info)); 692 693 if (dir2_fh_len) { 694 pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n", 695 __func__, dir2_fh_len, info->name2_len, 696 info->name2_len, fanotify_info_name2(info)); 697 } 698 699 return &fne->fae; 700 } 701 702 static struct fanotify_event *fanotify_alloc_error_event( 703 struct fsnotify_group *group, 704 __kernel_fsid_t *fsid, 705 const void *data, int data_type, 706 unsigned int *hash) 707 { 708 struct fs_error_report *report = 709 fsnotify_data_error_report(data, data_type); 710 struct inode *inode; 711 struct fanotify_error_event *fee; 712 int fh_len; 713 714 if (WARN_ON_ONCE(!report)) 715 return NULL; 716 717 fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS); 718 if (!fee) 719 return NULL; 720 721 fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; 722 fee->error = report->error; 723 fee->err_count = 1; 724 fee->fsid = *fsid; 725 726 inode = report->inode; 727 fh_len = fanotify_encode_fh_len(inode); 728 729 /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */ 730 if (!fh_len && inode) 731 inode = NULL; 732 733 fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0); 734 735 *hash ^= fanotify_hash_fsid(fsid); 736 737 return &fee->fae; 738 } 739 740 static struct fanotify_event *fanotify_alloc_event( 741 struct fsnotify_group *group, 742 u32 mask, const void *data, int data_type, 743 struct inode *dir, const struct qstr *file_name, 744 __kernel_fsid_t *fsid, u32 match_mask) 745 { 746 struct fanotify_event *event = NULL; 747 gfp_t gfp = GFP_KERNEL_ACCOUNT; 748 unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); 749 struct inode *id = fanotify_fid_inode(mask, data, data_type, dir, 750 fid_mode); 751 struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); 752 const struct path *path = fsnotify_data_path(data, data_type); 753 u64 mnt_id = fsnotify_data_mnt_id(data, data_type); 754 struct mem_cgroup *old_memcg; 755 struct dentry *moved = NULL; 756 struct inode *child = NULL; 757 bool name_event = false; 758 unsigned int hash = 0; 759 bool ondir = mask & FAN_ONDIR; 760 struct pid *pid; 761 762 if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { 763 /* 764 * For certain events and group flags, report the child fid 765 * in addition to reporting the parent fid and maybe child name. 766 */ 767 if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) 768 child = id; 769 770 id = dirid; 771 772 /* 773 * We record file name only in a group with FAN_REPORT_NAME 774 * and when we have a directory inode to report. 775 * 776 * For directory entry modification event, we record the fid of 777 * the directory and the name of the modified entry. 778 * 779 * For event on non-directory that is reported to parent, we 780 * record the fid of the parent and the name of the child. 781 * 782 * Even if not reporting name, we need a variable length 783 * fanotify_name_event if reporting both parent and child fids. 784 */ 785 if (!(fid_mode & FAN_REPORT_NAME)) { 786 name_event = !!child; 787 file_name = NULL; 788 } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { 789 name_event = true; 790 } 791 792 /* 793 * In the special case of FAN_RENAME event, use the match_mask 794 * to determine if we need to report only the old parent+name, 795 * only the new parent+name or both. 796 * 'dirid' and 'file_name' are the old parent+name and 797 * 'moved' has the new parent+name. 798 */ 799 if (mask & FAN_RENAME) { 800 bool report_old, report_new; 801 802 if (WARN_ON_ONCE(!match_mask)) 803 return NULL; 804 805 /* Report both old and new parent+name if sb watching */ 806 report_old = report_new = 807 match_mask & (1U << FSNOTIFY_ITER_TYPE_SB); 808 report_old |= 809 match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE); 810 report_new |= 811 match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2); 812 813 if (!report_old) { 814 /* Do not report old parent+name */ 815 dirid = NULL; 816 file_name = NULL; 817 } 818 if (report_new) { 819 /* Report new parent+name */ 820 moved = fsnotify_data_dentry(data, data_type); 821 } 822 } 823 } 824 825 /* 826 * For queues with unlimited length lost events are not expected and 827 * can possibly have security implications. Avoid losing events when 828 * memory is short. For the limited size queues, avoid OOM killer in the 829 * target monitoring memcg as it may have security repercussion. 830 */ 831 if (group->max_events == UINT_MAX) 832 gfp |= __GFP_NOFAIL; 833 else 834 gfp |= __GFP_RETRY_MAYFAIL; 835 836 /* Whoever is interested in the event, pays for the allocation. */ 837 old_memcg = set_active_memcg(group->memcg); 838 839 if (fanotify_is_perm_event(mask)) { 840 event = fanotify_alloc_perm_event(data, data_type, gfp); 841 } else if (fanotify_is_error_event(mask)) { 842 event = fanotify_alloc_error_event(group, fsid, data, 843 data_type, &hash); 844 } else if (name_event && (file_name || moved || child)) { 845 event = fanotify_alloc_name_event(dirid, fsid, file_name, child, 846 moved, &hash, gfp); 847 } else if (fid_mode) { 848 event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); 849 } else if (path) { 850 event = fanotify_alloc_path_event(path, &hash, gfp); 851 } else if (mnt_id) { 852 event = fanotify_alloc_mnt_event(mnt_id, gfp); 853 } else { 854 WARN_ON_ONCE(1); 855 } 856 857 if (!event) 858 goto out; 859 860 if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) 861 pid = get_pid(task_pid(current)); 862 else 863 pid = get_pid(task_tgid(current)); 864 865 /* Mix event info, FAN_ONDIR flag and pid into event merge key */ 866 hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS); 867 fanotify_init_event(event, hash, mask); 868 event->pid = pid; 869 870 out: 871 set_active_memcg(old_memcg); 872 return event; 873 } 874 875 /* 876 * Get cached fsid of the filesystem containing the object from any mark. 877 * All marks are supposed to have the same fsid, but we do not verify that here. 878 */ 879 static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) 880 { 881 struct fsnotify_mark *mark; 882 int type; 883 __kernel_fsid_t fsid = {}; 884 885 fsnotify_foreach_iter_mark_type(iter_info, mark, type) { 886 if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID)) 887 continue; 888 fsid = FANOTIFY_MARK(mark)->fsid; 889 if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) && 890 WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) 891 continue; 892 return fsid; 893 } 894 895 return fsid; 896 } 897 898 /* 899 * Add an event to hash table for faster merge. 900 */ 901 static void fanotify_insert_event(struct fsnotify_group *group, 902 struct fsnotify_event *fsn_event) 903 { 904 struct fanotify_event *event = FANOTIFY_E(fsn_event); 905 unsigned int bucket = fanotify_event_hash_bucket(group, event); 906 struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; 907 908 assert_spin_locked(&group->notification_lock); 909 910 if (!fanotify_is_hashed_event(event->mask)) 911 return; 912 913 pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, 914 group, event, bucket); 915 916 hlist_add_head(&event->merge_list, hlist); 917 } 918 919 static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, 920 const void *data, int data_type, 921 struct inode *dir, 922 const struct qstr *file_name, u32 cookie, 923 struct fsnotify_iter_info *iter_info) 924 { 925 int ret = 0; 926 struct fanotify_event *event; 927 struct fsnotify_event *fsn_event; 928 __kernel_fsid_t fsid = {}; 929 u32 match_mask = 0; 930 931 BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); 932 BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); 933 BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB); 934 BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); 935 BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); 936 BUILD_BUG_ON(FAN_OPEN != FS_OPEN); 937 BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO); 938 BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); 939 BUILD_BUG_ON(FAN_CREATE != FS_CREATE); 940 BUILD_BUG_ON(FAN_DELETE != FS_DELETE); 941 BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); 942 BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); 943 BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); 944 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); 945 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); 946 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); 947 BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); 948 BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); 949 BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); 950 BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); 951 BUILD_BUG_ON(FAN_RENAME != FS_RENAME); 952 BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); 953 954 BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24); 955 956 mask = fanotify_group_event_mask(group, iter_info, &match_mask, 957 mask, data, data_type, dir); 958 if (!mask) 959 return 0; 960 961 pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__, 962 group, mask, match_mask); 963 964 if (fanotify_is_perm_event(mask)) { 965 /* 966 * fsnotify_prepare_user_wait() fails if we race with mark 967 * deletion. Just let the operation pass in that case. 968 */ 969 if (!fsnotify_prepare_user_wait(iter_info)) 970 return 0; 971 } 972 973 if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) 974 fsid = fanotify_get_fsid(iter_info); 975 976 event = fanotify_alloc_event(group, mask, data, data_type, dir, 977 file_name, &fsid, match_mask); 978 ret = -ENOMEM; 979 if (unlikely(!event)) { 980 /* 981 * We don't queue overflow events for permission events as 982 * there the access is denied and so no event is in fact lost. 983 */ 984 if (!fanotify_is_perm_event(mask)) 985 fsnotify_queue_overflow(group); 986 goto finish; 987 } 988 989 fsn_event = &event->fse; 990 ret = fsnotify_insert_event(group, fsn_event, fanotify_merge, 991 fanotify_insert_event); 992 if (ret) { 993 /* Permission events shouldn't be merged */ 994 BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); 995 /* Our event wasn't used in the end. Free it. */ 996 fsnotify_destroy_event(group, fsn_event); 997 998 ret = 0; 999 } else if (fanotify_is_perm_event(mask)) { 1000 ret = fanotify_get_response(group, FANOTIFY_PERM(event), 1001 iter_info); 1002 } 1003 finish: 1004 if (fanotify_is_perm_event(mask)) 1005 fsnotify_finish_user_wait(iter_info); 1006 1007 return ret; 1008 } 1009 1010 static void fanotify_free_group_priv(struct fsnotify_group *group) 1011 { 1012 put_user_ns(group->user_ns); 1013 kfree(group->fanotify_data.merge_hash); 1014 if (group->fanotify_data.ucounts) 1015 dec_ucount(group->fanotify_data.ucounts, 1016 UCOUNT_FANOTIFY_GROUPS); 1017 1018 if (mempool_initialized(&group->fanotify_data.error_events_pool)) 1019 mempool_exit(&group->fanotify_data.error_events_pool); 1020 } 1021 1022 static void fanotify_free_path_event(struct fanotify_event *event) 1023 { 1024 path_put(fanotify_event_path(event)); 1025 kmem_cache_free(fanotify_path_event_cachep, FANOTIFY_PE(event)); 1026 } 1027 1028 static void fanotify_free_perm_event(struct fanotify_event *event) 1029 { 1030 path_put(fanotify_event_path(event)); 1031 kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PERM(event)); 1032 } 1033 1034 static void fanotify_free_fid_event(struct fanotify_event *event) 1035 { 1036 struct fanotify_fid_event *ffe = FANOTIFY_FE(event); 1037 1038 if (fanotify_fh_has_ext_buf(&ffe->object_fh)) 1039 kfree(fanotify_fh_ext_buf(&ffe->object_fh)); 1040 kmem_cache_free(fanotify_fid_event_cachep, ffe); 1041 } 1042 1043 static void fanotify_free_name_event(struct fanotify_event *event) 1044 { 1045 kfree(FANOTIFY_NE(event)); 1046 } 1047 1048 static void fanotify_free_error_event(struct fsnotify_group *group, 1049 struct fanotify_event *event) 1050 { 1051 struct fanotify_error_event *fee = FANOTIFY_EE(event); 1052 1053 mempool_free(fee, &group->fanotify_data.error_events_pool); 1054 } 1055 1056 static void fanotify_free_mnt_event(struct fanotify_event *event) 1057 { 1058 kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); 1059 } 1060 1061 static void fanotify_free_event(struct fsnotify_group *group, 1062 struct fsnotify_event *fsn_event) 1063 { 1064 struct fanotify_event *event; 1065 1066 event = FANOTIFY_E(fsn_event); 1067 put_pid(event->pid); 1068 switch (event->type) { 1069 case FANOTIFY_EVENT_TYPE_PATH: 1070 fanotify_free_path_event(event); 1071 break; 1072 case FANOTIFY_EVENT_TYPE_PATH_PERM: 1073 fanotify_free_perm_event(event); 1074 break; 1075 case FANOTIFY_EVENT_TYPE_FID: 1076 fanotify_free_fid_event(event); 1077 break; 1078 case FANOTIFY_EVENT_TYPE_FID_NAME: 1079 fanotify_free_name_event(event); 1080 break; 1081 case FANOTIFY_EVENT_TYPE_OVERFLOW: 1082 kfree(event); 1083 break; 1084 case FANOTIFY_EVENT_TYPE_FS_ERROR: 1085 fanotify_free_error_event(group, event); 1086 break; 1087 case FANOTIFY_EVENT_TYPE_MNT: 1088 fanotify_free_mnt_event(event); 1089 break; 1090 default: 1091 WARN_ON_ONCE(1); 1092 } 1093 } 1094 1095 static void fanotify_freeing_mark(struct fsnotify_mark *mark, 1096 struct fsnotify_group *group) 1097 { 1098 if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) 1099 dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS); 1100 } 1101 1102 static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) 1103 { 1104 kmem_cache_free(fanotify_mark_cache, FANOTIFY_MARK(fsn_mark)); 1105 } 1106 1107 const struct fsnotify_ops fanotify_fsnotify_ops = { 1108 .handle_event = fanotify_handle_event, 1109 .free_group_priv = fanotify_free_group_priv, 1110 .free_event = fanotify_free_event, 1111 .freeing_mark = fanotify_freeing_mark, 1112 .free_mark = fanotify_free_mark, 1113 }; 1114