/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/


#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>

static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct fuse_inode *fi = get_fuse_inode(dir);

	if (!fc->do_readdirplus)
		return false;
	if (!fc->readdirplus_auto)
		return true;
	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
		return true;
	if (ctx->pos == 0)
		return true;
	return false;
}

static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_local_page(page);
	if (!offset) {
		clear_page(addr);
		SetPageUptodate(page);
	}
	memcpy(addr + offset, dirent, reclen);
	kunmap_local(addr);
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
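
/*
 * The server signalled end-of-directory.  If the stream position still
 * matches the end of the cache, mark the readdir cache complete and drop
 * any unused page cache pages beyond the cached size.
 */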
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t end;

	spin_lock(&fi->rdc.lock);
	/* does cache end position match current position? */
	if (fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	fi->rdc.cached = true;
	end = ALIGN(fi->rdc.size, PAGE_SIZE);
	spin_unlock(&fi->rdc.lock);

	/* truncate unused tail of cache */
	truncate_inode_pages(file->f_mapping, end);
}

static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;

	if (ff->open_flags & FOPEN_CACHE_DIR)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type);
}

static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	while (nbytes >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
		size_t reclen = FUSE_DIRENT_SIZE(dirent);
		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!fuse_emit(file, ctx, dirent))
			break;

		buf += reclen;
		nbytes -= reclen;
		ctx->pos = dirent->off;
	}

	return 0;
}
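
/*
 * Instantiate or refresh the dentry/inode pair described by one READDIRPLUS
 * entry.  A successfully linked entry has its lookup count bumped, so the
 * server must keep the node alive until a corresponding FORGET arrives.
 */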
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version, u64 evict_ctr)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	int epoch;

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);
	epoch = atomic_read(&fc->epoch);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		struct fuse_inode *fi;
		inode = d_inode(dentry);
		if (inode && get_node_id(inode) != o->nodeid)
			inode = NULL;
		if (!inode ||
		    fuse_stale_inode(inode, o->generation, &o->attr)) {
			if (inode)
				fuse_make_bad(inode);
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (fuse_is_bad(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr, NULL,
				       ATTR_TIMEOUT(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, ATTR_TIMEOUT(o),
				  attr_version, evict_ctr);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fi->lock);
				fi->nlookup--;
				spin_unlock(&fi->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	dentry->d_time = epoch;
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}

static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_forget_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;
	args.noreply = true;

	fuse_simple_request(fm, &args);
	/* ignore errors */
}
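
/*
 * Walk a READDIRPLUS reply buffer: emit each entry to the caller as long as
 * its buffer has room, link every entry into the dcache, and send a FORGET
 * for any entry that could not be linked.
 */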
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version,
			     u64 evict_ctr)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold. But we still continue iterating
			   over remaining entries to link them. If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		ret = fuse_direntplus_link(file, direntplus, attr_version,
					   evict_ctr);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}

static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	struct fuse_io_args ia = {};
	struct fuse_args *args = &ia.ap.args;
	void *buf;
	size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE,
			       fc->max_pages << PAGE_SHIFT);
	u64 attr_version = 0, evict_ctr = 0;
	bool locked;

	buf = kvmalloc(bufsize, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	args->out_args[0].value = buf;

	plus = fuse_use_readdirplus(inode, ctx);
	if (plus) {
		attr_version = fuse_get_attr_version(fm->fc);
		evict_ctr = fuse_get_evict_ctr(fm->fc);
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize,
				    FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, bufsize,
				    FUSE_READDIR);
	}
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fm, args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(buf, res, file, ctx,
						attr_version, evict_ctr);
		} else {
			res = parse_dirfile(buf, res, file, ctx);
		}
	}

	kvfree(buf);
	fuse_invalidate_atime(inode);
	return res;
}

enum fuse_parse_result {
	FOUND_ERR = -1,
	FOUND_NONE = 0,
	FOUND_SOME,
	FOUND_ALL,
};

static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
	enum fuse_parse_result res = FOUND_NONE;

	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}

static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->rdc.cached = false;
	fi->rdc.version++;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
}

#define UNCACHED 1
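
/*
 * Try to satisfy getdents from the page cache backed readdir cache.
 * Returns 0 on success, a negative error code, or UNCACHED when the caller
 * must fall back to querying the server.
 */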
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file, STATX_MTIME);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache?  Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode_get_mtime(inode);
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then we need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		struct timespec64 mtime = inode_get_mtime(inode);

		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	/* Page gone missing, then re-added to cache, but not initialized? */
	if (page && !PageUptodate(page)) {
		unlock_page(page);
		put_page(page);
		page = NULL;
	}
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap_local_page(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap_local(addr);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
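
/*
 * Entry point for directory reads: use the readdir cache when the server
 * enabled FOPEN_CACHE_DIR, otherwise (or when the cache cannot serve the
 * request) fall back to uncached FUSE_READDIR/FUSE_READDIRPLUS requests.
 */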
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	return err;
}