// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter iter;
	netfs_io_terminated_t end_io;
	void *private;
	refcount_t ref;
};

struct erofs_fscache_rq {
	struct address_space *mapping;	/* The mapping being accessed */
	loff_t start;			/* Start position */
	size_t len;			/* Length of the request */
	size_t submitted;		/* Length submitted so far */
	short error;			/* 0 or error that occurred */
	refcount_t ref;
};

static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
							loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}
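/*
 * A quick sketch of the reference counting above (as implied by the
 * helpers, not an authoritative design note): a request starts with one
 * ref; every io allocated for it via erofs_fscache_req_io_alloc() takes
 * another req ref, and every in-flight fscache_read() takes an extra io
 * ref.  Only when the final req ref drops does erofs_fscache_req_complete()
 * run, unlocking (and on success marking uptodate) all folios in the
 * range, so completion is deferred until every submitted slice has
 * finished.
 */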
/*
 * Read data from the fscache backend described by @cookie at physical
 * address offset @pstart, and fill the read data into the buffer described
 * by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	io->bio.bi_end_io(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_dif->fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
			bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio->bi_end_io(bio);
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}
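/*
 * Read one slice of the request: map the logical extent at the current
 * position (req->start + req->submitted), then either copy inline
 * metadata, zero out an unmapped hole, or kick off an asynchronous read
 * from the backing blob.  erofs_fscache_data_read() below loops over this
 * until the whole request length has been submitted.
 */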
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa, true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_dif->fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};
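/*
 * Domain management.  A domain, selected by the domain_id mount option,
 * groups erofs instances onto one shared fscache volume so that blobs
 * with the same name are cached only once; instances without a domain_id
 * get a private volume keyed by fsid instead.  Shared blobs are backed by
 * anonymous inodes on the global pseudo mount, which is set up together
 * with the first domain and unmounted again once the last domain goes
 * away.
 */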
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
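/*
 * For illustration only (hypothetical fsid/domain_id values, and assuming
 * an on-demand fscache daemon is already serving the blobs), two
 * instances sharing one domain might be mounted as:
 *
 *	mount -t erofs none -o fsid=imgA,domain_id=mydom /mnt/A
 *	mount -t erofs none -o fsid=imgB,domain_id=mydom /mnt/B
 *
 * Both then attach their cookies to the single "erofs,mydom" volume, and
 * identically named blobs are shared via erofs_domain_register_cookie()
 * below.
 */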
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate an anonymous inode in the global pseudo mount for
	 * shareable blobs, so that they are accessible across erofs
	 * instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}
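/*
 * Teardown note (an observation on the code below, rather than documented
 * policy): the cookie is unlinked and relinquished while holding
 * erofs_domain_cookies_lock, but the domain reference is only dropped
 * once that mutex has been released, apparently because
 * erofs_fscache_domain_put() takes erofs_domain_list_lock and may unmount
 * the pseudo mount, so the two mutexes are never nested here.
 */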
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is enabled, use NEED_NOEXIST to ensure that
	 * the primary data blob (aka fsid) is unique within the shared
	 * domain.
	 *
	 * In the non-shared-domain case, fscache_acquire_volume() invoked
	 * by erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * The acquired domain/volume will be relinquished in kill_sb() on
	 * error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->dif0.fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->dif0.fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->dif0.fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}