1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/data.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/sched/mm.h>
11 #include <linux/mpage.h>
12 #include <linux/writeback.h>
13 #include <linux/pagevec.h>
14 #include <linux/blkdev.h>
15 #include <linux/bio.h>
16 #include <linux/blk-crypto.h>
17 #include <linux/swap.h>
18 #include <linux/prefetch.h>
19 #include <linux/uio.h>
20 #include <linux/sched/signal.h>
21 #include <linux/fiemap.h>
22 #include <linux/iomap.h>
23
24 #include "f2fs.h"
25 #include "node.h"
26 #include "segment.h"
27 #include "iostat.h"
28 #include <trace/events/f2fs.h>
29
30 #define NUM_PREALLOC_POST_READ_CTXS 128
31
32 static struct kmem_cache *bio_post_read_ctx_cache;
33 static struct kmem_cache *bio_entry_slab;
34 static struct kmem_cache *ffs_entry_slab;
35 static mempool_t *bio_post_read_ctx_pool;
36 static struct bio_set f2fs_bioset;
37
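/*
 * Per-folio read state, attached via folio->private for large folios:
 * read_pages_pending counts the PAGE_SIZE units that still have read I/O in
 * flight, so the folio is marked up-to-date and unlocked only after every
 * constituent page has completed.
 */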
38 struct f2fs_folio_state {
39 spinlock_t state_lock;
40 unsigned int read_pages_pending;
41 };
42
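/* Size the bioset's front pool to the number of active log (curseg) types. */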
43 #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
44
int __init f2fs_init_bioset(void)
46 {
47 return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
48 0, BIOSET_NEED_BVECS);
49 }
50
void f2fs_destroy_bioset(void)
52 {
53 bioset_exit(&f2fs_bioset);
54 }
55
bool f2fs_is_cp_guaranteed(const struct folio *folio)
57 {
58 struct address_space *mapping = folio->mapping;
59 struct inode *inode;
60 struct f2fs_sb_info *sbi;
61
62 if (fscrypt_is_bounce_folio(folio))
63 return folio_test_f2fs_gcing(fscrypt_pagecache_folio(folio));
64
65 inode = mapping->host;
66 sbi = F2FS_I_SB(inode);
67
68 if (inode->i_ino == F2FS_META_INO(sbi) ||
69 inode->i_ino == F2FS_NODE_INO(sbi) ||
70 S_ISDIR(inode->i_mode))
71 return true;
72
73 if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
74 folio_test_f2fs_gcing(folio))
75 return true;
76 return false;
77 }
78
static enum count_type __read_io_type(struct folio *folio)
80 {
81 struct address_space *mapping = folio->mapping;
82
83 if (mapping) {
84 struct inode *inode = mapping->host;
85 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
86
87 if (inode->i_ino == F2FS_META_INO(sbi))
88 return F2FS_RD_META;
89
90 if (inode->i_ino == F2FS_NODE_INO(sbi))
91 return F2FS_RD_NODE;
92 }
93 return F2FS_RD_DATA;
94 }
95
96 /* postprocessing steps for read bios */
97 enum bio_post_read_step {
98 #ifdef CONFIG_FS_ENCRYPTION
99 STEP_DECRYPT = BIT(0),
100 #else
101 STEP_DECRYPT = 0, /* compile out the decryption-related code */
102 #endif
103 #ifdef CONFIG_F2FS_FS_COMPRESSION
104 STEP_DECOMPRESS = BIT(1),
105 #else
106 STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
107 #endif
108 #ifdef CONFIG_FS_VERITY
109 STEP_VERITY = BIT(2),
110 #else
111 STEP_VERITY = 0, /* compile out the verity-related code */
112 #endif
113 };
114
115 struct bio_post_read_ctx {
116 struct bio *bio;
117 struct f2fs_sb_info *sbi;
118 struct fsverity_info *vi;
119 struct work_struct work;
120 unsigned int enabled_steps;
121 /*
122 * decompression_attempted keeps track of whether
123 * f2fs_end_read_compressed_page() has been called on the pages in the
124 * bio that belong to a compressed cluster yet.
125 */
126 bool decompression_attempted;
127 block_t fs_blkaddr;
128 };
129
130 /*
131 * Update and unlock a bio's pages, and free the bio.
132 *
133 * This marks pages up-to-date only if there was no error in the bio (I/O error,
134 * decryption error, or verity error), as indicated by bio->bi_status.
135 *
136 * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
137 * aren't marked up-to-date here, as decompression is done on a per-compression-
 * cluster basis rather than a per-bio basis. Instead, we need to do only two
139 * things for each compressed page here: call f2fs_end_read_compressed_page()
140 * with failed=true if an error occurred before it would have normally gotten
141 * called (i.e., I/O error or decryption error, but *not* verity error), and
142 * release the bio's reference to the decompress_io_ctx of the page's cluster.
143 */
static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
145 {
146 struct folio_iter fi;
147 struct bio_post_read_ctx *ctx = bio->bi_private;
148 unsigned long flags;
149
150 bio_for_each_folio_all(fi, bio) {
151 struct folio *folio = fi.folio;
152 unsigned nr_pages = fi.length >> PAGE_SHIFT;
153 bool finished = true;
154
155 if (!folio_test_large(folio) &&
156 f2fs_is_compressed_page(folio)) {
157 if (ctx && !ctx->decompression_attempted)
158 f2fs_end_read_compressed_page(folio, true, 0,
159 in_task);
160 f2fs_put_folio_dic(folio, in_task);
161 continue;
162 }
163
164 if (folio_test_large(folio)) {
165 struct f2fs_folio_state *ffs = folio->private;
166
167 spin_lock_irqsave(&ffs->state_lock, flags);
168 ffs->read_pages_pending -= nr_pages;
169 finished = !ffs->read_pages_pending;
170 spin_unlock_irqrestore(&ffs->state_lock, flags);
171 }
172
173 while (nr_pages--)
174 dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
175
176 if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
177 f2fs_sanity_check_node_footer(F2FS_F_SB(folio),
178 folio, folio->index, NODE_TYPE_REGULAR, true))
179 bio->bi_status = BLK_STS_IOERR;
180
181 if (finished)
182 folio_end_read(folio, bio->bi_status == BLK_STS_OK);
183 }
184
185 if (ctx)
186 mempool_free(ctx, bio_post_read_ctx_pool);
187 bio_put(bio);
188 }
189
static void f2fs_verify_bio(struct work_struct *work)
191 {
192 struct bio_post_read_ctx *ctx =
193 container_of(work, struct bio_post_read_ctx, work);
194 struct bio *bio = ctx->bio;
195 bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
196 struct fsverity_info *vi = ctx->vi;
197
198 /*
199 * fsverity_verify_bio() may call readahead() again, and while verity
200 * will be disabled for this, decryption and/or decompression may still
201 * be needed, resulting in another bio_post_read_ctx being allocated.
202 * So to prevent deadlocks we need to release the current ctx to the
203 * mempool first. This assumes that verity is the last post-read step.
204 */
205 mempool_free(ctx, bio_post_read_ctx_pool);
206 bio->bi_private = NULL;
207
208 /*
209 * Verify the bio's pages with fs-verity. Exclude compressed pages,
210 * as those were handled separately by f2fs_end_read_compressed_page().
211 */
212 if (may_have_compressed_pages) {
213 struct folio_iter fi;
214
215 bio_for_each_folio_all(fi, bio) {
216 struct folio *folio = fi.folio;
217
218 if (!f2fs_is_compressed_page(folio) &&
219 !fsverity_verify_folio(vi, folio)) {
220 bio->bi_status = BLK_STS_IOERR;
221 break;
222 }
223 }
224 } else {
225 fsverity_verify_bio(vi, bio);
226 }
227
228 f2fs_finish_read_bio(bio, true);
229 }
230
231 /*
232 * If the bio's data needs to be verified with fs-verity, then enqueue the
233 * verity work for the bio. Otherwise finish the bio now.
234 *
235 * Note that to avoid deadlocks, the verity work can't be done on the
236 * decryption/decompression workqueue. This is because verifying the data pages
237 * can involve reading verity metadata pages from the file, and these verity
238 * metadata pages may be encrypted and/or compressed.
239 */
static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
241 {
242 struct bio_post_read_ctx *ctx = bio->bi_private;
243
244 if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
245 INIT_WORK(&ctx->work, f2fs_verify_bio);
246 fsverity_enqueue_verify_work(&ctx->work);
247 } else {
248 f2fs_finish_read_bio(bio, in_task);
249 }
250 }
251
252 /*
253 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
254 * remaining page was read by @ctx->bio.
255 *
256 * Note that a bio may span clusters (even a mix of compressed and uncompressed
257 * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates
258 * that the bio includes at least one compressed page. The actual decompression
259 * is done on a per-cluster basis, not a per-bio basis.
260 */
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
262 bool in_task)
263 {
264 struct folio_iter fi;
265 bool all_compressed = true;
266 block_t blkaddr = ctx->fs_blkaddr;
267
268 bio_for_each_folio_all(fi, ctx->bio) {
269 struct folio *folio = fi.folio;
270
271 if (f2fs_is_compressed_page(folio))
272 f2fs_end_read_compressed_page(folio, false, blkaddr,
273 in_task);
274 else
275 all_compressed = false;
276
277 blkaddr++;
278 }
279
280 ctx->decompression_attempted = true;
281
282 /*
283 * Optimization: if all the bio's pages are compressed, then scheduling
284 * the per-bio verity work is unnecessary, as verity will be fully
285 * handled at the compression cluster level.
286 */
287 if (all_compressed)
288 ctx->enabled_steps &= ~STEP_VERITY;
289 }
290
static void f2fs_post_read_work(struct work_struct *work)
292 {
293 struct bio_post_read_ctx *ctx =
294 container_of(work, struct bio_post_read_ctx, work);
295 struct bio *bio = ctx->bio;
296
297 if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
298 f2fs_finish_read_bio(bio, true);
299 return;
300 }
301
302 if (ctx->enabled_steps & STEP_DECOMPRESS)
303 f2fs_handle_step_decompress(ctx, true);
304
305 f2fs_verify_and_finish_bio(bio, true);
306 }
307
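/*
 * Completion handler for read bios: after optional fault injection, either
 * run the decompression step inline (when it is the only step needed and the
 * system is not in low-memory mode), defer decryption/decompression to the
 * post-read workqueue, or go straight to verity/finish handling.
 */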
static void f2fs_read_end_io(struct bio *bio)
309 {
310 struct f2fs_sb_info *sbi = F2FS_F_SB(bio_first_folio_all(bio));
311 struct bio_post_read_ctx *ctx;
312 bool intask = in_task() && !irqs_disabled();
313
314 iostat_update_and_unbind_ctx(bio);
315 ctx = bio->bi_private;
316
317 if (time_to_inject(sbi, FAULT_READ_IO))
318 bio->bi_status = BLK_STS_IOERR;
319
320 if (bio->bi_status != BLK_STS_OK) {
321 f2fs_finish_read_bio(bio, intask);
322 return;
323 }
324
325 if (ctx) {
326 unsigned int enabled_steps = ctx->enabled_steps &
327 (STEP_DECRYPT | STEP_DECOMPRESS);
328
		/*
		 * If decompression is the only post-read step needed (no
		 * decryption), handle it inline here instead of deferring to
		 * the post-read workqueue.
		 */
333 if (enabled_steps == STEP_DECOMPRESS &&
334 !f2fs_low_mem_mode(sbi)) {
335 f2fs_handle_step_decompress(ctx, intask);
336 } else if (enabled_steps) {
337 INIT_WORK(&ctx->work, f2fs_post_read_work);
338 queue_work(ctx->sbi->post_read_wq, &ctx->work);
339 return;
340 }
341 }
342
343 f2fs_verify_and_finish_bio(bio, intask);
344 }
345
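/*
 * Completion handler for write bios: free encryption bounce folios, let the
 * compression code finish its own folios, stop checkpointing if a
 * checkpoint-guaranteed write failed, drop the per-type writeback counts and
 * end writeback on each folio.
 */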
static void f2fs_write_end_io(struct bio *bio)
347 {
348 struct f2fs_sb_info *sbi;
349 struct folio_iter fi;
350
351 iostat_update_and_unbind_ctx(bio);
352 sbi = bio->bi_private;
353
354 if (time_to_inject(sbi, FAULT_WRITE_IO))
355 bio->bi_status = BLK_STS_IOERR;
356
357 bio_for_each_folio_all(fi, bio) {
358 struct folio *folio = fi.folio;
359 enum count_type type;
360
361 if (fscrypt_is_bounce_folio(folio)) {
362 struct folio *io_folio = folio;
363
364 folio = fscrypt_pagecache_folio(io_folio);
365 fscrypt_free_bounce_page(&io_folio->page);
366 }
367
368 #ifdef CONFIG_F2FS_FS_COMPRESSION
369 if (f2fs_is_compressed_page(folio)) {
370 f2fs_compress_write_end_io(bio, folio);
371 continue;
372 }
373 #endif
374
375 type = WB_DATA_TYPE(folio, false);
376
377 if (unlikely(bio->bi_status != BLK_STS_OK)) {
378 mapping_set_error(folio->mapping, -EIO);
379 if (type == F2FS_WB_CP_DATA)
380 f2fs_stop_checkpoint(sbi, true,
381 STOP_CP_REASON_WRITE_FAIL);
382 }
383
384 if (is_node_folio(folio)) {
385 f2fs_sanity_check_node_footer(sbi, folio,
386 folio->index, NODE_TYPE_REGULAR, true);
387 f2fs_bug_on(sbi, folio->index != nid_of_node(folio));
388 }
389
390 dec_page_count(sbi, type);
391
392 /*
393 * we should access sbi before folio_end_writeback() to
394 * avoid racing w/ kill_f2fs_super()
395 */
396 if (type == F2FS_WB_CP_DATA && !get_pages(sbi, type) &&
397 wq_has_sleeper(&sbi->cp_wait))
398 wake_up(&sbi->cp_wait);
399
400 if (f2fs_in_warm_node_list(sbi, folio))
401 f2fs_del_fsync_node_entry(sbi, folio);
402 folio_clear_f2fs_gcing(folio);
403 folio_end_writeback(folio);
404 }
405
406 bio_put(bio);
407 }
408
409 #ifdef CONFIG_BLK_DEV_ZONED
static void f2fs_zone_write_end_io(struct bio *bio)
411 {
412 struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
413
414 bio->bi_private = io->bi_private;
415 complete(&io->zone_wait);
416 f2fs_write_end_io(bio);
417 }
418 #endif
419
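/*
 * Return the block device backing @blk_addr; on multi-device setups the
 * address is rebased to the matching device and, when requested, the
 * corresponding sector is returned through @sector.
 */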
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
421 block_t blk_addr, sector_t *sector)
422 {
423 struct block_device *bdev = sbi->sb->s_bdev;
424 int i;
425
426 if (f2fs_is_multi_device(sbi)) {
427 for (i = 0; i < sbi->s_ndevs; i++) {
428 if (FDEV(i).start_blk <= blk_addr &&
429 FDEV(i).end_blk >= blk_addr) {
430 blk_addr -= FDEV(i).start_blk;
431 bdev = FDEV(i).bdev;
432 break;
433 }
434 }
435 }
436
437 if (sector)
438 *sector = SECTOR_FROM_BLOCK(blk_addr);
439 return bdev;
440 }
441
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
443 {
444 int i;
445
446 if (!f2fs_is_multi_device(sbi))
447 return 0;
448
449 for (i = 0; i < sbi->s_ndevs; i++)
450 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
451 return i;
452 return 0;
453 }
454
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
456 {
457 unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
458 unsigned int fua_flag, meta_flag, io_flag;
459 blk_opf_t op_flags = 0;
460
461 if (fio->op != REQ_OP_WRITE)
462 return 0;
463 if (fio->type == DATA)
464 io_flag = fio->sbi->data_io_flag;
465 else if (fio->type == NODE)
466 io_flag = fio->sbi->node_io_flag;
467 else
468 return 0;
469
470 fua_flag = io_flag & temp_mask;
471 meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
472
	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3  |    2 |    1 |   0 |
	 * Cold | Warm | Hot  | Cold | Warm | Hot |
	 */
479 if (BIT(fio->temp) & meta_flag)
480 op_flags |= REQ_META;
481 if (BIT(fio->temp) & fua_flag)
482 op_flags |= REQ_FUA;
483
484 if (fio->type == DATA &&
485 F2FS_I(fio->folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE)
486 op_flags |= REQ_PRIO;
487
488 return op_flags;
489 }
490
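/*
 * Allocate a bio for @fio targeting fio->new_blkaddr, with the read/write
 * completion handler, write hint and iostat context already set up.
 */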
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
492 {
493 struct f2fs_sb_info *sbi = fio->sbi;
494 struct block_device *bdev;
495 sector_t sector;
496 struct bio *bio;
497
	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
499 bio = bio_alloc_bioset(bdev, npages,
500 fio->op | fio->op_flags | f2fs_io_flags(fio),
501 GFP_NOIO, &f2fs_bioset);
502 bio->bi_iter.bi_sector = sector;
503 if (is_read_io(fio->op)) {
504 bio->bi_end_io = f2fs_read_end_io;
505 bio->bi_private = NULL;
506 } else {
507 bio->bi_end_io = f2fs_write_end_io;
508 bio->bi_private = sbi;
509 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
510 fio->type, fio->temp);
511 }
512 iostat_alloc_and_bind_ctx(sbi, bio, NULL);
513
514 if (fio->io_wbc)
515 wbc_init_bio(fio->io_wbc, bio);
516
517 return bio;
518 }
519
static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
521 pgoff_t first_idx,
522 const struct f2fs_io_info *fio,
523 gfp_t gfp_mask)
524 {
525 /*
526 * The f2fs garbage collector sets ->encrypted_page when it wants to
527 * read/write raw data without encryption.
528 */
529 if (!fio || !fio->encrypted_page)
530 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
531 }
532
static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
534 pgoff_t next_idx,
535 const struct f2fs_io_info *fio)
536 {
537 /*
538 * The f2fs garbage collector sets ->encrypted_page when it wants to
539 * read/write raw data without encryption.
540 */
541 if (fio && fio->encrypted_page)
542 return !bio_has_crypt_ctx(bio);
543
544 return fscrypt_mergeable_bio(bio, inode, next_idx);
545 }
546
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
548 enum page_type type)
549 {
550 if (!bio)
551 return;
552
553 WARN_ON_ONCE(!is_read_io(bio_op(bio)));
554 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
555
556 iostat_update_submit_ctx(bio, type);
557 blk_crypto_submit_bio(bio);
558 }
559
static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
561 enum page_type type)
562 {
563 WARN_ON_ONCE(is_read_io(bio_op(bio)));
564 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
565 iostat_update_submit_ctx(bio, type);
566 blk_crypto_submit_bio(bio);
567 }
568
static void __submit_merged_bio(struct f2fs_bio_info *io)
570 {
571 struct f2fs_io_info *fio = &io->fio;
572
573 if (!io->bio)
574 return;
575
576 if (is_read_io(fio->op)) {
577 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
578 f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
579 } else {
580 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
581 f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
582 }
583 io->bio = NULL;
584 }
585
static bool __has_merged_page(struct bio *bio, struct inode *inode,
587 struct folio *folio, nid_t ino)
588 {
589 struct folio_iter fi;
590
591 if (!bio)
592 return false;
593
594 if (!inode && !folio && !ino)
595 return true;
596
597 bio_for_each_folio_all(fi, bio) {
598 struct folio *target = fi.folio;
599
600 if (fscrypt_is_bounce_folio(target)) {
601 target = fscrypt_pagecache_folio(target);
602 if (IS_ERR(target))
603 continue;
604 }
605 if (f2fs_is_compressed_page(target)) {
606 target = f2fs_compress_control_folio(target);
607 if (IS_ERR(target))
608 continue;
609 }
610
611 if (inode && inode == target->mapping->host)
612 return true;
613 if (folio && folio == target)
614 return true;
615 if (ino && ino == ino_of_node(target))
616 return true;
617 }
618
619 return false;
620 }
621
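/*
 * Allocate and initialize the per-page-type, per-temperature write contexts
 * used to merge writes; META gets a single context, the other types get one
 * per temperature.
 */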
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
623 {
624 int i;
625
626 for (i = 0; i < NR_PAGE_TYPE; i++) {
627 int n = (i == META) ? 1 : NR_TEMP_TYPE;
628 int j;
629
630 sbi->write_io[i] = f2fs_kmalloc(sbi,
631 array_size(n, sizeof(struct f2fs_bio_info)),
632 GFP_KERNEL);
633 if (!sbi->write_io[i])
634 return -ENOMEM;
635
636 for (j = HOT; j < n; j++) {
637 struct f2fs_bio_info *io = &sbi->write_io[i][j];
638
639 init_f2fs_rwsem_trace(&io->io_rwsem, sbi,
640 LOCK_NAME_IO_RWSEM);
641 io->sbi = sbi;
642 io->bio = NULL;
643 io->last_block_in_bio = 0;
644 spin_lock_init(&io->io_lock);
645 INIT_LIST_HEAD(&io->io_list);
646 INIT_LIST_HEAD(&io->bio_list);
647 init_f2fs_rwsem(&io->bio_list_lock);
648 #ifdef CONFIG_BLK_DEV_ZONED
649 init_completion(&io->zone_wait);
650 io->zone_pending_bio = NULL;
651 io->bi_private = NULL;
652 #endif
653 }
654 }
655
656 return 0;
657 }
658
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
660 enum page_type type, enum temp_type temp)
661 {
662 enum page_type btype = PAGE_TYPE_OF_BIO(type);
663 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
664 struct f2fs_lock_context lc;
665
666 f2fs_down_write_trace(&io->io_rwsem, &lc);
667
668 if (!io->bio)
669 goto unlock_out;
670
671 /* change META to META_FLUSH in the checkpoint procedure */
672 if (type >= META_FLUSH) {
673 io->fio.type = META_FLUSH;
674 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
675 if (!test_opt(sbi, NOBARRIER))
676 io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
677 }
678 __submit_merged_bio(io);
679 unlock_out:
680 f2fs_up_write_trace(&io->io_rwsem, &lc);
681 }
682
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
684 struct inode *inode, struct folio *folio,
685 nid_t ino, enum page_type type, bool writeback)
686 {
687 enum temp_type temp;
688 bool ret = true;
689 bool force = !inode && !folio && !ino;
690
691 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
692 if (!force) {
693 enum page_type btype = PAGE_TYPE_OF_BIO(type);
694 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
695 struct f2fs_lock_context lc;
696
697 f2fs_down_read_trace(&io->io_rwsem, &lc);
698 ret = __has_merged_page(io->bio, inode, folio, ino);
699 f2fs_up_read_trace(&io->io_rwsem, &lc);
700 }
701 if (ret) {
702 __f2fs_submit_merged_write(sbi, type, temp);
			/*
			 * When waiting for writeback, once the bio that owns the
			 * folio has been submitted, there is no need to submit
			 * bios of the other temperature types.
			 */
708 if (writeback)
709 break;
710 }
711
712 /* TODO: use HOT temp only for meta pages now. */
713 if (type >= META)
714 break;
715 }
716 }
717
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
719 {
720 __submit_merged_write_cond(sbi, NULL, NULL, 0, type, false);
721 }
722
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
724 struct inode *inode, struct folio *folio,
725 nid_t ino, enum page_type type)
726 {
727 __submit_merged_write_cond(sbi, inode, folio, ino, type, false);
728 }
729
void f2fs_submit_merged_write_folio(struct f2fs_sb_info *sbi,
731 struct folio *folio, enum page_type type)
732 {
733 __submit_merged_write_cond(sbi, NULL, folio, 0, type, true);
734 }
735
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
737 {
738 f2fs_submit_merged_write(sbi, DATA);
739 f2fs_submit_merged_write(sbi, NODE);
740 f2fs_submit_merged_write(sbi, META);
741 }
742
743 /*
744 * Fill the locked page with data located in the block address.
745 * A caller needs to unlock the page on failure.
746 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
748 {
749 struct bio *bio;
750 struct folio *fio_folio = fio->folio;
751 struct folio *data_folio = fio->encrypted_page ?
752 page_folio(fio->encrypted_page) : fio_folio;
753
754 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
755 fio->is_por ? META_POR : (__is_meta_io(fio) ?
756 META_GENERIC : DATA_GENERIC_ENHANCE)))
757 return -EFSCORRUPTED;
758
759 trace_f2fs_submit_folio_bio(data_folio, fio);
760
761 /* Allocate a new bio */
762 bio = __bio_alloc(fio, 1);
763
764 f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host,
765 fio_folio->index, fio, GFP_NOIO);
766 bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0);
767
768 if (fio->io_wbc && !is_read_io(fio->op))
769 wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE);
770
771 inc_page_count(fio->sbi, is_read_io(fio->op) ?
772 __read_io_type(data_folio) : WB_DATA_TYPE(fio->folio, false));
773
774 if (is_read_io(bio_op(bio)))
775 f2fs_submit_read_bio(fio->sbi, bio, fio->type);
776 else
777 f2fs_submit_write_bio(fio->sbi, bio, fio->type);
778 return 0;
779 }
780
static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
782 block_t last_blkaddr, block_t cur_blkaddr)
783 {
784 if (unlikely(sbi->max_io_bytes &&
785 bio->bi_iter.bi_size >= sbi->max_io_bytes))
786 return false;
787 if (last_blkaddr + 1 != cur_blkaddr)
788 return false;
789 return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
790 }
791
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
793 struct f2fs_io_info *fio)
794 {
795 blk_opf_t mask = ~(REQ_PREFLUSH | REQ_FUA);
796
797 if (io->fio.op != fio->op)
798 return false;
799 return (io->fio.op_flags & mask) == (fio->op_flags & mask);
800 }
801
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
803 struct f2fs_bio_info *io,
804 struct f2fs_io_info *fio,
805 block_t last_blkaddr,
806 block_t cur_blkaddr)
807 {
808 if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
809 return false;
810 return io_type_is_mergeable(io, fio);
811 }
812
static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
814 struct folio *folio, enum temp_type temp)
815 {
816 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
817 struct bio_entry *be;
818
819 be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
820 be->bio = bio;
821 bio_get(bio);
822
823 bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
824
825 f2fs_down_write(&io->bio_list_lock);
826 list_add_tail(&be->list, &io->bio_list);
827 f2fs_up_write(&io->bio_list_lock);
828 }
829
static void del_bio_entry(struct bio_entry *be)
831 {
832 list_del(&be->list);
833 kmem_cache_free(bio_entry_slab, be);
834 }
835
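/*
 * Try to merge @folio into the pending in-place-update bio tracked on the
 * per-temperature bio list; if merging is not possible, the pending bio is
 * released (and submitted if it was on a list) and -EAGAIN tells the caller
 * to allocate a new one.
 */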
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
837 struct folio *folio)
838 {
839 struct folio *fio_folio = fio->folio;
840 struct f2fs_sb_info *sbi = fio->sbi;
841 enum temp_type temp;
842 bool found = false;
843 int ret = -EAGAIN;
844
845 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
846 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
847 struct list_head *head = &io->bio_list;
848 struct bio_entry *be;
849
850 f2fs_down_write(&io->bio_list_lock);
851 list_for_each_entry(be, head, list) {
852 if (be->bio != *bio)
853 continue;
854
855 found = true;
856
857 f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
858 *fio->last_block,
859 fio->new_blkaddr));
860 if (f2fs_crypt_mergeable_bio(*bio,
861 fio_folio->mapping->host,
862 fio_folio->index, fio) &&
863 bio_add_folio(*bio, folio, folio_size(folio), 0)) {
864 ret = 0;
865 break;
866 }
867
868 /* page can't be merged into bio; submit the bio */
869 del_bio_entry(be);
870 f2fs_submit_write_bio(sbi, *bio, DATA);
871 break;
872 }
873 f2fs_up_write(&io->bio_list_lock);
874 }
875
876 if (ret) {
877 bio_put(*bio);
878 *bio = NULL;
879 }
880
881 return ret;
882 }
883
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
885 struct bio **bio, struct folio *folio)
886 {
887 enum temp_type temp;
888 bool found = false;
889 struct bio *target = bio ? *bio : NULL;
890
891 f2fs_bug_on(sbi, !target && !folio);
892
893 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
894 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
895 struct list_head *head = &io->bio_list;
896 struct bio_entry *be;
897
898 if (list_empty(head))
899 continue;
900
901 f2fs_down_read(&io->bio_list_lock);
902 list_for_each_entry(be, head, list) {
903 if (target)
904 found = (target == be->bio);
905 else
906 found = __has_merged_page(be->bio, NULL,
907 folio, 0);
908 if (found)
909 break;
910 }
911 f2fs_up_read(&io->bio_list_lock);
912
913 if (!found)
914 continue;
915
916 found = false;
917
918 f2fs_down_write(&io->bio_list_lock);
919 list_for_each_entry(be, head, list) {
920 if (target)
921 found = (target == be->bio);
922 else
923 found = __has_merged_page(be->bio, NULL,
924 folio, 0);
925 if (found) {
926 target = be->bio;
927 del_bio_entry(be);
928 break;
929 }
930 }
931 f2fs_up_write(&io->bio_list_lock);
932 }
933
934 if (found)
935 f2fs_submit_write_bio(sbi, target, DATA);
936 if (bio && *bio) {
937 bio_put(*bio);
938 *bio = NULL;
939 }
940 }
941
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
943 {
944 struct bio *bio = *fio->bio;
945 struct folio *data_folio = fio->encrypted_page ?
946 page_folio(fio->encrypted_page) : fio->folio;
947 struct folio *folio = fio->folio;
948
949 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
950 __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
951 return -EFSCORRUPTED;
952
953 trace_f2fs_submit_folio_bio(data_folio, fio);
954
955 if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
956 fio->new_blkaddr))
957 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
958 alloc_new:
959 if (!bio) {
960 bio = __bio_alloc(fio, BIO_MAX_VECS);
961 f2fs_set_bio_crypt_ctx(bio, folio->mapping->host,
962 folio->index, fio, GFP_NOIO);
963
964 add_bio_entry(fio->sbi, bio, data_folio, fio->temp);
965 } else {
966 if (add_ipu_page(fio, &bio, data_folio))
967 goto alloc_new;
968 }
969
970 if (fio->io_wbc)
971 wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
972
973 inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));
974
975 *fio->last_block = fio->new_blkaddr;
976 *fio->bio = bio;
977
978 return 0;
979 }
980
981 #ifdef CONFIG_BLK_DEV_ZONED
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
983 {
984 struct block_device *bdev = sbi->sb->s_bdev;
985 int devi = 0;
986
987 if (f2fs_is_multi_device(sbi)) {
988 devi = f2fs_target_device_index(sbi, blkaddr);
989 if (blkaddr < FDEV(devi).start_blk ||
990 blkaddr > FDEV(devi).end_blk) {
991 f2fs_err(sbi, "Invalid block %x", blkaddr);
992 return false;
993 }
994 blkaddr -= FDEV(devi).start_blk;
995 bdev = FDEV(devi).bdev;
996 }
997 return bdev_is_zoned(bdev) &&
998 f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
999 (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
1000 }
1001 #endif
1002
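/*
 * Add @fio to the merged write bio for its page type and temperature,
 * submitting the in-flight bio first whenever the new block is not physically
 * contiguous or not crypto-mergeable with it; on zoned devices the bio is
 * also submitted when it reaches the end of a zone.
 */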
void f2fs_submit_page_write(struct f2fs_io_info *fio)
1004 {
1005 struct f2fs_sb_info *sbi = fio->sbi;
1006 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
1007 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
1008 struct folio *bio_folio;
1009 struct f2fs_lock_context lc;
1010 enum count_type type;
1011
1012 f2fs_bug_on(sbi, is_read_io(fio->op));
1013
1014 f2fs_down_write_trace(&io->io_rwsem, &lc);
1015 next:
1016 #ifdef CONFIG_BLK_DEV_ZONED
1017 if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
1018 wait_for_completion_io(&io->zone_wait);
1019 bio_put(io->zone_pending_bio);
1020 io->zone_pending_bio = NULL;
1021 io->bi_private = NULL;
1022 }
1023 #endif
1024
1025 if (fio->in_list) {
1026 spin_lock(&io->io_lock);
1027 if (list_empty(&io->io_list)) {
1028 spin_unlock(&io->io_lock);
1029 goto out;
1030 }
1031 fio = list_first_entry(&io->io_list,
1032 struct f2fs_io_info, list);
1033 list_del(&fio->list);
1034 spin_unlock(&io->io_lock);
1035 }
1036
1037 verify_fio_blkaddr(fio);
1038
1039 if (fio->encrypted_page)
1040 bio_folio = page_folio(fio->encrypted_page);
1041 else if (fio->compressed_page)
1042 bio_folio = page_folio(fio->compressed_page);
1043 else
1044 bio_folio = fio->folio;
1045
1046 /* set submitted = true as a return value */
1047 fio->submitted = 1;
1048
1049 type = WB_DATA_TYPE(bio_folio, fio->compressed_page);
1050 inc_page_count(sbi, type);
1051
1052 if (io->bio &&
1053 (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
1054 fio->new_blkaddr) ||
1055 !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio),
1056 bio_folio->index, fio)))
1057 __submit_merged_bio(io);
1058 alloc_new:
1059 if (io->bio == NULL) {
1060 io->bio = __bio_alloc(fio, BIO_MAX_VECS);
1061 f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio),
1062 bio_folio->index, fio, GFP_NOIO);
1063 io->fio = *fio;
1064 }
1065
1066 if (!bio_add_folio(io->bio, bio_folio, folio_size(bio_folio), 0)) {
1067 __submit_merged_bio(io);
1068 goto alloc_new;
1069 }
1070
1071 if (fio->io_wbc)
1072 wbc_account_cgroup_owner(fio->io_wbc, fio->folio,
1073 folio_size(fio->folio));
1074
1075 io->last_block_in_bio = fio->new_blkaddr;
1076
1077 trace_f2fs_submit_folio_write(fio->folio, fio);
1078 #ifdef CONFIG_BLK_DEV_ZONED
1079 if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
1080 is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
1081 bio_get(io->bio);
1082 reinit_completion(&io->zone_wait);
1083 io->bi_private = io->bio->bi_private;
1084 io->bio->bi_private = io;
1085 io->bio->bi_end_io = f2fs_zone_write_end_io;
1086 io->zone_pending_bio = io->bio;
1087 __submit_merged_bio(io);
1088 }
1089 #endif
1090 if (fio->in_list)
1091 goto next;
1092 out:
1093 if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1094 !f2fs_is_checkpoint_ready(sbi))
1095 __submit_merged_bio(io);
1096 f2fs_up_write_trace(&io->io_rwsem, &lc);
1097 }
1098
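/*
 * Allocate a read bio for @blkaddr and attach a bio_post_read_ctx when
 * decryption or verity is needed, or when the file is compressed and
 * decompression may be needed after the I/O completes.
 */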
static struct bio *f2fs_grab_read_bio(struct inode *inode,
1100 struct fsverity_info *vi, block_t blkaddr,
1101 unsigned nr_pages, blk_opf_t op_flag,
1102 pgoff_t first_idx, bool for_write)
1103 {
1104 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1105 struct bio *bio;
1106 struct bio_post_read_ctx *ctx = NULL;
1107 unsigned int post_read_steps = 0;
1108 sector_t sector;
	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1110
1111 bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1112 REQ_OP_READ | op_flag,
1113 for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1114 bio->bi_iter.bi_sector = sector;
1115 f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1116 bio->bi_end_io = f2fs_read_end_io;
1117
1118 if (fscrypt_inode_uses_fs_layer_crypto(inode))
1119 post_read_steps |= STEP_DECRYPT;
1120
1121 if (vi)
1122 post_read_steps |= STEP_VERITY;
1123
1124 /*
1125 * STEP_DECOMPRESS is handled specially, since a compressed file might
1126 * contain both compressed and uncompressed clusters. We'll allocate a
1127 * bio_post_read_ctx if the file is compressed, but the caller is
1128 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1129 */
1130
1131 if (post_read_steps || f2fs_compressed_file(inode)) {
1132 /* Due to the mempool, this never fails. */
1133 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1134 ctx->bio = bio;
1135 ctx->sbi = sbi;
1136 ctx->vi = vi;
1137 ctx->enabled_steps = post_read_steps;
1138 ctx->fs_blkaddr = blkaddr;
1139 ctx->decompression_attempted = false;
1140 bio->bi_private = ctx;
1141 }
1142 iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1143
1144 return bio;
1145 }
1146
/* Read one folio from @blkaddr, handling the encryption context as needed. */
static void f2fs_submit_page_read(struct inode *inode, struct fsverity_info *vi,
1149 struct folio *folio, block_t blkaddr,
1150 blk_opf_t op_flags, bool for_write)
1151 {
1152 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1153 struct bio *bio;
1154
1155 bio = f2fs_grab_read_bio(inode, vi, blkaddr, 1, op_flags, folio->index,
1156 for_write);
1157
1158 /* wait for GCed page writeback via META_MAPPING */
1159 f2fs_wait_on_block_writeback(inode, blkaddr);
1160
1161 if (!bio_add_folio(bio, folio, PAGE_SIZE, 0))
1162 f2fs_bug_on(sbi, 1);
1163
1164 inc_page_count(sbi, F2FS_RD_DATA);
1165 f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1166 f2fs_submit_read_bio(sbi, bio, DATA);
1167 }
1168
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1170 {
1171 __le32 *addr = get_dnode_addr(dn->inode, dn->node_folio);
1172
1173 dn->data_blkaddr = blkaddr;
1174 addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1175 }
1176
1177 /*
1178 * Lock ordering for the change of data block address:
1179 * ->data_page
1180 * ->node_folio
1181 * update block addresses in the node page
1182 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1184 {
1185 f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true);
1186 __set_data_blkaddr(dn, blkaddr);
1187 if (folio_mark_dirty(dn->node_folio))
1188 dn->node_changed = true;
1189 }
1190
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1192 {
1193 f2fs_set_data_blkaddr(dn, blkaddr);
1194 f2fs_update_read_extent_cache(dn);
1195 }
1196
1197 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1199 {
1200 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1201 int err;
1202
1203 if (!count)
1204 return 0;
1205
1206 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1207 return -EPERM;
1208 err = inc_valid_block_count(sbi, dn->inode, &count, true);
1209 if (unlikely(err))
1210 return err;
1211
1212 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1213 dn->ofs_in_node, count);
1214
1215 f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true);
1216
1217 for (; count > 0; dn->ofs_in_node++) {
1218 block_t blkaddr = f2fs_data_blkaddr(dn);
1219
1220 if (blkaddr == NULL_ADDR) {
1221 __set_data_blkaddr(dn, NEW_ADDR);
1222 count--;
1223 }
1224 }
1225
1226 if (folio_mark_dirty(dn->node_folio))
1227 dn->node_changed = true;
1228 return 0;
1229 }
1230
1231 /* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
1233 {
1234 unsigned int ofs_in_node = dn->ofs_in_node;
1235 int ret;
1236
1237 ret = f2fs_reserve_new_blocks(dn, 1);
1238 dn->ofs_in_node = ofs_in_node;
1239 return ret;
1240 }
1241
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1243 {
1244 bool need_put = dn->inode_folio ? false : true;
1245 int err;
1246
1247 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1248 if (err)
1249 return err;
1250
1251 if (dn->data_blkaddr == NULL_ADDR)
1252 err = f2fs_reserve_new_block(dn);
1253 if (err || need_put)
1254 f2fs_put_dnode(dn);
1255 return err;
1256 }
1257
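/*
 * Return the fsverity info to verify against if @idx lies within i_size,
 * or NULL when no verification is needed.
 */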
static inline struct fsverity_info *f2fs_need_verity(const struct inode *inode,
1259 pgoff_t idx)
1260 {
1261 if (idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE))
1262 return fsverity_get_info(inode);
1263 return NULL;
1264 }
1265
struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index,
1267 blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs)
1268 {
1269 struct address_space *mapping = inode->i_mapping;
1270 struct dnode_of_data dn;
1271 struct folio *folio;
1272 int err;
1273 retry:
1274 folio = f2fs_grab_cache_folio(mapping, index, for_write);
1275 if (IS_ERR(folio))
1276 return folio;
1277
1278 if (folio_test_large(folio)) {
1279 pgoff_t folio_index = mapping_align_index(mapping, index);
1280
1281 f2fs_folio_put(folio, true);
1282 invalidate_inode_pages2_range(mapping, folio_index,
1283 folio_index + folio_nr_pages(folio) - 1);
1284 f2fs_schedule_timeout(DEFAULT_SCHEDULE_TIMEOUT);
1285 goto retry;
1286 }
1287
1288 if (f2fs_lookup_read_extent_cache_block(inode, index,
1289 &dn.data_blkaddr)) {
1290 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1291 DATA_GENERIC_ENHANCE_READ)) {
1292 err = -EFSCORRUPTED;
1293 goto put_err;
1294 }
1295 goto got_it;
1296 }
1297
1298 set_new_dnode(&dn, inode, NULL, NULL, 0);
1299 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1300 if (err) {
1301 if (err == -ENOENT && next_pgofs)
1302 *next_pgofs = f2fs_get_next_page_offset(&dn, index);
1303 goto put_err;
1304 }
1305 f2fs_put_dnode(&dn);
1306
1307 if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1308 err = -ENOENT;
1309 if (next_pgofs)
1310 *next_pgofs = index + 1;
1311 goto put_err;
1312 }
1313 if (dn.data_blkaddr != NEW_ADDR &&
1314 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1315 dn.data_blkaddr,
1316 DATA_GENERIC_ENHANCE)) {
1317 err = -EFSCORRUPTED;
1318 goto put_err;
1319 }
1320 got_it:
1321 if (folio_test_uptodate(folio)) {
1322 folio_unlock(folio);
1323 return folio;
1324 }
1325
	/*
	 * A new dentry page was allocated but could not be written, because its
	 * new inode page could not be allocated due to -ENOSPC.
	 * In that case, its blkaddr remains NEW_ADDR.
	 * See f2fs_add_link -> f2fs_get_new_data_folio ->
	 * f2fs_init_inode_metadata.
	 */
1333 if (dn.data_blkaddr == NEW_ADDR) {
1334 folio_zero_segment(folio, 0, folio_size(folio));
1335 if (!folio_test_uptodate(folio))
1336 folio_mark_uptodate(folio);
1337 folio_unlock(folio);
1338 return folio;
1339 }
1340
1341 f2fs_submit_page_read(inode, f2fs_need_verity(inode, folio->index),
1342 folio, dn.data_blkaddr, op_flags, for_write);
1343 return folio;
1344
1345 put_err:
1346 f2fs_folio_put(folio, true);
1347 return ERR_PTR(err);
1348 }
1349
struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index,
1351 pgoff_t *next_pgofs)
1352 {
1353 struct address_space *mapping = inode->i_mapping;
1354 struct folio *folio;
1355
1356 folio = f2fs_filemap_get_folio(mapping, index, FGP_ACCESSED, 0);
1357 if (IS_ERR(folio))
1358 goto read;
1359 if (folio_test_uptodate(folio))
1360 return folio;
1361 f2fs_folio_put(folio, false);
1362
1363 read:
1364 folio = f2fs_get_read_data_folio(inode, index, 0, false, next_pgofs);
1365 if (IS_ERR(folio))
1366 return folio;
1367
1368 if (folio_test_uptodate(folio))
1369 return folio;
1370
1371 folio_wait_locked(folio);
1372 if (unlikely(!folio_test_uptodate(folio))) {
1373 f2fs_folio_put(folio, false);
1374 return ERR_PTR(-EIO);
1375 }
1376 return folio;
1377 }
1378
/*
 * If it tries to access a hole, return an error, because the callers
 * (functions in dir.c and GC) need to know whether this page exists.
 */
struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index,
1385 bool for_write)
1386 {
1387 struct address_space *mapping = inode->i_mapping;
1388 struct folio *folio;
1389
1390 folio = f2fs_get_read_data_folio(inode, index, 0, for_write, NULL);
1391 if (IS_ERR(folio))
1392 return folio;
1393
1394 /* wait for read completion */
1395 folio_lock(folio);
1396 if (unlikely(folio->mapping != mapping || !folio_test_uptodate(folio))) {
1397 f2fs_folio_put(folio, true);
1398 return ERR_PTR(-EIO);
1399 }
1400 return folio;
1401 }
1402
/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, the caller should grab and release a rwsem by calling f2fs_lock_op()
 * and f2fs_unlock_op().
 * Note that ifolio is set only by make_empty_dir, and if any error occurs,
 * ifolio is released by this function.
 */
struct folio *f2fs_get_new_data_folio(struct inode *inode,
1413 struct folio *ifolio, pgoff_t index, bool new_i_size)
1414 {
1415 struct address_space *mapping = inode->i_mapping;
1416 struct folio *folio;
1417 struct dnode_of_data dn;
1418 int err;
1419
1420 folio = f2fs_grab_cache_folio(mapping, index, true);
1421 if (IS_ERR(folio)) {
1422 /*
1423 * before exiting, we should make sure ifolio will be released
1424 * if any error occur.
1425 */
1426 f2fs_folio_put(ifolio, true);
1427 return ERR_PTR(-ENOMEM);
1428 }
1429
1430 set_new_dnode(&dn, inode, ifolio, NULL, 0);
1431 err = f2fs_reserve_block(&dn, index);
1432 if (err) {
1433 f2fs_folio_put(folio, true);
1434 return ERR_PTR(err);
1435 }
1436 if (!ifolio)
1437 f2fs_put_dnode(&dn);
1438
1439 if (folio_test_uptodate(folio))
1440 goto got_it;
1441
1442 if (dn.data_blkaddr == NEW_ADDR) {
1443 folio_zero_segment(folio, 0, folio_size(folio));
1444 if (!folio_test_uptodate(folio))
1445 folio_mark_uptodate(folio);
1446 } else {
1447 f2fs_folio_put(folio, true);
1448
1449 /* if ifolio exists, blkaddr should be NEW_ADDR */
1450 f2fs_bug_on(F2FS_I_SB(inode), ifolio);
1451 folio = f2fs_get_lock_data_folio(inode, index, true);
1452 if (IS_ERR(folio))
1453 return folio;
1454 }
1455 got_it:
1456 if (new_i_size && i_size_read(inode) <
1457 ((loff_t)(index + 1) << PAGE_SHIFT))
1458 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1459 return folio;
1460 }
1461
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1463 {
1464 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1465 struct f2fs_summary sum;
1466 struct node_info ni;
1467 block_t old_blkaddr;
1468 blkcnt_t count = 1;
1469 int err;
1470
1471 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1472 return -EPERM;
1473
1474 err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1475 if (err)
1476 return err;
1477
1478 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1479 if (dn->data_blkaddr == NULL_ADDR) {
1480 err = inc_valid_block_count(sbi, dn->inode, &count, true);
1481 if (unlikely(err))
1482 return err;
1483 }
1484
1485 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1486 old_blkaddr = dn->data_blkaddr;
1487 err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
1488 &dn->data_blkaddr, &sum, seg_type, NULL);
1489 if (err)
1490 return err;
1491
1492 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1493 f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1);
1494
1495 f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1496 return 0;
1497 }
1498
static void f2fs_map_lock(struct f2fs_sb_info *sbi,
1500 struct f2fs_lock_context *lc,
1501 int flag)
1502 {
1503 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1504 f2fs_down_read_trace(&sbi->node_change, lc);
1505 else
1506 f2fs_lock_op(sbi, lc);
1507 }
1508
static void f2fs_map_unlock(struct f2fs_sb_info *sbi,
1510 struct f2fs_lock_context *lc,
1511 int flag)
1512 {
1513 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1514 f2fs_up_read_trace(&sbi->node_change, lc);
1515 else
1516 f2fs_unlock_op(sbi, lc);
1517 }
1518
int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
1520 {
1521 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1522 struct f2fs_lock_context lc;
1523 int err = 0;
1524
1525 f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
1526 if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
1527 &dn->data_blkaddr))
1528 err = f2fs_reserve_block(dn, index);
1529 f2fs_map_unlock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
1530
1531 return err;
1532 }
1533
static int f2fs_map_no_dnode(struct inode *inode,
1535 struct f2fs_map_blocks *map, struct dnode_of_data *dn,
1536 pgoff_t pgoff)
1537 {
1538 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1539
	/*
	 * There is one exceptional case: read_node_page() may return -ENOENT
	 * because the filesystem has been shut down or hit a checkpoint error.
	 * Return -EIO in that case.
	 */
1545 if (map->m_may_create &&
1546 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
1547 return -EIO;
1548
1549 if (map->m_next_pgofs)
1550 *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
1551 if (map->m_next_extent)
1552 *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
1553 return 0;
1554 }
1555
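/*
 * Fast path for f2fs_map_blocks(): serve the mapping from the read extent
 * cache, when possible, without walking the dnode tree.
 */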
static bool f2fs_map_blocks_cached(struct inode *inode,
1557 struct f2fs_map_blocks *map, int flag)
1558 {
1559 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1560 unsigned int maxblocks = map->m_len;
1561 pgoff_t pgoff = (pgoff_t)map->m_lblk;
1562 struct extent_info ei = {};
1563
1564 if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
1565 return false;
1566
1567 map->m_pblk = ei.blk + pgoff - ei.fofs;
1568 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
1569 map->m_flags = F2FS_MAP_MAPPED;
1570 if (map->m_next_extent)
1571 *map->m_next_extent = pgoff + map->m_len;
1572
	/* the wait is needed for hardware encryption, and also avoids potential future issues */
1574 if (flag == F2FS_GET_BLOCK_DIO)
1575 f2fs_wait_on_block_writeback_range(inode,
1576 map->m_pblk, map->m_len);
1577
1578 if (f2fs_allow_multi_device_dio(sbi, flag)) {
1579 int bidx = f2fs_target_device_index(sbi, map->m_pblk);
1580 struct f2fs_dev_info *dev = &sbi->devs[bidx];
1581
1582 map->m_bdev = dev->bdev;
1583 map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
1584 map->m_pblk -= dev->start_blk;
1585 } else {
1586 map->m_bdev = inode->i_sb->s_bdev;
1587 }
1588 return true;
1589 }
1590
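/* Can @blkaddr be appended to the mapping currently being built in @map? */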
static bool map_is_mergeable(struct f2fs_sb_info *sbi,
1592 struct f2fs_map_blocks *map,
1593 block_t blkaddr, int flag, int bidx,
1594 int ofs)
1595 {
1596 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1597 return false;
1598 if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs))
1599 return true;
1600 if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR)
1601 return true;
1602 if (flag == F2FS_GET_BLOCK_PRE_DIO)
1603 return true;
1604 if (flag == F2FS_GET_BLOCK_DIO &&
1605 map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR)
1606 return true;
1607 return false;
1608 }
1609
/*
 * f2fs_map_blocks() tries to find or build a mapping from contiguous logical
 * blocks to physical blocks, and returns that information via the
 * f2fs_map_blocks structure.
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
1616 {
1617 unsigned int maxblocks = map->m_len;
1618 struct dnode_of_data dn;
1619 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1620 struct f2fs_lock_context lc;
1621 int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1622 pgoff_t pgofs, end_offset, end;
1623 int err = 0, ofs = 1;
1624 unsigned int ofs_in_node, last_ofs_in_node;
1625 blkcnt_t prealloc;
1626 block_t blkaddr;
1627 unsigned int start_pgofs;
1628 int bidx = 0;
1629 bool is_hole;
1630 bool lfs_dio_write;
1631
1632 if (!maxblocks)
1633 return 0;
1634
1635 lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
1636 map->m_may_create);
1637
1638 if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
1639 goto out;
1640
1641 map->m_bdev = inode->i_sb->s_bdev;
1642 map->m_multidev_dio =
1643 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1644
1645 map->m_len = 0;
1646 map->m_flags = 0;
1647
1648 /* it only supports block size == page size */
1649 pgofs = (pgoff_t)map->m_lblk;
1650 end = pgofs + maxblocks;
1651
1652 if (flag == F2FS_GET_BLOCK_PRECACHE)
1653 mode = LOOKUP_NODE_RA;
1654
1655 next_dnode:
1656 if (map->m_may_create) {
1657 if (f2fs_lfs_mode(sbi))
1658 f2fs_balance_fs(sbi, true);
1659 f2fs_map_lock(sbi, &lc, flag);
1660 }
1661
1662 /* When reading holes, we need its node page */
1663 set_new_dnode(&dn, inode, NULL, NULL, 0);
1664 err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1665 if (err) {
1666 if (flag == F2FS_GET_BLOCK_BMAP)
1667 map->m_pblk = 0;
1668 if (err == -ENOENT)
1669 err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
1670 goto unlock_out;
1671 }
1672
1673 start_pgofs = pgofs;
1674 prealloc = 0;
1675 last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1676 end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
1677
1678 next_block:
1679 blkaddr = f2fs_data_blkaddr(&dn);
1680 is_hole = !__is_valid_data_blkaddr(blkaddr);
1681 if (!is_hole &&
1682 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1683 err = -EFSCORRUPTED;
1684 goto sync_out;
1685 }
1686
1687 /* use out-place-update for direct IO under LFS mode */
1688 if (map->m_may_create && (is_hole ||
1689 (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
1690 !f2fs_is_pinned_file(inode) && map->m_last_pblk != blkaddr))) {
1691 if (unlikely(f2fs_cp_error(sbi))) {
1692 err = -EIO;
1693 goto sync_out;
1694 }
1695
1696 switch (flag) {
1697 case F2FS_GET_BLOCK_PRE_AIO:
1698 if (blkaddr == NULL_ADDR) {
1699 prealloc++;
1700 last_ofs_in_node = dn.ofs_in_node;
1701 }
1702 break;
1703 case F2FS_GET_BLOCK_PRE_DIO:
1704 case F2FS_GET_BLOCK_DIO:
1705 err = __allocate_data_block(&dn, map->m_seg_type);
1706 if (err)
1707 goto sync_out;
1708 if (flag == F2FS_GET_BLOCK_PRE_DIO)
1709 file_need_truncate(inode);
1710 set_inode_flag(inode, FI_APPEND_WRITE);
1711 break;
1712 default:
1713 WARN_ON_ONCE(1);
1714 err = -EIO;
1715 goto sync_out;
1716 }
1717
1718 blkaddr = dn.data_blkaddr;
1719 if (is_hole)
1720 map->m_flags |= F2FS_MAP_NEW;
1721 } else if (is_hole) {
1722 if (f2fs_compressed_file(inode) &&
1723 f2fs_sanity_check_cluster(&dn)) {
1724 err = -EFSCORRUPTED;
1725 f2fs_handle_error(sbi,
1726 ERROR_CORRUPTED_CLUSTER);
1727 goto sync_out;
1728 }
1729
1730 switch (flag) {
1731 case F2FS_GET_BLOCK_PRECACHE:
1732 goto sync_out;
1733 case F2FS_GET_BLOCK_BMAP:
1734 map->m_pblk = 0;
1735 goto sync_out;
1736 case F2FS_GET_BLOCK_FIEMAP:
1737 if (blkaddr == NULL_ADDR) {
1738 if (map->m_next_pgofs)
1739 *map->m_next_pgofs = pgofs + 1;
1740 goto sync_out;
1741 }
1742 break;
1743 case F2FS_GET_BLOCK_DIO:
1744 if (map->m_next_pgofs)
1745 *map->m_next_pgofs = pgofs + 1;
1746 break;
1747 default:
1748 /* for defragment case */
1749 if (map->m_next_pgofs)
1750 *map->m_next_pgofs = pgofs + 1;
1751 goto sync_out;
1752 }
1753 }
1754
1755 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1756 goto skip;
1757
1758 if (map->m_multidev_dio)
1759 bidx = f2fs_target_device_index(sbi, blkaddr);
1760
1761 if (map->m_len == 0) {
1762 /* reserved delalloc block should be mapped for fiemap. */
1763 if (blkaddr == NEW_ADDR)
1764 map->m_flags |= F2FS_MAP_DELALLOC;
1765 /* DIO READ and hole case, should not map the blocks. */
1766 if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
1767 map->m_flags |= F2FS_MAP_MAPPED;
1768
1769 map->m_pblk = blkaddr;
1770 map->m_len = 1;
1771
1772 if (map->m_multidev_dio)
1773 map->m_bdev = FDEV(bidx).bdev;
1774
1775 if (lfs_dio_write)
1776 map->m_last_pblk = NULL_ADDR;
1777 } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
1778 ofs++;
1779 map->m_len++;
1780 } else {
1781 if (lfs_dio_write && !f2fs_is_pinned_file(inode))
1782 map->m_last_pblk = blkaddr;
1783 goto sync_out;
1784 }
1785
1786 skip:
1787 dn.ofs_in_node++;
1788 pgofs++;
1789
1790 /* preallocate blocks in batch for one dnode page */
1791 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1792 (pgofs == end || dn.ofs_in_node == end_offset)) {
1793
1794 dn.ofs_in_node = ofs_in_node;
1795 err = f2fs_reserve_new_blocks(&dn, prealloc);
1796 if (err)
1797 goto sync_out;
1798
1799 map->m_len += dn.ofs_in_node - ofs_in_node;
1800 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1801 err = -ENOSPC;
1802 goto sync_out;
1803 }
1804 dn.ofs_in_node = end_offset;
1805 }
1806
1807 if (pgofs >= end)
1808 goto sync_out;
1809 else if (dn.ofs_in_node < end_offset)
1810 goto next_block;
1811
1812 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1813 if (map->m_flags & F2FS_MAP_MAPPED) {
1814 unsigned int ofs = start_pgofs - map->m_lblk;
1815
1816 f2fs_update_read_extent_cache_range(&dn,
1817 start_pgofs, map->m_pblk + ofs,
1818 map->m_len - ofs);
1819 }
1820 }
1821
1822 f2fs_put_dnode(&dn);
1823
1824 if (map->m_may_create) {
1825 f2fs_map_unlock(sbi, &lc, flag);
1826 f2fs_balance_fs(sbi, dn.node_changed);
1827 }
1828 goto next_dnode;
1829
1830 sync_out:
1831
1832 if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1833 /*
1834 		 * needed for hardware (inline) encryption; done unconditionally
1835 		 * to avoid potential issues in the future
1836 */
1837 f2fs_wait_on_block_writeback_range(inode,
1838 map->m_pblk, map->m_len);
1839
1840 if (map->m_multidev_dio) {
1841 block_t blk_addr = map->m_pblk;
1842
1843 bidx = f2fs_target_device_index(sbi, map->m_pblk);
1844
1845 map->m_bdev = FDEV(bidx).bdev;
1846 map->m_pblk -= FDEV(bidx).start_blk;
1847
1848 if (map->m_may_create)
1849 f2fs_update_device_state(sbi, inode->i_ino,
1850 blk_addr, map->m_len);
1851
1852 f2fs_bug_on(sbi, blk_addr + map->m_len >
1853 FDEV(bidx).end_blk + 1);
1854 }
1855 }
1856
1857 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1858 if (map->m_flags & F2FS_MAP_MAPPED) {
1859 unsigned int ofs = start_pgofs - map->m_lblk;
1860
1861 if (map->m_len > ofs)
1862 f2fs_update_read_extent_cache_range(&dn,
1863 start_pgofs, map->m_pblk + ofs,
1864 map->m_len - ofs);
1865 }
1866 if (map->m_next_extent)
1867 *map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
1868 }
1869 f2fs_put_dnode(&dn);
1870 unlock_out:
1871 if (map->m_may_create) {
1872 f2fs_map_unlock(sbi, &lc, flag);
1873 f2fs_balance_fs(sbi, dn.node_changed);
1874 }
1875 out:
1876 trace_f2fs_map_blocks(inode, map, flag, err);
1877 return err;
1878 }
1879
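/*
 * Check whether the byte range [pos, pos + len) lies within i_size and is
 * already mapped on disk, i.e. a write there is a pure overwrite.  When
 * @check_first is true, only the first mapping lookup is verified.
 */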
1880 static bool __f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len,
1881 bool check_first)
1882 {
1883 struct f2fs_map_blocks map;
1884 block_t last_lblk;
1885 int err;
1886
1887 if (pos + len > i_size_read(inode))
1888 return false;
1889
1890 map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1891 map.m_next_pgofs = NULL;
1892 map.m_next_extent = NULL;
1893 map.m_seg_type = NO_CHECK_TYPE;
1894 map.m_may_create = false;
1895 last_lblk = F2FS_BLK_ALIGN(pos + len);
1896
1897 while (map.m_lblk < last_lblk) {
1898 map.m_len = last_lblk - map.m_lblk;
1899 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1900 if (err || map.m_len == 0)
1901 return false;
1902 map.m_lblk += map.m_len;
1903 if (check_first)
1904 break;
1905 }
1906 return true;
1907 }
1908
1909 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1910 {
1911 return __f2fs_overwrite_io(inode, pos, len, false);
1912 }
1913
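/*
 * Report the inline xattr area (inside the inode block) and/or the separate
 * xattr node block as fiemap extents.
 */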
1914 static int f2fs_xattr_fiemap(struct inode *inode,
1915 struct fiemap_extent_info *fieinfo)
1916 {
1917 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1918 struct node_info ni;
1919 __u64 phys = 0, len;
1920 __u32 flags;
1921 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1922 int err = 0;
1923
1924 if (f2fs_has_inline_xattr(inode)) {
1925 int offset;
1926 struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi),
1927 inode->i_ino, false);
1928
1929 if (IS_ERR(folio))
1930 return PTR_ERR(folio);
1931
1932 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1933 if (err) {
1934 f2fs_folio_put(folio, true);
1935 return err;
1936 }
1937
1938 phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
1939 offset = offsetof(struct f2fs_inode, i_addr) +
1940 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1941 get_inline_xattr_addrs(inode));
1942
1943 phys += offset;
1944 len = inline_xattr_size(inode);
1945
1946 f2fs_folio_put(folio, true);
1947
1948 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1949
1950 if (!xnid)
1951 flags |= FIEMAP_EXTENT_LAST;
1952
1953 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1954 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1955 if (err)
1956 return err;
1957 }
1958
1959 if (xnid) {
1960 struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi),
1961 xnid, false);
1962
1963 if (IS_ERR(folio))
1964 return PTR_ERR(folio);
1965
1966 err = f2fs_get_node_info(sbi, xnid, &ni, false);
1967 if (err) {
1968 f2fs_folio_put(folio, true);
1969 return err;
1970 }
1971
1972 phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
1973 len = inode->i_sb->s_blocksize;
1974
1975 f2fs_folio_put(folio, true);
1976
1977 flags = FIEMAP_EXTENT_LAST;
1978 }
1979
1980 if (phys) {
1981 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1982 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1983 }
1984
1985 return (err < 0 ? err : 0);
1986 }
1987
1988 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1989 u64 start, u64 len)
1990 {
1991 struct f2fs_map_blocks map;
1992 sector_t start_blk, last_blk, blk_len, max_len;
1993 pgoff_t next_pgofs;
1994 u64 logical = 0, phys = 0, size = 0;
1995 u32 flags = 0;
1996 int ret = 0;
1997 bool compr_cluster = false, compr_appended;
1998 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1999 unsigned int count_in_cluster = 0;
2000 loff_t maxbytes;
2001
2002 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
2003 ret = f2fs_precache_extents(inode);
2004 if (ret)
2005 return ret;
2006 }
2007
2008 ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
2009 if (ret)
2010 return ret;
2011
2012 inode_lock_shared(inode);
2013
2014 maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
2015 if (start > maxbytes) {
2016 ret = -EFBIG;
2017 goto out;
2018 }
2019
2020 if (len > maxbytes || (maxbytes - len) < start)
2021 len = maxbytes - start;
2022
2023 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
2024 ret = f2fs_xattr_fiemap(inode, fieinfo);
2025 goto out;
2026 }
2027
2028 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
2029 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
2030 if (ret != -EAGAIN)
2031 goto out;
2032 }
2033
2034 start_blk = F2FS_BYTES_TO_BLK(start);
2035 last_blk = F2FS_BYTES_TO_BLK(start + len - 1);
2036 blk_len = last_blk - start_blk + 1;
2037 max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk;
2038
2039 next:
2040 memset(&map, 0, sizeof(map));
2041 map.m_lblk = start_blk;
2042 map.m_len = blk_len;
2043 map.m_next_pgofs = &next_pgofs;
2044 map.m_seg_type = NO_CHECK_TYPE;
2045
2046 if (compr_cluster) {
2047 map.m_lblk += 1;
2048 map.m_len = cluster_size - count_in_cluster;
2049 }
2050
2051 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
2052 if (ret)
2053 goto out;
2054
2055 /* HOLE */
2056 if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
2057 start_blk = next_pgofs;
2058
2059 if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes)
2060 goto prep_next;
2061
2062 flags |= FIEMAP_EXTENT_LAST;
2063 }
2064
2065 /*
2066 	 * the current extent may cross the boundary of the query range,
2067 	 * so increase the length and requery.
2068 */
2069 if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) &&
2070 map.m_lblk + map.m_len - 1 == last_blk &&
2071 blk_len != max_len) {
2072 blk_len = max_len;
2073 goto next;
2074 }
2075
2076 compr_appended = false;
2077 	/* In the case of a compressed cluster, append this to the last extent */
2078 if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
2079 !(map.m_flags & F2FS_MAP_FLAGS))) {
2080 compr_appended = true;
2081 goto skip_fill;
2082 }
2083
2084 if (size) {
2085 flags |= FIEMAP_EXTENT_MERGED;
2086 if (IS_ENCRYPTED(inode))
2087 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
2088
2089 ret = fiemap_fill_next_extent(fieinfo, logical,
2090 phys, size, flags);
2091 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
2092 if (ret)
2093 goto out;
2094 size = 0;
2095 }
2096
2097 if (start_blk > last_blk)
2098 goto out;
2099
2100 skip_fill:
2101 if (map.m_pblk == COMPRESS_ADDR) {
2102 compr_cluster = true;
2103 count_in_cluster = 1;
2104 } else if (compr_appended) {
2105 unsigned int appended_blks = cluster_size -
2106 count_in_cluster + 1;
2107 size += F2FS_BLK_TO_BYTES(appended_blks);
2108 start_blk += appended_blks;
2109 compr_cluster = false;
2110 } else {
2111 logical = F2FS_BLK_TO_BYTES(start_blk);
2112 phys = __is_valid_data_blkaddr(map.m_pblk) ?
2113 F2FS_BLK_TO_BYTES(map.m_pblk) : 0;
2114 size = F2FS_BLK_TO_BYTES(map.m_len);
2115 flags = 0;
2116
2117 if (compr_cluster) {
2118 flags = FIEMAP_EXTENT_ENCODED;
2119 count_in_cluster += map.m_len;
2120 if (count_in_cluster == cluster_size) {
2121 compr_cluster = false;
2122 size += F2FS_BLKSIZE;
2123 }
2124 } else if (map.m_flags & F2FS_MAP_DELALLOC) {
2125 flags = FIEMAP_EXTENT_UNWRITTEN;
2126 }
2127
2128 start_blk += F2FS_BYTES_TO_BLK(size);
2129 }
2130
2131 prep_next:
2132 cond_resched();
2133 if (fatal_signal_pending(current))
2134 ret = -EINTR;
2135 else
2136 goto next;
2137 out:
2138 if (ret == 1)
2139 ret = 0;
2140
2141 inode_unlock_shared(inode);
2142 return ret;
2143 }
2144
2145 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2146 {
2147 if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
2148 return F2FS_BLK_TO_BYTES(max_file_blocks(inode));
2149
2150 return i_size_read(inode);
2151 }
2152
2153 static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
2154 {
2155 return rac ? REQ_RAHEAD : 0;
2156 }
2157
2158 static int f2fs_read_single_page(struct inode *inode, struct fsverity_info *vi,
2159 struct folio *folio, unsigned int nr_pages,
2160 struct f2fs_map_blocks *map,
2161 struct bio **bio_ret,
2162 sector_t *last_block_in_bio,
2163 struct readahead_control *rac)
2164 {
2165 struct bio *bio = *bio_ret;
2166 const unsigned int blocksize = F2FS_BLKSIZE;
2167 sector_t block_in_file;
2168 sector_t last_block;
2169 sector_t last_block_in_file;
2170 sector_t block_nr;
2171 pgoff_t index = folio->index;
2172 int ret = 0;
2173
2174 block_in_file = (sector_t)index;
2175 last_block = block_in_file + nr_pages;
2176 last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
2177 blocksize - 1);
2178 if (last_block > last_block_in_file)
2179 last_block = last_block_in_file;
2180
2181 	/* just zero out the page which is beyond EOF */
2182 if (block_in_file >= last_block)
2183 goto zero_out;
2184 /*
2185 * Map blocks using the previous result first.
2186 */
2187 if (map->m_flags & F2FS_MAP_MAPPED) {
2188 if (block_in_file > map->m_lblk &&
2189 block_in_file < (map->m_lblk + map->m_len))
2190 goto got_it;
2191 } else if (block_in_file < *map->m_next_pgofs) {
2192 goto got_it;
2193 }
2194
2195 /*
2196 * Then do more f2fs_map_blocks() calls until we are
2197 * done with this page.
2198 */
2199 map->m_lblk = block_in_file;
2200 map->m_len = last_block - block_in_file;
2201
2202 ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
2203 if (ret)
2204 goto out;
2205 got_it:
2206 if ((map->m_flags & F2FS_MAP_MAPPED)) {
2207 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2208 folio_set_mappedtodisk(folio);
2209
2210 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2211 DATA_GENERIC_ENHANCE_READ)) {
2212 ret = -EFSCORRUPTED;
2213 goto out;
2214 }
2215 } else {
2216 zero_out:
2217 folio_zero_segment(folio, 0, folio_size(folio));
2218 if (vi && !fsverity_verify_folio(vi, folio)) {
2219 ret = -EIO;
2220 goto out;
2221 }
2222 if (!folio_test_uptodate(folio))
2223 folio_mark_uptodate(folio);
2224 folio_unlock(folio);
2225 goto out;
2226 }
2227
2228 /*
2229 * This page will go to BIO. Do we need to send this
2230 * BIO off first?
2231 */
2232 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2233 *last_block_in_bio, block_nr) ||
2234 !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
2235 submit_and_realloc:
2236 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2237 bio = NULL;
2238 }
2239 if (bio == NULL)
2240 bio = f2fs_grab_read_bio(inode, vi, block_nr, nr_pages,
2241 f2fs_ra_op_flags(rac), index, false);
2242
2243 /*
2244 * If the page is under writeback, we need to wait for
2245 * its completion to see the correct decrypted data.
2246 */
2247 f2fs_wait_on_block_writeback(inode, block_nr);
2248
2249 if (!bio_add_folio(bio, folio, blocksize, 0))
2250 goto submit_and_realloc;
2251
2252 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2253 f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
2254 F2FS_BLKSIZE);
2255 *last_block_in_bio = block_nr;
2256 out:
2257 *bio_ret = bio;
2258 return ret;
2259 }
2260
2261 #ifdef CONFIG_F2FS_FS_COMPRESSION
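/*
 * Read one compressed cluster: drop pages beyond EOF, collect the cluster's
 * compressed block addresses, allocate a decompress_io_ctx and submit read
 * bios for each compressed block that is not already cached.
 */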
2262 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2263 unsigned nr_pages, sector_t *last_block_in_bio,
2264 struct readahead_control *rac, bool for_write)
2265 {
2266 struct dnode_of_data dn;
2267 struct inode *inode = cc->inode;
2268 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2269 struct bio *bio = *bio_ret;
2270 unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2271 sector_t last_block_in_file;
2272 const unsigned int blocksize = F2FS_BLKSIZE;
2273 struct decompress_io_ctx *dic = NULL;
2274 struct extent_info ei = {};
2275 bool from_dnode = true;
2276 int i;
2277 int ret = 0;
2278
2279 if (unlikely(f2fs_cp_error(sbi))) {
2280 ret = -EIO;
2281 from_dnode = false;
2282 goto out_put_dnode;
2283 }
2284
2285 f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2286
2287 last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
2288 blocksize - 1);
2289
2290 /* get rid of pages beyond EOF */
2291 for (i = 0; i < cc->cluster_size; i++) {
2292 struct page *page = cc->rpages[i];
2293 struct folio *folio;
2294
2295 if (!page)
2296 continue;
2297
2298 folio = page_folio(page);
2299 if ((sector_t)folio->index >= last_block_in_file) {
2300 folio_zero_segment(folio, 0, folio_size(folio));
2301 if (!folio_test_uptodate(folio))
2302 folio_mark_uptodate(folio);
2303 } else if (!folio_test_uptodate(folio)) {
2304 continue;
2305 }
2306 folio_unlock(folio);
2307 if (for_write)
2308 folio_put(folio);
2309 cc->rpages[i] = NULL;
2310 cc->nr_rpages--;
2311 }
2312
2313 /* we are done since all pages are beyond EOF */
2314 if (f2fs_cluster_is_empty(cc))
2315 goto out;
2316
2317 if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2318 from_dnode = false;
2319
2320 if (!from_dnode)
2321 goto skip_reading_dnode;
2322
2323 set_new_dnode(&dn, inode, NULL, NULL, 0);
2324 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2325 if (ret)
2326 goto out;
2327
2328 f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2329
2330 skip_reading_dnode:
2331 for (i = 1; i < cc->cluster_size; i++) {
2332 block_t blkaddr;
2333
2334 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio,
2335 dn.ofs_in_node + i) :
2336 ei.blk + i - 1;
2337
2338 if (!__is_valid_data_blkaddr(blkaddr))
2339 break;
2340
2341 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2342 ret = -EFAULT;
2343 goto out_put_dnode;
2344 }
2345 cc->nr_cpages++;
2346
2347 if (!from_dnode && i >= ei.c_len)
2348 break;
2349 }
2350
2351 /* nothing to decompress */
2352 if (cc->nr_cpages == 0) {
2353 ret = 0;
2354 goto out_put_dnode;
2355 }
2356
2357 dic = f2fs_alloc_dic(cc);
2358 if (IS_ERR(dic)) {
2359 ret = PTR_ERR(dic);
2360 goto out_put_dnode;
2361 }
2362
2363 for (i = 0; i < cc->nr_cpages; i++) {
2364 struct folio *folio = page_folio(dic->cpages[i]);
2365 block_t blkaddr;
2366 struct bio_post_read_ctx *ctx;
2367
2368 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio,
2369 dn.ofs_in_node + i + 1) :
2370 ei.blk + i;
2371
2372 f2fs_wait_on_block_writeback(inode, blkaddr);
2373
2374 if (f2fs_load_compressed_folio(sbi, folio, blkaddr)) {
2375 if (atomic_dec_and_test(&dic->remaining_pages)) {
2376 f2fs_decompress_cluster(dic, true);
2377 break;
2378 }
2379 continue;
2380 }
2381
2382 if (bio && (!page_is_mergeable(sbi, bio,
2383 *last_block_in_bio, blkaddr) ||
2384 !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) {
2385 submit_and_realloc:
2386 f2fs_submit_read_bio(sbi, bio, DATA);
2387 bio = NULL;
2388 }
2389
2390 if (!bio)
2391 bio = f2fs_grab_read_bio(inode, cc->vi, blkaddr,
2392 nr_pages - i,
2393 f2fs_ra_op_flags(rac),
2394 folio->index, for_write);
2395
2396 if (!bio_add_folio(bio, folio, blocksize, 0))
2397 goto submit_and_realloc;
2398
2399 ctx = get_post_read_ctx(bio);
2400 ctx->enabled_steps |= STEP_DECOMPRESS;
2401 refcount_inc(&dic->refcnt);
2402
2403 inc_page_count(sbi, F2FS_RD_DATA);
2404 f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2405 *last_block_in_bio = blkaddr;
2406 }
2407
2408 if (from_dnode)
2409 f2fs_put_dnode(&dn);
2410
2411 *bio_ret = bio;
2412 return 0;
2413
2414 out_put_dnode:
2415 if (from_dnode)
2416 f2fs_put_dnode(&dn);
2417 out:
2418 for (i = 0; i < cc->cluster_size; i++) {
2419 if (cc->rpages[i]) {
2420 ClearPageUptodate(cc->rpages[i]);
2421 unlock_page(cc->rpages[i]);
2422 }
2423 }
2424 *bio_ret = bio;
2425 return ret;
2426 }
2427 #endif
2428
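/*
 * Return the f2fs_folio_state attached to @folio, allocating and attaching
 * one if it does not exist yet.
 */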
2429 static struct f2fs_folio_state *ffs_find_or_alloc(struct folio *folio)
2430 {
2431 struct f2fs_folio_state *ffs = folio->private;
2432
2433 if (ffs)
2434 return ffs;
2435
2436 ffs = f2fs_kmem_cache_alloc(ffs_entry_slab,
2437 GFP_NOIO | __GFP_ZERO, true, NULL);
2438
2439 spin_lock_init(&ffs->state_lock);
2440 folio_attach_private(folio, ffs);
2441 return ffs;
2442 }
2443
2444 static void ffs_detach_free(struct folio *folio)
2445 {
2446 struct f2fs_folio_state *ffs;
2447
2448 if (!folio_test_large(folio)) {
2449 folio_detach_private(folio);
2450 return;
2451 }
2452
2453 ffs = folio_detach_private(folio);
2454 if (!ffs)
2455 return;
2456
2457 WARN_ON_ONCE(ffs->read_pages_pending != 0);
2458 kmem_cache_free(ffs_entry_slab, ffs);
2459 }
2460
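/*
 * Read path for large folios: map each page-sized block of the folio, zero
 * holes, and add mapped blocks to read bios, tracking pending reads in the
 * per-folio f2fs_folio_state so the folio is completed only once.
 */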
2461 static int f2fs_read_data_large_folio(struct inode *inode,
2462 struct fsverity_info *vi,
2463 struct readahead_control *rac, struct folio *folio)
2464 {
2465 struct bio *bio = NULL;
2466 sector_t last_block_in_bio = 0;
2467 struct f2fs_map_blocks map = {0, };
2468 pgoff_t index, offset, next_pgofs = 0;
2469 unsigned max_nr_pages = rac ? readahead_count(rac) :
2470 folio_nr_pages(folio);
2471 unsigned nrpages;
2472 struct f2fs_folio_state *ffs;
2473 int ret = 0;
2474 bool folio_in_bio;
2475
2476 if (!IS_IMMUTABLE(inode) || f2fs_compressed_file(inode)) {
2477 if (folio)
2478 folio_unlock(folio);
2479 return -EOPNOTSUPP;
2480 }
2481
2482 map.m_seg_type = NO_CHECK_TYPE;
2483
2484 if (rac)
2485 folio = readahead_folio(rac);
2486 next_folio:
2487 if (!folio)
2488 goto out;
2489
2490 folio_in_bio = false;
2491 index = folio->index;
2492 offset = 0;
2493 ffs = NULL;
2494 nrpages = folio_nr_pages(folio);
2495
2496 for (; nrpages; nrpages--, max_nr_pages--, index++, offset++) {
2497 sector_t block_nr;
2498 /*
2499 * Map blocks using the previous result first.
2500 */
2501 if (map.m_flags & F2FS_MAP_MAPPED) {
2502 if (index > map.m_lblk &&
2503 index < (map.m_lblk + map.m_len))
2504 goto got_it;
2505 } else if (index < next_pgofs) {
2506 /* hole case */
2507 goto got_it;
2508 }
2509
2510 /*
2511 * Then do more f2fs_map_blocks() calls until we are
2512 * done with this page.
2513 */
2514 memset(&map, 0, sizeof(map));
2515 map.m_next_pgofs = &next_pgofs;
2516 map.m_seg_type = NO_CHECK_TYPE;
2517 map.m_lblk = index;
2518 map.m_len = max_nr_pages;
2519
2520 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2521 if (ret)
2522 goto err_out;
2523 got_it:
2524 if ((map.m_flags & F2FS_MAP_MAPPED)) {
2525 block_nr = map.m_pblk + index - map.m_lblk;
2526 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2527 DATA_GENERIC_ENHANCE_READ)) {
2528 ret = -EFSCORRUPTED;
2529 goto err_out;
2530 }
2531 } else {
2532 size_t page_offset = offset << PAGE_SHIFT;
2533 folio_zero_range(folio, page_offset, PAGE_SIZE);
2534 if (vi && !fsverity_verify_blocks(vi, folio, PAGE_SIZE, page_offset)) {
2535 ret = -EIO;
2536 goto err_out;
2537 }
2538 continue;
2539 }
2540
2541 		/* We must increment read_pages_pending before any bio is submitted,
2542 		 * to prevent a premature folio_end_read() call on the folio.
2543 		 */
2544 if (folio_test_large(folio)) {
2545 ffs = ffs_find_or_alloc(folio);
2546
2547 			/* count this page as a pending read */
2548 spin_lock_irq(&ffs->state_lock);
2549 ffs->read_pages_pending++;
2550 spin_unlock_irq(&ffs->state_lock);
2551 }
2552
2553 /*
2554 * This page will go to BIO. Do we need to send this
2555 * BIO off first?
2556 */
2557 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2558 last_block_in_bio, block_nr) ||
2559 !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
2560 submit_and_realloc:
2561 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2562 bio = NULL;
2563 }
2564 if (bio == NULL)
2565 bio = f2fs_grab_read_bio(inode, vi,
2566 block_nr, max_nr_pages,
2567 f2fs_ra_op_flags(rac),
2568 index, false);
2569
2570 /*
2571 * If the page is under writeback, we need to wait for
2572 * its completion to see the correct decrypted data.
2573 */
2574 f2fs_wait_on_block_writeback(inode, block_nr);
2575
2576 if (!bio_add_folio(bio, folio, F2FS_BLKSIZE,
2577 offset << PAGE_SHIFT))
2578 goto submit_and_realloc;
2579
2580 folio_in_bio = true;
2581 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2582 f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
2583 F2FS_BLKSIZE);
2584 last_block_in_bio = block_nr;
2585 }
2586 trace_f2fs_read_folio(folio, DATA);
2587 err_out:
2588 if (!folio_in_bio) {
2589 folio_end_read(folio, !ret);
2590 if (ret)
2591 return ret;
2592 }
2593 if (rac) {
2594 folio = readahead_folio(rac);
2595 goto next_folio;
2596 }
2597 out:
2598 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2599 if (ret) {
2600 		/* Wait for in-flight bios and clear uptodate. */
2601 folio_lock(folio);
2602 folio_clear_uptodate(folio);
2603 folio_unlock(folio);
2604 }
2605 return ret;
2606 }
2607
2608 /*
2609 * This function was originally taken from fs/mpage.c, and customized for f2fs.
2610  * The major change is that block_size == page_size in f2fs by default.
2611 */
2612 static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
2613 struct readahead_control *rac, struct folio *folio)
2614 {
2615 struct bio *bio = NULL;
2616 sector_t last_block_in_bio = 0;
2617 struct f2fs_map_blocks map;
2618 #ifdef CONFIG_F2FS_FS_COMPRESSION
2619 struct compress_ctx cc = {
2620 .inode = inode,
2621 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2622 .cluster_size = F2FS_I(inode)->i_cluster_size,
2623 .cluster_idx = NULL_CLUSTER,
2624 .rpages = NULL,
2625 .cpages = NULL,
2626 .nr_rpages = 0,
2627 .nr_cpages = 0,
2628 };
2629 pgoff_t nc_cluster_idx = NULL_CLUSTER;
2630 pgoff_t index;
2631 #endif
2632 pgoff_t next_pgofs = 0;
2633 unsigned nr_pages = rac ? readahead_count(rac) : 1;
2634 struct address_space *mapping = rac ? rac->mapping : folio->mapping;
2635 unsigned max_nr_pages = nr_pages;
2636 int ret = 0;
2637
2638 if (mapping_large_folio_support(mapping))
2639 return f2fs_read_data_large_folio(inode, vi, rac, folio);
2640
2641 #ifdef CONFIG_F2FS_FS_COMPRESSION
2642 if (f2fs_compressed_file(inode)) {
2643 index = rac ? readahead_index(rac) : folio->index;
2644 max_nr_pages = round_up(index + nr_pages, cc.cluster_size) -
2645 round_down(index, cc.cluster_size);
2646 }
2647 #endif
2648
2649 map.m_pblk = 0;
2650 map.m_lblk = 0;
2651 map.m_len = 0;
2652 map.m_flags = 0;
2653 map.m_next_pgofs = &next_pgofs;
2654 map.m_next_extent = NULL;
2655 map.m_seg_type = NO_CHECK_TYPE;
2656 map.m_may_create = false;
2657
2658 for (; nr_pages; nr_pages--) {
2659 if (rac) {
2660 folio = readahead_folio(rac);
2661 prefetchw(&folio->flags);
2662 }
2663
2664 #ifdef CONFIG_F2FS_FS_COMPRESSION
2665 index = folio->index;
2666
2667 if (!f2fs_compressed_file(inode))
2668 goto read_single_page;
2669
2670 		/* there are remaining compressed pages, submit them */
2671 if (!f2fs_cluster_can_merge_page(&cc, index)) {
2672 cc.vi = vi;
2673 ret = f2fs_read_multi_pages(&cc, &bio,
2674 max_nr_pages,
2675 &last_block_in_bio,
2676 rac, false);
2677 f2fs_destroy_compress_ctx(&cc, false);
2678 if (ret)
2679 goto set_error_page;
2680 }
2681 if (cc.cluster_idx == NULL_CLUSTER) {
2682 if (nc_cluster_idx == index >> cc.log_cluster_size)
2683 goto read_single_page;
2684
2685 ret = f2fs_is_compressed_cluster(inode, index);
2686 if (ret < 0)
2687 goto set_error_page;
2688 else if (!ret) {
2689 nc_cluster_idx =
2690 index >> cc.log_cluster_size;
2691 goto read_single_page;
2692 }
2693
2694 nc_cluster_idx = NULL_CLUSTER;
2695 }
2696 ret = f2fs_init_compress_ctx(&cc);
2697 if (ret)
2698 goto set_error_page;
2699
2700 f2fs_compress_ctx_add_page(&cc, folio);
2701
2702 goto next_page;
2703 read_single_page:
2704 #endif
2705
2706 ret = f2fs_read_single_page(inode, vi, folio, max_nr_pages,
2707 &map, &bio, &last_block_in_bio,
2708 rac);
2709 if (ret) {
2710 #ifdef CONFIG_F2FS_FS_COMPRESSION
2711 set_error_page:
2712 #endif
2713 folio_zero_segment(folio, 0, folio_size(folio));
2714 folio_unlock(folio);
2715 }
2716 #ifdef CONFIG_F2FS_FS_COMPRESSION
2717 next_page:
2718 #endif
2719
2720 #ifdef CONFIG_F2FS_FS_COMPRESSION
2721 if (f2fs_compressed_file(inode)) {
2722 /* last page */
2723 if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2724 cc.vi = vi;
2725 ret = f2fs_read_multi_pages(&cc, &bio,
2726 max_nr_pages,
2727 &last_block_in_bio,
2728 rac, false);
2729 f2fs_destroy_compress_ctx(&cc, false);
2730 }
2731 }
2732 #endif
2733 }
2734 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2735 return ret;
2736 }
2737
2738 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2739 {
2740 struct inode *inode = folio->mapping->host;
2741 struct fsverity_info *vi = NULL;
2742 int ret;
2743
2744 trace_f2fs_readpage(folio, DATA);
2745
2746 if (!f2fs_is_compress_backend_ready(inode)) {
2747 folio_unlock(folio);
2748 return -EOPNOTSUPP;
2749 }
2750
2751 /* If the file has inline data, try to read it directly */
2752 if (f2fs_has_inline_data(inode)) {
2753 ret = f2fs_read_inline_data(inode, folio);
2754 if (ret != -EAGAIN)
2755 return ret;
2756 }
2757
2758 vi = f2fs_need_verity(inode, folio->index);
2759 if (vi)
2760 fsverity_readahead(vi, folio->index, folio_nr_pages(folio));
2761 return f2fs_mpage_readpages(inode, vi, NULL, folio);
2762 }
2763
2764 static void f2fs_readahead(struct readahead_control *rac)
2765 {
2766 struct inode *inode = rac->mapping->host;
2767 struct fsverity_info *vi = NULL;
2768
2769 trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2770
2771 if (!f2fs_is_compress_backend_ready(inode))
2772 return;
2773
2774 /* If the file has inline data, skip readahead */
2775 if (f2fs_has_inline_data(inode))
2776 return;
2777
2778 vi = f2fs_need_verity(inode, readahead_index(rac));
2779 if (vi)
2780 fsverity_readahead(vi, readahead_index(rac),
2781 readahead_count(rac));
2782 f2fs_mpage_readpages(inode, vi, rac, NULL);
2783 }
2784
2785 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2786 {
2787 struct inode *inode = fio_inode(fio);
2788 struct folio *mfolio;
2789 struct page *page;
2790 gfp_t gfp_flags = GFP_NOFS;
2791
2792 if (!f2fs_encrypted_file(inode))
2793 return 0;
2794
2795 page = fio->compressed_page ? fio->compressed_page : fio->page;
2796
2797 if (fscrypt_inode_uses_inline_crypto(inode))
2798 return 0;
2799
2800 retry_encrypt:
2801 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page),
2802 PAGE_SIZE, 0, gfp_flags);
2803 if (IS_ERR(fio->encrypted_page)) {
2804 /* flush pending IOs and wait for a while in the ENOMEM case */
2805 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2806 f2fs_flush_merged_writes(fio->sbi);
2807 memalloc_retry_wait(GFP_NOFS);
2808 gfp_flags |= __GFP_NOFAIL;
2809 goto retry_encrypt;
2810 }
2811 return PTR_ERR(fio->encrypted_page);
2812 }
2813
2814 mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr);
2815 if (!IS_ERR(mfolio)) {
2816 if (folio_test_uptodate(mfolio))
2817 memcpy(folio_address(mfolio),
2818 page_address(fio->encrypted_page), PAGE_SIZE);
2819 f2fs_folio_put(mfolio, true);
2820 }
2821 return 0;
2822 }
2823
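/*
 * Evaluate the configured IPU policy bits to decide whether this write may
 * be done in place.
 */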
2824 static inline bool check_inplace_update_policy(struct inode *inode,
2825 struct f2fs_io_info *fio)
2826 {
2827 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2828
2829 if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
2830 is_inode_flag_set(inode, FI_OPU_WRITE))
2831 return false;
2832 if (IS_F2FS_IPU_FORCE(sbi))
2833 return true;
2834 if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
2835 return true;
2836 if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
2837 return true;
2838 if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
2839 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2840 return true;
2841
2842 /*
2843 	 * IPU for rewriting async pages
2844 */
2845 if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
2846 !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2847 return true;
2848
2849 /* this is only set during fdatasync */
2850 if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2851 return true;
2852
2853 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2854 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2855 return true;
2856
2857 return false;
2858 }
2859
2860 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2861 {
2862 /* swap file is migrating in aligned write mode */
2863 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2864 return false;
2865
2866 if (f2fs_is_pinned_file(inode))
2867 return true;
2868
2869 	/* if this is a cold file, we should overwrite to avoid fragmentation */
2870 if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2871 return true;
2872
2873 return check_inplace_update_policy(inode, fio);
2874 }
2875
2876 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2877 {
2878 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2879
2880 /* The below cases were checked when setting it. */
2881 if (f2fs_is_pinned_file(inode))
2882 return false;
2883 if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2884 return true;
2885 if (f2fs_lfs_mode(sbi))
2886 return true;
2887 if (S_ISDIR(inode->i_mode))
2888 return true;
2889 if (IS_NOQUOTA(inode))
2890 return true;
2891 if (f2fs_used_in_atomic_write(inode))
2892 return true;
2893 	/* rewrite low-ratio compressed data w/ OPU mode to avoid fragmentation */
2894 if (f2fs_compressed_file(inode) &&
2895 F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
2896 is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
2897 return true;
2898
2899 /* swap file is migrating in aligned write mode */
2900 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2901 return true;
2902
2903 if (is_inode_flag_set(inode, FI_OPU_WRITE))
2904 return true;
2905
2906 if (fio) {
2907 if (page_private_gcing(fio->page))
2908 return true;
2909 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2910 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2911 return true;
2912 }
2913 return false;
2914 }
2915
2916 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2917 {
2918 struct inode *inode = fio_inode(fio);
2919
2920 if (f2fs_should_update_outplace(inode, fio))
2921 return false;
2922
2923 return f2fs_should_update_inplace(inode, fio);
2924 }
2925
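/*
 * Write one data folio to disk, either in place (IPU) or out of place (OPU)
 * depending on the update policy.  For atomic writes the dnode is taken from
 * the COW inode.
 */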
2926 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2927 {
2928 struct folio *folio = fio->folio;
2929 struct inode *inode = folio->mapping->host;
2930 struct dnode_of_data dn;
2931 struct node_info ni;
2932 struct f2fs_lock_context lc;
2933 bool ipu_force = false;
2934 bool atomic_commit;
2935 int err = 0;
2936
2937 /* Use COW inode to make dnode_of_data for atomic write */
2938 atomic_commit = f2fs_is_atomic_file(inode) &&
2939 folio_test_f2fs_atomic(folio);
2940 if (atomic_commit)
2941 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2942 else
2943 set_new_dnode(&dn, inode, NULL, NULL, 0);
2944
2945 if (need_inplace_update(fio) &&
2946 f2fs_lookup_read_extent_cache_block(inode, folio->index,
2947 &fio->old_blkaddr)) {
2948 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2949 DATA_GENERIC_ENHANCE))
2950 return -EFSCORRUPTED;
2951
2952 ipu_force = true;
2953 fio->need_lock = LOCK_DONE;
2954 goto got_it;
2955 }
2956
2957 if (is_sbi_flag_set(fio->sbi, SBI_ENABLE_CHECKPOINT) &&
2958 time_to_inject(fio->sbi, FAULT_SKIP_WRITE))
2959 return -EINVAL;
2960
2961 	/* Avoid deadlock between page->lock and f2fs_lock_op */
2962 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi, &lc))
2963 return -EAGAIN;
2964
2965 err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
2966 if (err)
2967 goto out;
2968
2969 fio->old_blkaddr = dn.data_blkaddr;
2970
2971 /* This page is already truncated */
2972 if (fio->old_blkaddr == NULL_ADDR) {
2973 folio_clear_uptodate(folio);
2974 folio_clear_f2fs_gcing(folio);
2975 goto out_writepage;
2976 }
2977 got_it:
2978 if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2979 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2980 DATA_GENERIC_ENHANCE)) {
2981 err = -EFSCORRUPTED;
2982 goto out_writepage;
2983 }
2984
2985 /* wait for GCed page writeback via META_MAPPING */
2986 if (fio->meta_gc)
2987 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2988
2989 /*
2990 	 * If the current allocation needs SSR,
2991 	 * it is better to write updated data in place.
2992 */
2993 if (ipu_force ||
2994 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2995 need_inplace_update(fio))) {
2996 err = f2fs_encrypt_one_page(fio);
2997 if (err)
2998 goto out_writepage;
2999
3000 folio_start_writeback(folio);
3001 f2fs_put_dnode(&dn);
3002 if (fio->need_lock == LOCK_REQ)
3003 f2fs_unlock_op(fio->sbi, &lc);
3004 err = f2fs_inplace_write_data(fio);
3005 if (err) {
3006 if (fscrypt_inode_uses_fs_layer_crypto(inode))
3007 fscrypt_finalize_bounce_page(&fio->encrypted_page);
3008 folio_end_writeback(folio);
3009 } else {
3010 set_inode_flag(inode, FI_UPDATE_WRITE);
3011 }
3012 trace_f2fs_do_write_data_page(folio, IPU);
3013 return err;
3014 }
3015
3016 if (fio->need_lock == LOCK_RETRY) {
3017 if (!f2fs_trylock_op(fio->sbi, &lc)) {
3018 err = -EAGAIN;
3019 goto out_writepage;
3020 }
3021 fio->need_lock = LOCK_REQ;
3022 }
3023
3024 err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
3025 if (err)
3026 goto out_writepage;
3027
3028 fio->version = ni.version;
3029
3030 err = f2fs_encrypt_one_page(fio);
3031 if (err)
3032 goto out_writepage;
3033
3034 folio_start_writeback(folio);
3035
3036 if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
3037 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
3038
3039 /* LFS mode write path */
3040 f2fs_outplace_write_data(&dn, fio);
3041 trace_f2fs_do_write_data_page(folio, OPU);
3042 set_inode_flag(inode, FI_APPEND_WRITE);
3043 if (atomic_commit)
3044 folio_clear_f2fs_atomic(folio);
3045 out_writepage:
3046 f2fs_put_dnode(&dn);
3047 out:
3048 if (fio->need_lock == LOCK_REQ)
3049 f2fs_unlock_op(fio->sbi, &lc);
3050 return err;
3051 }
3052
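/*
 * Write one dirty data folio.  Returns 0 on success, 1 if the folio was
 * redirtied without an error, or a negative errno.
 */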
3053 int f2fs_write_single_data_page(struct folio *folio, int *submitted,
3054 struct bio **bio,
3055 sector_t *last_block,
3056 struct writeback_control *wbc,
3057 enum iostat_type io_type,
3058 int compr_blocks,
3059 bool allow_balance)
3060 {
3061 struct inode *inode = folio->mapping->host;
3062 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3063 loff_t i_size = i_size_read(inode);
3064 const pgoff_t end_index = ((unsigned long long)i_size)
3065 >> PAGE_SHIFT;
3066 loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT;
3067 unsigned offset = 0;
3068 bool need_balance_fs = false;
3069 bool quota_inode = IS_NOQUOTA(inode);
3070 int err = 0;
3071 struct f2fs_io_info fio = {
3072 .sbi = sbi,
3073 .ino = inode->i_ino,
3074 .type = DATA,
3075 .op = REQ_OP_WRITE,
3076 .op_flags = wbc_to_write_flags(wbc),
3077 .old_blkaddr = NULL_ADDR,
3078 .folio = folio,
3079 .encrypted_page = NULL,
3080 .submitted = 0,
3081 .compr_blocks = compr_blocks,
3082 .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
3083 .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0,
3084 .io_type = io_type,
3085 .io_wbc = wbc,
3086 .bio = bio,
3087 .last_block = last_block,
3088 };
3089
3090 trace_f2fs_writepage(folio, DATA);
3091
3092 	/* we should bypass data pages to let the kworker jobs proceed */
3093 if (unlikely(f2fs_cp_error(sbi))) {
3094 mapping_set_error(folio->mapping, -EIO);
3095 /*
3096 		 * don't drop any dirty dentry pages, to keep the latest
3097 * directory structure.
3098 */
3099 if (S_ISDIR(inode->i_mode) &&
3100 !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
3101 goto redirty_out;
3102
3103 /* keep data pages in remount-ro mode */
3104 if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
3105 goto redirty_out;
3106 goto out;
3107 }
3108
3109 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3110 goto redirty_out;
3111
3112 if (folio->index < end_index ||
3113 f2fs_verity_in_progress(inode) ||
3114 compr_blocks)
3115 goto write;
3116
3117 /*
3118 * If the offset is out-of-range of file size,
3119 * this page does not have to be written to disk.
3120 */
3121 offset = i_size & (PAGE_SIZE - 1);
3122 if ((folio->index >= end_index + 1) || !offset)
3123 goto out;
3124
3125 folio_zero_segment(folio, offset, folio_size(folio));
3126 write:
3127 /* Dentry/quota blocks are controlled by checkpoint */
3128 if (S_ISDIR(inode->i_mode) || quota_inode) {
3129 struct f2fs_lock_context lc;
3130
3131 /*
3132 * We need to wait for node_write to avoid block allocation during
3133 		 * checkpoint. This can only happen for quota writes, which could
3134 		 * otherwise cause a discard race condition.
3135 */
3136 if (quota_inode)
3137 f2fs_down_read_trace(&sbi->node_write, &lc);
3138
3139 fio.need_lock = LOCK_DONE;
3140 err = f2fs_do_write_data_page(&fio);
3141
3142 if (quota_inode)
3143 f2fs_up_read_trace(&sbi->node_write, &lc);
3144
3145 goto done;
3146 }
3147
3148 need_balance_fs = true;
3149 err = -EAGAIN;
3150 if (f2fs_has_inline_data(inode)) {
3151 err = f2fs_write_inline_data(inode, folio);
3152 if (!err)
3153 goto out;
3154 }
3155
3156 if (err == -EAGAIN) {
3157 err = f2fs_do_write_data_page(&fio);
3158 if (err == -EAGAIN) {
3159 f2fs_bug_on(sbi, compr_blocks);
3160 fio.need_lock = LOCK_REQ;
3161 err = f2fs_do_write_data_page(&fio);
3162 }
3163 }
3164
3165 if (err) {
3166 file_set_keep_isize(inode);
3167 } else {
3168 spin_lock(&F2FS_I(inode)->i_size_lock);
3169 if (F2FS_I(inode)->last_disk_size < psize)
3170 F2FS_I(inode)->last_disk_size = psize;
3171 spin_unlock(&F2FS_I(inode)->i_size_lock);
3172 }
3173
3174 done:
3175 if (err && err != -ENOENT)
3176 goto redirty_out;
3177
3178 out:
3179 inode_dec_dirty_pages(inode);
3180 if (err) {
3181 folio_clear_uptodate(folio);
3182 folio_clear_f2fs_gcing(folio);
3183 }
3184 folio_unlock(folio);
3185 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
3186 !F2FS_I(inode)->wb_task && allow_balance)
3187 f2fs_balance_fs(sbi, need_balance_fs);
3188
3189 if (unlikely(f2fs_cp_error(sbi))) {
3190 f2fs_submit_merged_write(sbi, DATA);
3191 if (bio && *bio)
3192 f2fs_submit_merged_ipu_write(sbi, bio, NULL);
3193 submitted = NULL;
3194 }
3195
3196 if (submitted)
3197 *submitted = fio.submitted;
3198
3199 return 0;
3200
3201 redirty_out:
3202 folio_redirty_for_writepage(wbc, folio);
3203 /*
3204 	 * pageout() in MM translates EAGAIN, so it calls handle_write_error()
3205 	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
3206 	 * file_write_and_wait_range() will then see the EIO error, which is
3207 	 * critical for fsync() to report the atomic_write failure to the user.
3208 */
3209 folio_unlock(folio);
3210 if (!err)
3211 return 1;
3212 return err;
3213 }
3214
3215 /*
3216 * This function was copied from write_cache_pages from mm/page-writeback.c.
3217  * The major change is that cold data pages are written in a separate
3218  * step from warm/hot data pages.
3219 */
3220 static int f2fs_write_cache_pages(struct address_space *mapping,
3221 struct writeback_control *wbc,
3222 enum iostat_type io_type)
3223 {
3224 int ret = 0;
3225 int done = 0, retry = 0;
3226 struct page *pages_local[F2FS_ONSTACK_PAGES];
3227 struct page **pages = pages_local;
3228 struct folio_batch fbatch;
3229 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
3230 struct bio *bio = NULL;
3231 sector_t last_block;
3232 #ifdef CONFIG_F2FS_FS_COMPRESSION
3233 struct inode *inode = mapping->host;
3234 struct compress_ctx cc = {
3235 .inode = inode,
3236 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
3237 .cluster_size = F2FS_I(inode)->i_cluster_size,
3238 .cluster_idx = NULL_CLUSTER,
3239 .rpages = NULL,
3240 .nr_rpages = 0,
3241 .cpages = NULL,
3242 .valid_nr_cpages = 0,
3243 .rbuf = NULL,
3244 .cbuf = NULL,
3245 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
3246 .private = NULL,
3247 };
3248 #endif
3249 int nr_folios, p, idx;
3250 int nr_pages;
3251 unsigned int max_pages = F2FS_ONSTACK_PAGES;
3252 pgoff_t index;
3253 pgoff_t end; /* Inclusive */
3254 pgoff_t done_index;
3255 int range_whole = 0;
3256 xa_mark_t tag;
3257 int nwritten = 0;
3258 int submitted = 0;
3259 int i;
3260
3261 #ifdef CONFIG_F2FS_FS_COMPRESSION
3262 if (f2fs_compressed_file(inode) &&
3263 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
3264 pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
3265 cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
3266 max_pages = 1 << cc.log_cluster_size;
3267 }
3268 #endif
3269
3270 folio_batch_init(&fbatch);
3271
3272 if (get_dirty_pages(mapping->host) <=
3273 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
3274 set_inode_flag(mapping->host, FI_HOT_DATA);
3275 else
3276 clear_inode_flag(mapping->host, FI_HOT_DATA);
3277
3278 if (wbc->range_cyclic) {
3279 index = mapping->writeback_index; /* prev offset */
3280 end = -1;
3281 } else {
3282 index = wbc->range_start >> PAGE_SHIFT;
3283 end = wbc->range_end >> PAGE_SHIFT;
3284 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3285 range_whole = 1;
3286 }
3287 tag = wbc_to_tag(wbc);
3288 retry:
3289 retry = 0;
3290 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3291 tag_pages_for_writeback(mapping, index, end);
3292 done_index = index;
3293 while (!done && !retry && (index <= end)) {
3294 nr_pages = 0;
3295 again:
3296 nr_folios = filemap_get_folios_tag(mapping, &index, end,
3297 tag, &fbatch);
3298 if (nr_folios == 0) {
3299 if (nr_pages)
3300 goto write;
3301 break;
3302 }
3303
3304 for (i = 0; i < nr_folios; i++) {
3305 struct folio *folio = fbatch.folios[i];
3306
3307 idx = 0;
3308 p = folio_nr_pages(folio);
3309 add_more:
3310 pages[nr_pages] = folio_page(folio, idx);
3311 folio_get(folio);
3312 if (++nr_pages == max_pages) {
3313 index = folio->index + idx + 1;
3314 folio_batch_release(&fbatch);
3315 goto write;
3316 }
3317 if (++idx < p)
3318 goto add_more;
3319 }
3320 folio_batch_release(&fbatch);
3321 goto again;
3322 write:
3323 for (i = 0; i < nr_pages; i++) {
3324 struct page *page = pages[i];
3325 struct folio *folio = page_folio(page);
3326 bool need_readd;
3327 readd:
3328 need_readd = false;
3329 #ifdef CONFIG_F2FS_FS_COMPRESSION
3330 if (f2fs_compressed_file(inode)) {
3331 void *fsdata = NULL;
3332 struct page *pagep;
3333 int ret2;
3334
3335 ret = f2fs_init_compress_ctx(&cc);
3336 if (ret) {
3337 done = 1;
3338 break;
3339 }
3340
3341 if (!f2fs_cluster_can_merge_page(&cc,
3342 folio->index)) {
3343 ret = f2fs_write_multi_pages(&cc,
3344 &submitted, wbc, io_type);
3345 if (!ret)
3346 need_readd = true;
3347 goto result;
3348 }
3349
3350 if (unlikely(f2fs_cp_error(sbi)))
3351 goto lock_folio;
3352
3353 if (!f2fs_cluster_is_empty(&cc))
3354 goto lock_folio;
3355
3356 if (f2fs_all_cluster_page_ready(&cc,
3357 pages, i, nr_pages, true))
3358 goto lock_folio;
3359
3360 ret2 = f2fs_prepare_compress_overwrite(
3361 inode, &pagep,
3362 folio->index, &fsdata);
3363 if (ret2 < 0) {
3364 ret = ret2;
3365 done = 1;
3366 break;
3367 } else if (ret2 &&
3368 (!f2fs_compress_write_end(inode,
3369 fsdata, folio->index, 1) ||
3370 !f2fs_all_cluster_page_ready(&cc,
3371 pages, i, nr_pages,
3372 false))) {
3373 retry = 1;
3374 break;
3375 }
3376 }
3377 #endif
3378 			/* give priority to WB_SYNC threads */
3379 if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3380 wbc->sync_mode == WB_SYNC_NONE) {
3381 done = 1;
3382 break;
3383 }
3384 #ifdef CONFIG_F2FS_FS_COMPRESSION
3385 lock_folio:
3386 #endif
3387 done_index = folio->index;
3388 retry_write:
3389 folio_lock(folio);
3390
3391 if (unlikely(folio->mapping != mapping)) {
3392 continue_unlock:
3393 folio_unlock(folio);
3394 continue;
3395 }
3396
3397 if (!folio_test_dirty(folio)) {
3398 /* someone wrote it for us */
3399 goto continue_unlock;
3400 }
3401
3402 if (folio_test_writeback(folio)) {
3403 if (wbc->sync_mode == WB_SYNC_NONE)
3404 goto continue_unlock;
3405 f2fs_folio_wait_writeback(folio, DATA, true, true);
3406 }
3407
3408 if (!folio_clear_dirty_for_io(folio))
3409 goto continue_unlock;
3410
3411 #ifdef CONFIG_F2FS_FS_COMPRESSION
3412 if (f2fs_compressed_file(inode)) {
3413 folio_get(folio);
3414 f2fs_compress_ctx_add_page(&cc, folio);
3415 continue;
3416 }
3417 #endif
3418 submitted = 0;
3419 ret = f2fs_write_single_data_page(folio,
3420 &submitted, &bio, &last_block,
3421 wbc, io_type, 0, true);
3422 #ifdef CONFIG_F2FS_FS_COMPRESSION
3423 result:
3424 #endif
3425 nwritten += submitted;
3426 wbc->nr_to_write -= submitted;
3427
3428 if (unlikely(ret)) {
3429 /*
3430 * keep nr_to_write, since vfs uses this to
3431 * get # of written pages.
3432 */
3433 if (ret == 1) {
3434 ret = 0;
3435 goto next;
3436 } else if (ret == -EAGAIN) {
3437 ret = 0;
3438 if (wbc->sync_mode == WB_SYNC_ALL) {
3439 f2fs_schedule_timeout(
3440 DEFAULT_SCHEDULE_TIMEOUT);
3441 goto retry_write;
3442 }
3443 goto next;
3444 }
3445 done_index = folio_next_index(folio);
3446 done = 1;
3447 break;
3448 }
3449
3450 if (wbc->nr_to_write <= 0 &&
3451 wbc->sync_mode == WB_SYNC_NONE) {
3452 done = 1;
3453 break;
3454 }
3455 next:
3456 if (need_readd)
3457 goto readd;
3458 }
3459 release_pages(pages, nr_pages);
3460 cond_resched();
3461 }
3462 #ifdef CONFIG_F2FS_FS_COMPRESSION
3463 	/* flush remaining pages in the compress cluster */
3464 if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3465 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3466 nwritten += submitted;
3467 wbc->nr_to_write -= submitted;
3468 if (ret) {
3469 done = 1;
3470 retry = 0;
3471 }
3472 }
3473 if (f2fs_compressed_file(inode))
3474 f2fs_destroy_compress_ctx(&cc, false);
3475 #endif
3476 if (retry) {
3477 index = 0;
3478 end = -1;
3479 goto retry;
3480 }
3481 if (wbc->range_cyclic && !done)
3482 done_index = 0;
3483 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3484 mapping->writeback_index = done_index;
3485
3486 if (nwritten)
3487 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3488 NULL, 0, DATA);
3489 /* submit cached bio of IPU write */
3490 if (bio)
3491 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3492
3493 #ifdef CONFIG_F2FS_FS_COMPRESSION
3494 if (pages != pages_local)
3495 kfree(pages);
3496 #endif
3497
3498 return ret;
3499 }
3500
3501 static inline bool __should_serialize_io(struct inode *inode,
3502 struct writeback_control *wbc)
3503 {
3504 /* to avoid deadlock in path of data flush */
3505 if (F2FS_I(inode)->wb_task)
3506 return false;
3507
3508 if (!S_ISREG(inode->i_mode))
3509 return false;
3510 if (IS_NOQUOTA(inode))
3511 return false;
3512
3513 if (f2fs_is_pinned_file(inode))
3514 return false;
3515 if (f2fs_need_compress_data(inode))
3516 return true;
3517 if (wbc->sync_mode != WB_SYNC_ALL)
3518 return true;
3519 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3520 return true;
3521 return false;
3522 }
3523
3524 static inline void account_writeback(struct inode *inode, bool inc)
3525 {
3526 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
3527 return;
3528
3529 f2fs_down_read(&F2FS_I(inode)->i_sem);
3530 if (inc)
3531 atomic_inc(&F2FS_I(inode)->writeback);
3532 else
3533 atomic_dec(&F2FS_I(inode)->writeback);
3534 f2fs_up_read(&F2FS_I(inode)->i_sem);
3535 }
3536
3537 static inline void update_skipped_write(struct f2fs_sb_info *sbi,
3538 struct writeback_control *wbc)
3539 {
3540 long skipped = wbc->pages_skipped;
3541
3542 if (is_sbi_flag_set(sbi, SBI_ENABLE_CHECKPOINT) && skipped &&
3543 wbc->sync_mode == WB_SYNC_ALL)
3544 atomic_add(skipped, &sbi->nr_pages[F2FS_SKIPPED_WRITE]);
3545 }
3546
3547 static int __f2fs_write_data_pages(struct address_space *mapping,
3548 struct writeback_control *wbc,
3549 enum iostat_type io_type)
3550 {
3551 struct inode *inode = mapping->host;
3552 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3553 struct blk_plug plug;
3554 int ret;
3555 bool locked = false;
3556
3557 /* skip writing if there is no dirty page in this inode */
3558 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3559 return 0;
3560
3561 /* during POR, we don't need to trigger writepage at all. */
3562 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3563 goto skip_write;
3564
3565 if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3566 wbc->sync_mode == WB_SYNC_NONE &&
3567 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3568 f2fs_available_free_memory(sbi, DIRTY_DENTS))
3569 goto skip_write;
3570
3571 	/* skip writing during the file defragment preparation stage */
3572 if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3573 goto skip_write;
3574
3575 trace_f2fs_writepages(mapping->host, wbc, DATA);
3576
3577 	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3578 if (wbc->sync_mode == WB_SYNC_ALL)
3579 atomic_inc(&sbi->wb_sync_req[DATA]);
3580 else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3581 /* to avoid potential deadlock */
3582 if (current->plug)
3583 blk_finish_plug(current->plug);
3584 goto skip_write;
3585 }
3586
3587 if (__should_serialize_io(inode, wbc)) {
3588 mutex_lock(&sbi->writepages);
3589 locked = true;
3590 }
3591
3592 account_writeback(inode, true);
3593
3594 blk_start_plug(&plug);
3595 ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3596 blk_finish_plug(&plug);
3597
3598 account_writeback(inode, false);
3599
3600 if (locked)
3601 mutex_unlock(&sbi->writepages);
3602
3603 if (wbc->sync_mode == WB_SYNC_ALL)
3604 atomic_dec(&sbi->wb_sync_req[DATA]);
3605 /*
3606 	 * if some pages were truncated, we cannot rely on mapping->host
3607 * to detect pending bios.
3608 */
3609
3610 f2fs_remove_dirty_inode(inode);
3611
3612 /*
3613 	 * f2fs_write_cache_pages() has retry logic for the EAGAIN case, which is
3614 	 * common when racing w/ checkpoint, so only update the skipped-write count
3615 	 * when ret is non-zero.
3616 */
3617 if (ret)
3618 update_skipped_write(sbi, wbc);
3619 return ret;
3620
3621 skip_write:
3622 wbc->pages_skipped += get_dirty_pages(inode);
3623 update_skipped_write(sbi, wbc);
3624 trace_f2fs_writepages(mapping->host, wbc, DATA);
3625 return 0;
3626 }
3627
3628 static int f2fs_write_data_pages(struct address_space *mapping,
3629 struct writeback_control *wbc)
3630 {
3631 struct inode *inode = mapping->host;
3632
3633 return __f2fs_write_data_pages(mapping, wbc,
3634 F2FS_I(inode)->cp_task == current ?
3635 FS_CP_DATA_IO : FS_DATA_IO);
3636 }
3637
3638 void f2fs_write_failed(struct inode *inode, loff_t to)
3639 {
3640 loff_t i_size = i_size_read(inode);
3641
3642 if (IS_NOQUOTA(inode))
3643 return;
3644
3645 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3646 if (to > i_size && !f2fs_verity_in_progress(inode)) {
3647 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3648 filemap_invalidate_lock(inode->i_mapping);
3649
3650 truncate_pagecache(inode, i_size);
3651 f2fs_truncate_blocks(inode, i_size, true);
3652
3653 filemap_invalidate_unlock(inode->i_mapping);
3654 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3655 }
3656 }
3657
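/*
 * Prepare the block address for a buffered write: convert or serve inline
 * data as needed and look up or reserve the data block, taking f2fs_map_lock()
 * when allocation may be required.
 */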
3658 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3659 struct folio *folio, loff_t pos, unsigned int len,
3660 block_t *blk_addr, bool *node_changed)
3661 {
3662 struct inode *inode = folio->mapping->host;
3663 pgoff_t index = folio->index;
3664 struct dnode_of_data dn;
3665 struct f2fs_lock_context lc;
3666 struct folio *ifolio;
3667 bool locked = false;
3668 int flag = F2FS_GET_BLOCK_PRE_AIO;
3669 int err = 0;
3670
3671 /*
3672 * If a whole page is being written and we already preallocated all the
3673 * blocks, then there is no need to get a block address now.
3674 */
3675 if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3676 return 0;
3677
3678 /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3679 if (f2fs_has_inline_data(inode)) {
3680 if (pos + len > MAX_INLINE_DATA(inode))
3681 flag = F2FS_GET_BLOCK_DEFAULT;
3682 f2fs_map_lock(sbi, &lc, flag);
3683 locked = true;
3684 } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
3685 f2fs_map_lock(sbi, &lc, flag);
3686 locked = true;
3687 }
3688
3689 restart:
3690 /* check inline_data */
3691 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
3692 if (IS_ERR(ifolio)) {
3693 err = PTR_ERR(ifolio);
3694 goto unlock_out;
3695 }
3696
3697 set_new_dnode(&dn, inode, ifolio, ifolio, 0);
3698
3699 if (f2fs_has_inline_data(inode)) {
3700 if (pos + len <= MAX_INLINE_DATA(inode)) {
3701 f2fs_do_read_inline_data(folio, ifolio);
3702 set_inode_flag(inode, FI_DATA_EXIST);
3703 if (inode->i_nlink)
3704 folio_set_f2fs_inline(ifolio);
3705 goto out;
3706 }
3707 err = f2fs_convert_inline_folio(&dn, folio);
3708 if (err || dn.data_blkaddr != NULL_ADDR)
3709 goto out;
3710 }
3711
3712 if (!f2fs_lookup_read_extent_cache_block(inode, index,
3713 &dn.data_blkaddr)) {
3714 if (IS_DEVICE_ALIASING(inode)) {
3715 err = -ENODATA;
3716 goto out;
3717 }
3718
3719 if (locked) {
3720 err = f2fs_reserve_block(&dn, index);
3721 goto out;
3722 }
3723
3724 /* hole case */
3725 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3726 if (!err && dn.data_blkaddr != NULL_ADDR)
3727 goto out;
3728 f2fs_put_dnode(&dn);
3729 f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
3730 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3731 locked = true;
3732 goto restart;
3733 }
3734 out:
3735 if (!err) {
3736 /* convert_inline_page can make node_changed */
3737 *blk_addr = dn.data_blkaddr;
3738 *node_changed = dn.node_changed;
3739 }
3740 f2fs_put_dnode(&dn);
3741 unlock_out:
3742 if (locked)
3743 f2fs_map_unlock(sbi, &lc, flag);
3744 return err;
3745 }
3746
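/* Look up the existing block address at @index; NULL_ADDR means a hole. */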
3747 static int __find_data_block(struct inode *inode, pgoff_t index,
3748 block_t *blk_addr)
3749 {
3750 struct dnode_of_data dn;
3751 struct folio *ifolio;
3752 int err = 0;
3753
3754 ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino);
3755 if (IS_ERR(ifolio))
3756 return PTR_ERR(ifolio);
3757
3758 set_new_dnode(&dn, inode, ifolio, ifolio, 0);
3759
3760 if (!f2fs_lookup_read_extent_cache_block(inode, index,
3761 &dn.data_blkaddr)) {
3762 /* hole case */
3763 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3764 if (err) {
3765 dn.data_blkaddr = NULL_ADDR;
3766 err = 0;
3767 }
3768 }
3769 *blk_addr = dn.data_blkaddr;
3770 f2fs_put_dnode(&dn);
3771 return err;
3772 }
3773
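/*
 * Reserve a new block at @index if none is allocated yet, returning the
 * resulting block address and whether the node page was changed.
 */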
3774 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3775 block_t *blk_addr, bool *node_changed)
3776 {
3777 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3778 struct dnode_of_data dn;
3779 struct f2fs_lock_context lc;
3780 struct folio *ifolio;
3781 int err = 0;
3782
3783 f2fs_map_lock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
3784
3785 ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
3786 if (IS_ERR(ifolio)) {
3787 err = PTR_ERR(ifolio);
3788 goto unlock_out;
3789 }
3790 set_new_dnode(&dn, inode, ifolio, ifolio, 0);
3791
3792 if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
3793 &dn.data_blkaddr))
3794 err = f2fs_reserve_block(&dn, index);
3795
3796 *blk_addr = dn.data_blkaddr;
3797 *node_changed = dn.node_changed;
3798 f2fs_put_dnode(&dn);
3799
3800 unlock_out:
3801 f2fs_map_unlock(sbi, &lc, F2FS_GET_BLOCK_PRE_AIO);
3802 return err;
3803 }
3804
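/*
 * write_begin helper for atomic-write files: updates go to the COW inode.
 * Reuse a block that is already reserved in the COW inode if there is one;
 * otherwise reserve a new one there and remember the original inode's block
 * address so the old data can be read in for a partial-page update.
 */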
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
			struct folio *folio, loff_t pos, unsigned int len,
			block_t *blk_addr, bool *node_changed, bool *use_cow)
{
	struct inode *inode = folio->mapping->host;
	struct inode *cow_inode = F2FS_I(inode)->cow_inode;
	pgoff_t index = folio->index;
	int err = 0;
	block_t ori_blk_addr = NULL_ADDR;

	/* If pos is beyond the end of file, reserve a new block in COW inode */
	if ((pos & PAGE_MASK) >= i_size_read(inode))
		goto reserve_block;

	/* Look for the block in COW inode first */
	err = __find_data_block(cow_inode, index, blk_addr);
	if (err) {
		return err;
	} else if (*blk_addr != NULL_ADDR) {
		*use_cow = true;
		return 0;
	}

	if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
		goto reserve_block;

	/* Look for the block in the original inode */
	err = __find_data_block(inode, index, &ori_blk_addr);
	if (err)
		return err;

reserve_block:
	/* Finally, we should reserve a new block in COW inode for the update */
	err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
	if (err)
		return err;
	inc_atomic_write_cnt(inode);

	if (ori_blk_addr != NULL_ADDR)
		*blk_addr = ori_blk_addr;
	return 0;
}

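/*
 * .write_begin for f2fs data: grab and lock the target folio, map or reserve
 * its block, and read the old contents in when only part of the folio is
 * going to be overwritten.
 */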
static int f2fs_write_begin(const struct kiocb *iocb,
			struct address_space *mapping,
			loff_t pos, unsigned len, struct folio **foliop,
			void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	bool need_balance = false;
	bool use_cow = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * Convert inline data now to avoid a deadlock between the inode folio
	 * and folio #0.  The lock order for inline_data conversion must be:
	 * folio_lock(folio #0) -> folio_lock(inode folio)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;
		struct page *page;

		*fsdata = NULL;

		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
			goto repeat;

		ret = f2fs_prepare_compress_overwrite(inode, &page,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			*foliop = page_folio(page);
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use FGP_STABLE here to avoid deadlock; writeback is waited
	 * for below under our own I/O control.
	 */
	folio = f2fs_filemap_get_folio(mapping, index,
			FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_NOFS,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		err = PTR_ERR(folio);
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*foliop = folio;

	if (f2fs_is_atomic_file(inode))
		err = prepare_atomic_write_begin(sbi, folio, pos, len,
					&blkaddr, &need_balance, &use_cow);
	else
		err = prepare_write_begin(sbi, folio, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto put_folio;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		folio_unlock(folio);
		f2fs_balance_fs(sbi, true);
		folio_lock(folio);
		if (folio->mapping != mapping) {
			/* The folio got truncated from under us */
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
	}

	f2fs_folio_wait_writeback(folio, DATA, false, true);

	if (len == folio_size(folio) || folio_test_uptodate(folio))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		folio_zero_segment(folio, len, folio_size(folio));
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_folio;
		}
		f2fs_submit_page_read(use_cow ? F2FS_I(inode)->cow_inode :
					inode,
					NULL, /* can't write to fsverity files */
					folio, blkaddr, 0, true);

		folio_lock(folio);
		if (unlikely(folio->mapping != mapping)) {
			folio_unlock(folio);
			folio_put(folio);
			goto repeat;
		}
		if (unlikely(!folio_test_uptodate(folio))) {
			err = -EIO;
			goto put_folio;
		}
	}
	return 0;

put_folio:
	f2fs_folio_put(folio, true);
fail:
	f2fs_write_failed(inode, pos + len);
	return err;
}

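/*
 * .write_end for f2fs data: mark the folio dirty and up-to-date, extend
 * i_size when the write went past the end of file (also for the COW inode
 * of atomic-write files), and release the folio.
 */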
static int f2fs_write_end(const struct kiocb *iocb,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct folio *folio, void *fsdata)
{
	struct inode *inode = folio->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * A non-uptodate folio here should only happen when len == PAGE_SIZE,
	 * so copied is expected to be PAGE_SIZE as well.  Otherwise, reset
	 * copied to zero and let generic_perform_write() retry the copy.
	 */
	if (!folio_test_uptodate(folio)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			folio_mark_uptodate(folio);
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* overwrite compressed file */
	if (f2fs_compressed_file(inode) && fsdata) {
		f2fs_compress_write_end(inode, fsdata, folio->index, copied);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

		if (pos + copied > i_size_read(inode) &&
				!f2fs_verity_in_progress(inode))
			f2fs_i_size_write(inode, pos + copied);
		return copied;
	}
#endif

	if (!copied)
		goto unlock_out;

	folio_mark_dirty(folio);

	if (f2fs_is_atomic_file(inode))
		folio_set_f2fs_atomic(folio);

	if (pos + copied > i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		f2fs_i_size_write(inode, pos + copied);
		if (f2fs_is_atomic_file(inode))
			f2fs_i_size_write(F2FS_I(inode)->cow_inode,
					pos + copied);
	}
unlock_out:
	f2fs_folio_put(folio, true);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

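/*
 * Folio invalidation hook: update dirty page accounting and, when the whole
 * folio is invalidated, cancel its dirty state and detach the f2fs private
 * folio state.
 */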
void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	struct inode *inode = folio->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
			(offset || length != folio_size(folio)))
		return;

	if (folio_test_dirty(folio)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	if (offset || length != folio_size(folio))
		return;

	folio_cancel_dirty(folio);
	ffs_detach_free(folio);
}

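/*
 * Folio release hook: refuse to release dirty folios; otherwise detach the
 * f2fs private folio state so the folio can be freed.
 */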
bool f2fs_release_folio(struct folio *folio, gfp_t wait)
{
	/* If this folio is dirty, keep its private data */
	if (folio_test_dirty(folio))
		return false;

	ffs_detach_free(folio);
	return true;
}

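/*
 * .dirty_folio for data: mark the folio up-to-date and dirty, and account it
 * as a dirty data page of the owning inode.
 */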
static bool f2fs_dirty_data_folio(struct address_space *mapping,
		struct folio *folio)
{
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(folio, DATA);

	if (!folio_test_uptodate(folio))
		folio_mark_uptodate(folio);
	BUG_ON(folio_test_swapcache(folio));

	if (filemap_dirty_folio(mapping, folio)) {
		f2fs_update_dirty_folio(inode, folio);
		return true;
	}
	return false;
}

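/*
 * bmap helper for compressed files: look up the cluster containing @block
 * and return the physical block number of the requested page, or 0 when it
 * is not backed by a valid block address (e.g. inside a compressed cluster).
 */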
static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct dnode_of_data dn;
	sector_t start_idx, blknr = 0;
	int ret;

	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		return 0;

	if (dn.data_blkaddr != COMPRESS_ADDR) {
		dn.ofs_in_node += block - start_idx;
		blknr = f2fs_data_blkaddr(&dn);
		if (!__is_valid_data_blkaddr(blknr))
			blknr = 0;
	}

	f2fs_put_dnode(&dn);
	return blknr;
#else
	return 0;
#endif
}

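/*
 * .bmap: translate a file block number into a device block number.  Inline
 * data and out-of-range blocks map to 0; compressed files are handled by
 * f2fs_bmap_compress().
 */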
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blknr = 0;

	if (f2fs_has_inline_data(inode))
		goto out;

	/* flush dirty pages so that all blocks have been allocated */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(block >= max_file_blocks(inode)))
		goto out;

	if (f2fs_compressed_file(inode)) {
		blknr = f2fs_bmap_compress(inode, block);
	} else {
		struct f2fs_map_blocks map;

		memset(&map, 0, sizeof(map));
		map.m_lblk = block;
		map.m_len = 1;
		map.m_next_pgofs = NULL;
		map.m_seg_type = NO_CHECK_TYPE;

		if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
			blknr = map.m_pblk;
	}
out:
	trace_f2fs_bmap(inode, block, blknr);
	return blknr;
}

#ifdef CONFIG_SWAP
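/*
 * Rewrite @blkcnt blocks starting at @start_blk one section at a time using
 * pinned, aligned OPU writes.  check_swap_activate() uses this to realign
 * swapfile extents that do not match the section geometry.
 */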
static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
				unsigned int blkcnt)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blkofs;
	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
	unsigned int end_blk = start_blk + blkcnt - 1;
	unsigned int secidx = start_blk / blk_per_sec;
	unsigned int end_sec;
	int ret = 0;

	if (!blkcnt)
		return 0;
	end_sec = end_blk / blk_per_sec;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	set_inode_flag(inode, FI_ALIGNED_WRITE);
	set_inode_flag(inode, FI_OPU_WRITE);

	for (; secidx <= end_sec; secidx++) {
		unsigned int blkofs_end = secidx == end_sec ?
				end_blk % blk_per_sec : blk_per_sec - 1;

		f2fs_down_write(&sbi->pin_sem);

		ret = f2fs_allocate_pinning_section(sbi);
		if (ret) {
			f2fs_up_write(&sbi->pin_sem);
			break;
		}

		set_inode_flag(inode, FI_SKIP_WRITES);

		for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
			struct folio *folio;
			unsigned int blkidx = secidx * blk_per_sec + blkofs;

			folio = f2fs_get_lock_data_folio(inode, blkidx, true);
			if (IS_ERR(folio)) {
				f2fs_up_write(&sbi->pin_sem);
				ret = PTR_ERR(folio);
				goto done;
			}

			folio_mark_dirty(folio);
			f2fs_folio_put(folio, true);
		}

		clear_inode_flag(inode, FI_SKIP_WRITES);

		ret = filemap_fdatawrite(inode->i_mapping);

		f2fs_up_write(&sbi->pin_sem);

		if (ret)
			break;
	}

done:
	clear_inode_flag(inode, FI_SKIP_WRITES);
	clear_inode_flag(inode, FI_OPU_WRITE);
	clear_inode_flag(inode, FI_ALIGNED_WRITE);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	return ret;
}

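/*
 * Walk the swapfile's block mappings and register them as swap extents.
 * Extents that are not section-aligned, or that sit in sequential zones,
 * are migrated first; a swapfile with holes is rejected.
 */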
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	block_t cur_lblock;
	block_t last_lblock;
	block_t pblock;
	block_t lowest_pblock = -1;
	block_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned int nr_pblocks;
	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
	unsigned int not_aligned = 0;
	int ret = 0;

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));

	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
		bool last_extent = false;
retry:
		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = last_lblock - cur_lblock;
		map.m_next_pgofs = NULL;
		map.m_next_extent = NULL;
		map.m_seg_type = NO_CHECK_TYPE;
		map.m_may_create = false;

		ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			f2fs_err(sbi, "Swapfile has holes");
			ret = -EINVAL;
			goto out;
		}

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if (!last_extent &&
		    ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
		     nr_pblocks % blks_per_sec ||
		     f2fs_is_sequential_zone_area(sbi, pblock))) {
			not_aligned++;

			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
			if (cur_lblock + nr_pblocks > sis->max)
				nr_pblocks -= blks_per_sec;

			/* this extent is last one */
			if (!nr_pblocks) {
				nr_pblocks = last_lblock - cur_lblock;
				last_extent = true;
			}

			ret = f2fs_migrate_blocks(inode, cur_lblock,
							nr_pblocks);
			if (ret) {
				if (ret == -ENOENT)
					ret = -EINVAL;
				goto out;
			}

			/* lookup block mapping info after block migration */
			goto retry;
		}

		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}
		/* add this contiguous run of blocks as a swap extent */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
out:
	if (not_aligned)
		f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
	return ret;
}

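/*
 * .swap_activate: sanity-check the file (regular, not on a read-only fs, not
 * LFS mode without zoned support), force it out of inline and compressed
 * form, flush dirty data, then build the swap extent list and pin the file.
 */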
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
		f2fs_err(sbi, "Swapfile not supported in LFS mode");
		return -EINVAL;
	}

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	ret = filemap_fdatawrite(inode->i_mapping);
	if (ret < 0)
		return ret;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	stat_inc_swapfile_inode(inode);
	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(sbi, REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	stat_dec_swapfile_inode(inode);
	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.read_folio = f2fs_read_data_folio,
	.readahead = f2fs_readahead,
	.writepages = f2fs_write_data_pages,
	.write_begin = f2fs_write_begin,
	.write_end = f2fs_write_end,
	.dirty_folio = f2fs_dirty_data_folio,
	.migrate_folio = filemap_migrate_folio,
	.invalidate_folio = f2fs_invalidate_folio,
	.release_folio = f2fs_release_folio,
	.bmap = f2fs_bmap,
	.swap_activate = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
};

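/*
 * Clear the PAGECACHE_TAG_DIRTY xarray tag for @folio without touching the
 * folio's own dirty flag.
 */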
void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, folio->index,
			PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
	    !f2fs_sb_has_verity(sbi) &&
	    !f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						WQ_UNBOUND | WQ_HIGHPRI,
						num_online_cpus());
	return sbi->post_read_wq ? 0 : -ENOMEM;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
						sizeof(struct bio_entry));

	if (!bio_entry_slab)
		return -ENOMEM;

	ffs_entry_slab = f2fs_kmem_cache_create("f2fs_ffs_slab",
						sizeof(struct f2fs_folio_state));

	if (!ffs_entry_slab) {
		kmem_cache_destroy(bio_entry_slab);
		return -ENOMEM;
	}

	return 0;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
	kmem_cache_destroy(ffs_entry_slab);
}

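/*
 * iomap_begin callback: map the requested byte range with f2fs_map_blocks()
 * and translate the result into an iomap (mapped, unwritten or hole).
 * Used for direct I/O (note F2FS_GET_BLOCK_DIO), presumably driven through
 * iomap_dio_rw(); for writes, new blocks are only created when the range is
 * not already fully allocated.
 */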
static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct f2fs_map_blocks map = { NULL, };
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = F2FS_BYTES_TO_BLK(offset);
	map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
						  inode->i_write_hint);
	if (flags & IOMAP_WRITE && iomap->private) {
		map.m_last_pblk = (unsigned long)iomap->private;
		iomap->private = NULL;
	}

	/*
	 * If the blocks being overwritten are already allocated,
	 * f2fs_map_lock and f2fs_balance_fs are not necessary.
	 */
	if ((flags & IOMAP_WRITE) &&
	    !__f2fs_overwrite_io(inode, offset, length, true))
		map.m_may_create = true;

	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
	if (err)
		return err;

	iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);

	/*
	 * When inline encryption is enabled, sometimes I/O to an encrypted file
	 * has to be broken up to guarantee DUN contiguity.  Handle this by
	 * limiting the length of the mapping returned.
	 */
	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);

	/*
	 * We should never see delalloc or compressed extents here based on
	 * prior flushing and checks.
	 */
	if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
		return -EINVAL;

	if (map.m_flags & F2FS_MAP_MAPPED) {
		if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
			return -EINVAL;

		iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
		iomap->type = IOMAP_MAPPED;
		iomap->flags |= IOMAP_F_MERGED;
		iomap->bdev = map.m_bdev;
		iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);

		if (flags & IOMAP_WRITE && map.m_last_pblk)
			iomap->private = (void *)map.m_last_pblk;
	} else {
		if (flags & IOMAP_WRITE)
			return -ENOTBLK;

		if (map.m_pblk == NULL_ADDR) {
			iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
							iomap->offset;
			iomap->type = IOMAP_HOLE;
		} else if (map.m_pblk == NEW_ADDR) {
			iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
			iomap->type = IOMAP_UNWRITTEN;
		} else {
			f2fs_bug_on(F2FS_I_SB(inode), 1);
		}
		iomap->addr = IOMAP_NULL_ADDR;
	}

	if (map.m_flags & F2FS_MAP_NEW)
		iomap->flags |= IOMAP_F_NEW;
	if ((inode_state_read_once(inode) & I_DIRTY_DATASYNC) ||
	    offset + length > i_size_read(inode))
		iomap->flags |= IOMAP_F_DIRTY;

	return 0;
}

const struct iomap_ops f2fs_iomap_ops = {
	.iomap_begin = f2fs_iomap_begin,
};