xref: /linux/fs/f2fs/file.c (revision 334fbe734e687404f346eba7d5d96ed2b44d35ab)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/file.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/blk-crypto.h>
9 #include <linux/fs.h>
10 #include <linux/f2fs_fs.h>
11 #include <linux/stat.h>
12 #include <linux/writeback.h>
13 #include <linux/blkdev.h>
14 #include <linux/falloc.h>
15 #include <linux/filelock.h>
16 #include <linux/types.h>
17 #include <linux/compat.h>
18 #include <linux/uaccess.h>
19 #include <linux/mount.h>
20 #include <linux/uio.h>
21 #include <linux/uuid.h>
22 #include <linux/file.h>
23 #include <linux/nls.h>
24 #include <linux/sched/signal.h>
25 #include <linux/fileattr.h>
26 #include <linux/fadvise.h>
27 #include <linux/iomap.h>
28 
29 #include "f2fs.h"
30 #include "node.h"
31 #include "segment.h"
32 #include "xattr.h"
33 #include "acl.h"
34 #include "gc.h"
35 #include "iostat.h"
36 #include <trace/events/f2fs.h>
37 #include <uapi/linux/f2fs.h>
38 
/*
 * Clean up page cache lying past the current end of file.
 *
 * @inode:    inode whose mapping is cleaned
 * @new_size: upper bound of the byte range to clean
 * @lock:     when true, hold the mapping's invalidate lock around the
 *            truncation; when false the lock is not touched here
 *
 * Nothing is done when i_size is already >= @new_size or when the mapping
 * holds no pages.
 */
static void f2fs_zero_post_eof_page(struct inode *inode,
					loff_t new_size, bool lock)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t cur_size = i_size_read(inode);

	if (cur_size >= new_size || mapping_empty(mapping))
		return;

	if (!lock) {
		/* zero or drop pages only in range of [cur_size, new_size] */
		truncate_inode_pages_range(mapping, cur_size, new_size);
		return;
	}

	filemap_invalidate_lock(mapping);
	/* zero or drop pages only in range of [cur_size, new_size] */
	truncate_inode_pages_range(mapping, cur_size, new_size);
	filemap_invalidate_unlock(mapping);
}
57 
f2fs_filemap_fault(struct vm_fault * vmf)58 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
59 {
60 	struct inode *inode = file_inode(vmf->vma->vm_file);
61 	vm_flags_t flags = vmf->vma->vm_flags;
62 	vm_fault_t ret;
63 
64 	ret = filemap_fault(vmf);
65 	if (ret & VM_FAULT_LOCKED)
66 		f2fs_update_iostat(F2FS_I_SB(inode), inode,
67 					APP_MAPPED_READ_IO, F2FS_BLKSIZE);
68 
69 	trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
70 
71 	return ret;
72 }
73 
f2fs_vm_page_mkwrite(struct vm_fault * vmf)74 static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
75 {
76 	struct folio *folio = page_folio(vmf->page);
77 	struct inode *inode = file_inode(vmf->vma->vm_file);
78 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
79 	struct dnode_of_data dn;
80 	bool need_alloc = !f2fs_is_pinned_file(inode);
81 	int err = 0;
82 	vm_fault_t ret;
83 
84 	if (unlikely(IS_IMMUTABLE(inode)))
85 		return VM_FAULT_SIGBUS;
86 
87 	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
88 		err = -EIO;
89 		goto out;
90 	}
91 
92 	if (unlikely(f2fs_cp_error(sbi))) {
93 		err = -EIO;
94 		goto out;
95 	}
96 
97 	if (!f2fs_is_checkpoint_ready(sbi)) {
98 		err = -ENOSPC;
99 		goto out;
100 	}
101 
102 	err = f2fs_convert_inline_inode(inode);
103 	if (err)
104 		goto out;
105 
106 #ifdef CONFIG_F2FS_FS_COMPRESSION
107 	if (f2fs_compressed_file(inode)) {
108 		int ret = f2fs_is_compressed_cluster(inode, folio->index);
109 
110 		if (ret < 0) {
111 			err = ret;
112 			goto out;
113 		} else if (ret) {
114 			need_alloc = false;
115 		}
116 	}
117 #endif
118 	/* should do out of any locked page */
119 	if (need_alloc)
120 		f2fs_balance_fs(sbi, true);
121 
122 	sb_start_pagefault(inode->i_sb);
123 
124 	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
125 
126 	f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
127 
128 	file_update_time(vmf->vma->vm_file);
129 	filemap_invalidate_lock_shared(inode->i_mapping);
130 
131 	folio_lock(folio);
132 	if (unlikely(folio->mapping != inode->i_mapping ||
133 			folio_pos(folio) > i_size_read(inode) ||
134 			!folio_test_uptodate(folio))) {
135 		folio_unlock(folio);
136 		err = -EFAULT;
137 		goto out_sem;
138 	}
139 
140 	set_new_dnode(&dn, inode, NULL, NULL, 0);
141 	if (need_alloc) {
142 		/* block allocation */
143 		err = f2fs_get_block_locked(&dn, folio->index);
144 	} else {
145 		err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
146 		f2fs_put_dnode(&dn);
147 		if (f2fs_is_pinned_file(inode) &&
148 		    !__is_valid_data_blkaddr(dn.data_blkaddr))
149 			err = -EIO;
150 	}
151 
152 	if (err) {
153 		folio_unlock(folio);
154 		goto out_sem;
155 	}
156 
157 	f2fs_folio_wait_writeback(folio, DATA, false, true);
158 
159 	/* wait for GCed page writeback via META_MAPPING */
160 	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
161 
162 	/*
163 	 * check to see if the page is mapped already (no holes)
164 	 */
165 	if (folio_test_mappedtodisk(folio))
166 		goto out_sem;
167 
168 	/* page is wholly or partially inside EOF */
169 	if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
170 						i_size_read(inode)) {
171 		loff_t offset;
172 
173 		offset = i_size_read(inode) & ~PAGE_MASK;
174 		folio_zero_segment(folio, offset, folio_size(folio));
175 	}
176 	folio_mark_dirty(folio);
177 
178 	f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
179 	f2fs_update_time(sbi, REQ_TIME);
180 
181 out_sem:
182 	filemap_invalidate_unlock_shared(inode->i_mapping);
183 
184 	sb_end_pagefault(inode->i_sb);
185 out:
186 	ret = vmf_fs_error(err);
187 
188 	trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
189 	return ret;
190 }
191 
192 static const struct vm_operations_struct f2fs_file_vm_ops = {
193 	.fault		= f2fs_filemap_fault,
194 	.map_pages	= filemap_map_pages,
195 	.page_mkwrite	= f2fs_vm_page_mkwrite,
196 };
197 
/*
 * Look up the parent inode number of @inode through one of its dentry
 * aliases.
 *
 * Returns 1 and stores the parent ino in *@pino when an alias was found,
 * 0 (leaving *@pino untouched) otherwise.
 */
static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	/*
	 * Make sure to get the non-deleted alias.  The alias associated with
	 * the open file descriptor being fsync()'ed may be deleted already.
	 */
	struct dentry *alias = d_find_alias(inode);

	if (alias) {
		*pino = d_parent_ino(alias);
		dput(alias);
		return 1;
	}

	return 0;
}
214 
need_do_checkpoint(struct inode * inode)215 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
216 {
217 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
218 	enum cp_reason_type cp_reason = CP_NO_NEEDED;
219 
220 	if (!S_ISREG(inode->i_mode))
221 		cp_reason = CP_NON_REGULAR;
222 	else if (f2fs_compressed_file(inode))
223 		cp_reason = CP_COMPRESSED;
224 	else if (inode->i_nlink != 1)
225 		cp_reason = CP_HARDLINK;
226 	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
227 		cp_reason = CP_SB_NEED_CP;
228 	else if (file_wrong_pino(inode))
229 		cp_reason = CP_WRONG_PINO;
230 	else if (!f2fs_space_for_roll_forward(sbi))
231 		cp_reason = CP_NO_SPC_ROLL;
232 	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
233 		cp_reason = CP_NODE_NEED_CP;
234 	else if (test_opt(sbi, FASTBOOT))
235 		cp_reason = CP_FASTBOOT_MODE;
236 	else if (F2FS_OPTION(sbi).active_logs == 2)
237 		cp_reason = CP_SPEC_LOG_NUM;
238 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
239 		f2fs_need_dentry_mark(sbi, inode->i_ino) &&
240 		f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
241 							TRANS_DIR_INO))
242 		cp_reason = CP_RECOVER_DIR;
243 	else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
244 							XATTR_DIR_INO))
245 		cp_reason = CP_XATTR_DIR;
246 
247 	return cp_reason;
248 }
249 
need_inode_page_update(struct f2fs_sb_info * sbi,nid_t ino)250 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
251 {
252 	struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino);
253 	bool ret = false;
254 	/* But we need to avoid that there are some inode updates */
255 	if ((!IS_ERR(i) && folio_test_dirty(i)) ||
256 	    f2fs_need_inode_block_update(sbi, ino))
257 		ret = true;
258 	f2fs_folio_put(i, false);
259 	return ret;
260 }
261 
try_to_fix_pino(struct inode * inode)262 static void try_to_fix_pino(struct inode *inode)
263 {
264 	struct f2fs_inode_info *fi = F2FS_I(inode);
265 	nid_t pino;
266 
267 	f2fs_down_write(&fi->i_sem);
268 	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
269 			get_parent_ino(inode, &pino)) {
270 		f2fs_i_pino_write(inode, pino);
271 		file_got_pino(inode);
272 	}
273 	f2fs_up_write(&fi->i_sem);
274 }
275 
/*
 * Core of fsync()/fdatasync() for f2fs.
 *
 * @file:     file being synced
 * @start:    first byte of the range passed to file_write_and_wait_range()
 * @end:      last byte of that range
 * @datasync: non-zero for fdatasync semantics
 * @atomic:   when true, node write completion and the final flush are not
 *            waited for/issued here (see the comments below)
 *
 * Outline:
 *  - returns 0 at once on a read-only filesystem;
 *  - for non-directories, writes and waits for the data range, with
 *    FI_NEED_IPU set for the duration when @datasync is set or few pages
 *    are dirty;
 *  - if need_do_checkpoint() reports a reason, runs f2fs_sync_fs() and
 *    finishes;
 *  - otherwise writes the inode's node pages (looping while the inode
 *    block still needs an update), optionally waits for them, and issues
 *    a flush unless the fsync mode is nobarrier.
 *
 * Returns 0 on success or a negative errno.
 */
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
						int datasync, bool atomic)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	enum cp_reason_type cp_reason = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
	};
	unsigned int seq_id = 0;

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/* directories skip the data writeback step entirely */
	if (S_ISDIR(inode->i_mode))
		goto go_write;

	/* if fdatasync is triggered, let's do in-place-update */
	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(inode, FI_NEED_IPU);
	ret = file_write_and_wait_range(file, start, end);
	clear_inode_flag(inode, FI_NEED_IPU);

	/* stop here on a data write error or when checkpointing is disabled */
	if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!f2fs_skip_inode_update(inode, datasync)) {
		f2fs_write_inode(inode, NULL);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
			!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		/* only in-place updates happened: a flush is enough */
		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	} else {
		/*
		 * for OPU case, during fsync(), node can be persisted before
		 * data when lower device doesn't support write barrier, result
		 * in data corruption after SPO.
		 * So for strict fsync mode, force to use atomic write semantics
		 * to keep write order in between data/node and last node to
		 * avoid potential data corruption.
		 */
		if (F2FS_OPTION(sbi).fsync_mode ==
				FSYNC_MODE_STRICT && !atomic)
			atomic = true;
	}
go_write:
	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	f2fs_down_read(&F2FS_I(inode)->i_sem);
	cp_reason = need_do_checkpoint(inode);
	f2fs_up_read(&F2FS_I(inode)->i_sem);

	if (cp_reason) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(inode, FI_APPEND_WRITE);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	/* wb_sync_req[NODE] is raised only while the node pages are synced */
	atomic_inc(&sbi->wb_sync_req[NODE]);
	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	atomic_dec(&sbi->wb_sync_req[NODE]);
	if (ret)
		goto out;

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto out;
	}

	/* the inode block changed again: rewrite it and sync nodes once more */
	if (f2fs_need_inode_block_update(sbi, ino)) {
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	/*
	 * If it's atomic_write, it's just fine to keep write ordering. So
	 * here we don't need to wait for node write completion, since we use
	 * node chain which serializes node blocks. If one of node writes are
	 * reordered, we can see simply broken chain, resulting in stopping
	 * roll-forward recovery. It means we'll recover all or none node blocks
	 * given fsync mark.
	 */
	if (!atomic) {
		ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
		if (ret)
			goto out;
	}

	/* once recovery info is written, don't need to track this */
	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
	clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
	if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
		ret = f2fs_issue_flush(sbi, inode->i_ino);
	/* drop the update/flush tracking only when no error is pending */
	if (!ret) {
		f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
	}
	f2fs_update_time(sbi, REQ_TIME);
out:
	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
	return ret;
}
413 
/*
 * fsync entry point: fail with -EIO when the filesystem has hit a
 * checkpoint error, otherwise run the non-atomic sync path.
 */
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(file));

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	return f2fs_do_sync_file(file, start, end, datasync, false);
}
420 
__found_offset(struct address_space * mapping,struct dnode_of_data * dn,pgoff_t index,int whence)421 static bool __found_offset(struct address_space *mapping,
422 		struct dnode_of_data *dn, pgoff_t index, int whence)
423 {
424 	block_t blkaddr = f2fs_data_blkaddr(dn);
425 	struct inode *inode = mapping->host;
426 	bool compressed_cluster = false;
427 
428 	if (f2fs_compressed_file(inode)) {
429 		block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio,
430 		    ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
431 
432 		compressed_cluster = first_blkaddr == COMPRESS_ADDR;
433 	}
434 
435 	switch (whence) {
436 	case SEEK_DATA:
437 		if (__is_valid_data_blkaddr(blkaddr))
438 			return true;
439 		if (blkaddr == NEW_ADDR &&
440 		    xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
441 			return true;
442 		if (compressed_cluster)
443 			return true;
444 		break;
445 	case SEEK_HOLE:
446 		if (compressed_cluster)
447 			return false;
448 		if (blkaddr == NULL_ADDR)
449 			return true;
450 		break;
451 	}
452 	return false;
453 }
454 
/*
 * Implement SEEK_DATA / SEEK_HOLE for @file starting at @offset.
 *
 * Runs with the inode lock held shared.  Walks the block addresses of the
 * file dnode by dnode from @offset until __found_offset() reports a match
 * or i_size is reached.
 *
 * Returns the new file position as set by vfs_setpos(), or -ENXIO when
 * @offset is at/after i_size, when a lookup fails, when an invalid block
 * address is met, or when SEEK_DATA finds no data before i_size.
 */
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	inode_lock_shared(inode);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/* handle inline data case */
	if (f2fs_has_inline_data(inode)) {
		if (whence == SEEK_HOLE) {
			/* inline files report their only hole at i_size */
			data_ofs = isize;
			goto found;
		} else if (whence == SEEK_DATA) {
			data_ofs = offset;
			goto found;
		}
	}

	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);

	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
		if (err && err != -ENOENT) {
			goto fail;
		} else if (err == -ENOENT) {
			/* direct node does not exists */
			if (whence == SEEK_DATA) {
				pgofs = f2fs_get_next_page_offset(&dn, pgofs);
				continue;
			} else {
				goto found;
			}
		}

		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);

		/* find data/hole in dnode block */
		for (; dn.ofs_in_node < end_offset;
				dn.ofs_in_node++, pgofs++,
				data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
			block_t blkaddr;

			blkaddr = f2fs_data_blkaddr(&dn);

			/* a valid-looking but out-of-range address aborts the seek */
			if (__is_valid_data_blkaddr(blkaddr) &&
				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
					blkaddr, DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				goto fail;
			}

			if (__found_offset(file->f_mapping, &dn,
							pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	/* reached i_size without a match: only SEEK_HOLE succeeds here */
	if (whence == SEEK_DATA)
		goto fail;
found:
	/* never report a hole beyond i_size */
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	inode_unlock_shared(inode);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	inode_unlock_shared(inode);
	return -ENXIO;
}
536 
/*
 * ->llseek: SEEK_SET/SEEK_CUR/SEEK_END go through
 * generic_file_llseek_size() bounded by the f2fs maximum file size;
 * SEEK_DATA/SEEK_HOLE are handled by f2fs_seek_block() and reject a
 * negative offset with -ENXIO.  Any other @whence returns -EINVAL.
 */
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	if (whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END)
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));

	return -EINVAL;
}
557 
f2fs_file_mmap_prepare(struct vm_area_desc * desc)558 static int f2fs_file_mmap_prepare(struct vm_area_desc *desc)
559 {
560 	struct file *file = desc->file;
561 	struct inode *inode = file_inode(file);
562 
563 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
564 		return -EIO;
565 
566 	if (!f2fs_is_compress_backend_ready(inode))
567 		return -EOPNOTSUPP;
568 
569 	file_accessed(file);
570 	desc->vm_ops = &f2fs_file_vm_ops;
571 
572 	f2fs_down_read(&F2FS_I(inode)->i_sem);
573 	set_inode_flag(inode, FI_MMAP_FILE);
574 	f2fs_up_read(&F2FS_I(inode)->i_sem);
575 
576 	return 0;
577 }
578 
/*
 * One-time work performed on the first open of @inode.
 *
 * If FI_OPENED_FILE is already set nothing is done.  Otherwise, under the
 * inode lock, the file is truncated to its current i_size when
 * file_should_truncate() says so, and FI_OPENED_FILE is then set so later
 * opens take the fast path.
 *
 * Returns 0 on success or the error from f2fs_truncate(); on error
 * FI_OPENED_FILE is left unset.
 */
static int finish_preallocate_blocks(struct inode *inode)
{
	int ret = 0;
	bool opened;

	/* fast path: check the flag without taking the inode lock */
	f2fs_down_read(&F2FS_I(inode)->i_sem);
	opened = is_inode_flag_set(inode, FI_OPENED_FILE);
	f2fs_up_read(&F2FS_I(inode)->i_sem);
	if (opened)
		return 0;

	inode_lock(inode);
	/* re-check now that the inode lock is held */
	if (is_inode_flag_set(inode, FI_OPENED_FILE))
		goto out_unlock;

	if (!file_should_truncate(inode))
		goto out_update;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	truncate_setsize(inode, i_size_read(inode));
	ret = f2fs_truncate(inode);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	file_dont_truncate(inode);
out_update:
	f2fs_down_write(&F2FS_I(inode)->i_sem);
	set_inode_flag(inode, FI_OPENED_FILE);
	f2fs_up_write(&F2FS_I(inode)->i_sem);
out_unlock:
	inode_unlock(inode);
	return ret;
}
617 
f2fs_file_open(struct inode * inode,struct file * filp)618 static int f2fs_file_open(struct inode *inode, struct file *filp)
619 {
620 	int err = fscrypt_file_open(inode, filp);
621 
622 	if (err)
623 		return err;
624 
625 	if (!f2fs_is_compress_backend_ready(inode))
626 		return -EOPNOTSUPP;
627 
628 	if (mapping_large_folio_support(inode->i_mapping) &&
629 	    filp->f_mode & FMODE_WRITE)
630 		return -EOPNOTSUPP;
631 
632 	err = fsverity_file_open(inode, filp);
633 	if (err)
634 		return err;
635 
636 	filp->f_mode |= FMODE_NOWAIT;
637 	filp->f_mode |= FMODE_CAN_ODIRECT;
638 
639 	err = dquot_file_open(inode, filp);
640 	if (err)
641 		return err;
642 
643 	err = finish_preallocate_blocks(inode);
644 	if (!err)
645 		atomic_inc(&F2FS_I(inode)->open_count);
646 	return err;
647 }
648 
/*
 * Release @count block addresses of the node addressed by @dn, starting at
 * dn->ofs_in_node.
 *
 * Every non-NULL address in the range is reset to NULL_ADDR.  Addresses
 * that are consecutive on disk are batched into a single
 * f2fs_invalidate_blocks() call.  For compressed files the number of valid
 * blocks freed per cluster is subtracted via f2fs_i_compr_blocks_update().
 * When anything was freed, the read/age extent caches are updated for the
 * whole range and the valid block count is decreased.
 *
 * dn->ofs_in_node is advanced while walking and restored to its original
 * value before returning.
 */
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
	__le32 *addr;
	bool compressed_cluster = false;
	int cluster_index = 0, valid_blocks = 0;
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
	block_t blkstart;
	int blklen = 0;

	addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs;
	blkstart = le32_to_cpu(*addr);

	/* Assumption: truncation starts with cluster */
	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
		block_t blkaddr = le32_to_cpu(*addr);

		/* at each cluster boundary, settle the previous cluster */
		if (f2fs_compressed_file(dn->inode) &&
					!(cluster_index & (cluster_size - 1))) {
			if (compressed_cluster)
				f2fs_i_compr_blocks_update(dn->inode,
							valid_blocks, false);
			compressed_cluster = (blkaddr == COMPRESS_ADDR);
			valid_blocks = 0;
		}

		if (blkaddr == NULL_ADDR)
			goto next;

		f2fs_set_data_blkaddr(dn, NULL_ADDR);

		if (__is_valid_data_blkaddr(blkaddr)) {
			if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
				goto next;
			if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
						DATA_GENERIC_ENHANCE))
				goto next;
			if (compressed_cluster)
				valid_blocks++;
		}

		/* extend the current contiguous run or start a new one */
		if (blkstart + blklen == blkaddr) {
			blklen++;
		} else {
			f2fs_invalidate_blocks(sbi, blkstart, blklen);
			blkstart = blkaddr;
			blklen = 1;
		}

		if (!released || blkaddr != COMPRESS_ADDR)
			nr_free++;

		continue;

next:
		/* flush the pending run and restart it at the next slot */
		if (blklen)
			f2fs_invalidate_blocks(sbi, blkstart, blklen);

		blkstart = le32_to_cpu(*(addr + 1));
		blklen = 0;
	}

	if (blklen)
		f2fs_invalidate_blocks(sbi, blkstart, blklen);

	if (compressed_cluster)
		f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);

	if (nr_free) {
		pgoff_t fofs;
		/*
		 * once we invalidate valid blkaddr in range [ofs, ofs + count],
		 * we will invalidate all blkaddr in the whole range.
		 */
		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio),
							dn->inode) + ofs;
		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
		f2fs_update_age_extent_cache_range(dn, fofs, len);
		dec_valid_block_count(sbi, dn->inode, nr_free);
	}
	dn->ofs_in_node = ofs;

	f2fs_update_time(sbi, REQ_TIME);
	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
					 dn->ofs_in_node, nr_free);
}
737 
/*
 * Zero the tail of the page that contains byte offset @from, i.e. the
 * bytes from @from's in-page offset up to the end of the folio.
 *
 * @cache_only: when true, only a folio that is already present in the page
 *              cache and uptodate is zeroed, and it is not marked dirty;
 *              when false, the data folio is fetched with
 *              f2fs_get_lock_data_folio() and marked dirty after zeroing.
 *
 * Returns 0 when there is nothing to do (page-aligned @from without
 * @cache_only, folio not cached, or -ENOENT from the lookup) and on
 * success; otherwise the error from f2fs_get_lock_data_folio().
 */
static int truncate_partial_data_page(struct inode *inode, u64 from,
								bool cache_only)
{
	loff_t offset = from & (PAGE_SIZE - 1);
	pgoff_t index = from >> PAGE_SHIFT;
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;

	if (!offset && !cache_only)
		return 0;

	if (cache_only) {
		folio = filemap_lock_folio(mapping, index);
		if (IS_ERR(folio))
		       return 0;
		if (folio_test_uptodate(folio))
			goto truncate_out;
		f2fs_folio_put(folio, true);
		return 0;
	}

	folio = f2fs_get_lock_data_folio(inode, index, true);
	if (IS_ERR(folio))
		return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio);
truncate_out:
	f2fs_folio_wait_writeback(folio, DATA, true, true);
	folio_zero_segment(folio, offset, folio_size(folio));

	/* An encrypted inode should have a key and truncate the last page. */
	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
	if (!cache_only)
		folio_mark_dirty(folio);
	f2fs_folio_put(folio, true);
	return 0;
}
773 
/*
 * Free all blocks of @inode from byte offset @from onwards.
 *
 * @lock: when true, f2fs_lock_op()/f2fs_unlock_op() are taken around the
 *        block release.
 *
 * Device-aliasing inodes only accept @from == 0 (-EINVAL otherwise) and
 * have their largest read extent invalidated as a whole.  Inline-data
 * inodes are truncated in the inode folio.  For other inodes the remainder
 * of the dnode containing @from is released here when that dnode is the
 * inode itself or the start offset is inside it, and the following node
 * blocks are released by f2fs_truncate_inode_blocks().  Finally the
 * partial page at @from is zeroed.
 *
 * Returns 0 on success or a negative errno.
 */
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct f2fs_lock_context lc;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct folio *ifolio;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	if (IS_DEVICE_ALIASING(inode) && from) {
		err = -EINVAL;
		goto out_err;
	}

	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

	/* nothing to release beyond the maximum file size */
	if (free_from >= max_file_blocks(inode))
		goto free_partial;

	if (lock)
		f2fs_lock_op(sbi, &lc);

	ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
	if (IS_ERR(ifolio)) {
		err = PTR_ERR(ifolio);
		goto out;
	}

	if (IS_DEVICE_ALIASING(inode)) {
		struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
		struct extent_info ei = et->largest;

		f2fs_invalidate_blocks(sbi, ei.blk, ei.len);

		dec_valid_block_count(sbi, inode, ei.len);
		f2fs_update_time(sbi, REQ_TIME);

		f2fs_folio_put(ifolio, true);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		f2fs_truncate_inline_inode(inode, ifolio, from);
		f2fs_folio_put(ifolio, true);
		/* makes the final step operate on the cached page only */
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ifolio, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_folio, inode);

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	if (dn.ofs_in_node || IS_INODE(dn.node_folio)) {
		f2fs_truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = f2fs_truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi, &lc);
free_partial:
	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);
out_err:
	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
857 
/*
 * Truncate the blocks of @inode from byte offset @from.
 *
 * With compression support built in, a compressed file is first truncated
 * at @from rounded up to a cluster boundary; the remaining partial cluster
 * is then handled by f2fs_truncate_partial_cluster().  Truncating a
 * compressed file to zero also clears FI_COMPRESS_RELEASED.
 *
 * Returns 0 on success or a negative errno.
 */
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	u64 aligned_from = from;
	int ret;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * for compressed file, only support cluster size
	 * aligned truncation.
	 */
	if (f2fs_compressed_file(inode))
		aligned_from = round_up(from,
				F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
#endif

	ret = f2fs_do_truncate_blocks(inode, aligned_from, lock);
	if (ret)
		return ret;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/*
	 * For compressed file, after release compress blocks, don't allow write
	 * direct, but we should allow write direct after truncate to zero.
	 */
	if (f2fs_compressed_file(inode) && aligned_from == 0 &&
	    is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
		clear_inode_flag(inode, FI_COMPRESS_RELEASED);

	/* @from was inside a cluster: finish the partial one */
	if (from != aligned_from)
		return f2fs_truncate_partial_cluster(inode, from, lock);
#endif

	return 0;
}
895 
f2fs_truncate(struct inode * inode)896 int f2fs_truncate(struct inode *inode)
897 {
898 	int err;
899 
900 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
901 		return -EIO;
902 
903 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
904 				S_ISLNK(inode->i_mode)))
905 		return 0;
906 
907 	trace_f2fs_truncate(inode);
908 
909 	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
910 		return -EIO;
911 
912 	err = f2fs_dquot_initialize(inode);
913 	if (err)
914 		return err;
915 
916 	/* we should check inline_data size */
917 	if (!f2fs_may_inline_data(inode)) {
918 		err = f2fs_convert_inline_inode(inode);
919 		if (err) {
920 			/*
921 			 * Always truncate page #0 to avoid page cache
922 			 * leak in evict() path.
923 			 */
924 			truncate_inode_pages_range(inode->i_mapping,
925 					F2FS_BLK_TO_BYTES(0),
926 					F2FS_BLK_END_BYTES(0));
927 			return err;
928 		}
929 	}
930 
931 	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
932 	if (err)
933 		return err;
934 
935 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
936 	f2fs_mark_inode_dirty_sync(inode, false);
937 	return 0;
938 }
939 
f2fs_force_buffered_io(struct inode * inode,int rw)940 static bool f2fs_force_buffered_io(struct inode *inode, int rw)
941 {
942 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
943 
944 	if (!fscrypt_dio_supported(inode))
945 		return true;
946 	if (fsverity_active(inode))
947 		return true;
948 	if (f2fs_compressed_file(inode))
949 		return true;
950 	/*
951 	 * only force direct read to use buffered IO, for direct write,
952 	 * it expects inline data conversion before committing IO.
953 	 */
954 	if (f2fs_has_inline_data(inode) && rw == READ)
955 		return true;
956 
957 	/* disallow direct IO if any of devices has unaligned blksize */
958 	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
959 		return true;
960 	/*
961 	 * for blkzoned device, fallback direct IO to buffered IO, so
962 	 * all IOs can be serialized by log-structured write.
963 	 */
964 	if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
965 	    !f2fs_is_pinned_file(inode))
966 		return true;
967 	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
968 		return true;
969 
970 	return false;
971 }
972 
/*
 * ->getattr: fill @stat for the inode behind @path.
 *
 * On top of generic_fillattr() this reports the creation time when the
 * inode stores one, the direct I/O alignment when STATX_DIOALIGN is
 * requested on a regular file, the f2fs-specific statx attribute bits,
 * and extra sectors for inline data/dentries.  Always returns 0.
 */
int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_inode *ri = NULL;
	unsigned int iflags;
	u64 attrs = 0;

	if (f2fs_has_extra_attr(inode) &&
	    f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
	    F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = fi->i_crtime.tv_sec;
		stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
	}

	/*
	 * Return the DIO alignment restrictions if requested.  We only return
	 * this information when requested, since on encrypted files it might
	 * take a fair bit of work to get if the file wasn't opened recently.
	 *
	 * f2fs sometimes supports DIO reads but not DIO writes.  STATX_DIOALIGN
	 * cannot represent that, so in that case we report no DIO support.
	 */
	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
		stat->result_mask |= STATX_DIOALIGN;
		if (!f2fs_force_buffered_io(inode, WRITE)) {
			unsigned int align = i_blocksize(inode);

			stat->dio_mem_align = align;
			stat->dio_offset_align = align;
		}
	}

	/* translate inode flags/state into statx attribute bits */
	iflags = fi->i_flags;
	attrs |= (iflags & F2FS_COMPR_FL) ? STATX_ATTR_COMPRESSED : 0;
	attrs |= (iflags & F2FS_APPEND_FL) ? STATX_ATTR_APPEND : 0;
	attrs |= IS_ENCRYPTED(inode) ? STATX_ATTR_ENCRYPTED : 0;
	attrs |= (iflags & F2FS_IMMUTABLE_FL) ? STATX_ATTR_IMMUTABLE : 0;
	attrs |= (iflags & F2FS_NODUMP_FL) ? STATX_ATTR_NODUMP : 0;
	attrs |= IS_VERITY(inode) ? STATX_ATTR_VERITY : 0;
	stat->attributes |= attrs;

	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
				  STATX_ATTR_APPEND |
				  STATX_ATTR_ENCRYPTED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP |
				  STATX_ATTR_VERITY);

	generic_fillattr(idmap, request_mask, inode, stat);

	/* we need to show initial sectors used for inline_data/dentries */
	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
					f2fs_has_inline_dentry(inode))
		stat->blocks += (stat->size + 511) >> 9;

	return 0;
}
1037 
#ifdef CONFIG_F2FS_FS_POSIX_ACL
/*
 * Copy the attributes selected in @attr->ia_valid into @inode.
 *
 * Owner and group go through i_uid_update()/i_gid_update(); timestamps are
 * copied when their ATTR_* bit is set.  A new mode is recorded with
 * set_acl_inode() rather than written to the inode directly, and loses
 * S_ISGID when the caller is neither in the owning group nor capable.
 */
static void __setattr_copy(struct mnt_idmap *idmap,
			   struct inode *inode, const struct iattr *attr)
{
	const unsigned int valid = attr->ia_valid;
	umode_t new_mode;

	i_uid_update(idmap, attr, inode);
	i_gid_update(idmap, attr, inode);

	if (valid & ATTR_ATIME)
		inode_set_atime_to_ts(inode, attr->ia_atime);
	if (valid & ATTR_MTIME)
		inode_set_mtime_to_ts(inode, attr->ia_mtime);
	if (valid & ATTR_CTIME)
		inode_set_ctime_to_ts(inode, attr->ia_ctime);

	if (!(valid & ATTR_MODE))
		return;

	new_mode = attr->ia_mode;
	if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
		new_mode &= ~S_ISGID;
	set_acl_inode(inode, new_mode);
}
#else
/* without POSIX ACL support the generic VFS helper is used as is */
#define __setattr_copy setattr_copy
#endif
1063 
f2fs_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * attr)1064 int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
1065 		 struct iattr *attr)
1066 {
1067 	struct inode *inode = d_inode(dentry);
1068 	struct f2fs_inode_info *fi = F2FS_I(inode);
1069 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1070 	int err;
1071 
1072 	if (unlikely(f2fs_cp_error(sbi)))
1073 		return -EIO;
1074 
1075 	err = setattr_prepare(idmap, dentry, attr);
1076 	if (err)
1077 		return err;
1078 
1079 	err = fscrypt_prepare_setattr(dentry, attr);
1080 	if (err)
1081 		return err;
1082 
1083 	if (unlikely(IS_IMMUTABLE(inode)))
1084 		return -EPERM;
1085 
1086 	if (unlikely(IS_APPEND(inode) &&
1087 			(attr->ia_valid & (ATTR_MODE | ATTR_UID |
1088 				  ATTR_GID | ATTR_TIMES_SET))))
1089 		return -EPERM;
1090 
1091 	if ((attr->ia_valid & ATTR_SIZE)) {
1092 		if (!f2fs_is_compress_backend_ready(inode) ||
1093 				IS_DEVICE_ALIASING(inode))
1094 			return -EOPNOTSUPP;
1095 		if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
1096 			!IS_ALIGNED(attr->ia_size,
1097 			F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
1098 			return -EINVAL;
1099 		/*
1100 		 * To prevent scattered pin block generation, we don't allow
1101 		 * smaller/equal size unaligned truncation for pinned file.
1102 		 * We only support overwrite IO to pinned file, so don't
1103 		 * care about larger size truncation.
1104 		 */
1105 		if (f2fs_is_pinned_file(inode) &&
1106 			attr->ia_size <= i_size_read(inode) &&
1107 			!IS_ALIGNED(attr->ia_size,
1108 			F2FS_BLK_TO_BYTES(CAP_BLKS_PER_SEC(sbi))))
1109 			return -EINVAL;
1110 	}
1111 
1112 	if (is_quota_modification(idmap, inode, attr)) {
1113 		err = f2fs_dquot_initialize(inode);
1114 		if (err)
1115 			return err;
1116 	}
1117 	if (i_uid_needs_update(idmap, attr, inode) ||
1118 	    i_gid_needs_update(idmap, attr, inode)) {
1119 		struct f2fs_lock_context lc;
1120 
1121 		f2fs_lock_op(sbi, &lc);
1122 		err = dquot_transfer(idmap, inode, attr);
1123 		if (err) {
1124 			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
1125 			f2fs_unlock_op(sbi, &lc);
1126 			return err;
1127 		}
1128 		/*
1129 		 * update uid/gid under lock_op(), so that dquot and inode can
1130 		 * be updated atomically.
1131 		 */
1132 		i_uid_update(idmap, attr, inode);
1133 		i_gid_update(idmap, attr, inode);
1134 		f2fs_mark_inode_dirty_sync(inode, true);
1135 		f2fs_unlock_op(sbi, &lc);
1136 	}
1137 
1138 	if (attr->ia_valid & ATTR_SIZE) {
1139 		loff_t old_size = i_size_read(inode);
1140 
1141 		if (attr->ia_size > MAX_INLINE_DATA(inode)) {
1142 			/*
1143 			 * should convert inline inode before i_size_write to
1144 			 * keep smaller than inline_data size with inline flag.
1145 			 */
1146 			err = f2fs_convert_inline_inode(inode);
1147 			if (err)
1148 				return err;
1149 		}
1150 
1151 		/*
1152 		 * wait for inflight dio, blocks should be removed after
1153 		 * IO completion.
1154 		 */
1155 		if (attr->ia_size < old_size)
1156 			inode_dio_wait(inode);
1157 
1158 		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
1159 		filemap_invalidate_lock(inode->i_mapping);
1160 
1161 		if (attr->ia_size > old_size)
1162 			f2fs_zero_post_eof_page(inode, attr->ia_size, false);
1163 		truncate_setsize(inode, attr->ia_size);
1164 
1165 		if (attr->ia_size <= old_size)
1166 			err = f2fs_truncate(inode);
1167 		/*
1168 		 * do not trim all blocks after i_size if target size is
1169 		 * larger than i_size.
1170 		 */
1171 		filemap_invalidate_unlock(inode->i_mapping);
1172 		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
1173 		if (err)
1174 			return err;
1175 
1176 		spin_lock(&fi->i_size_lock);
1177 		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1178 		fi->last_disk_size = i_size_read(inode);
1179 		spin_unlock(&fi->i_size_lock);
1180 	}
1181 
1182 	__setattr_copy(idmap, inode, attr);
1183 
1184 	if (attr->ia_valid & ATTR_MODE) {
1185 		err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));
1186 
1187 		if (is_inode_flag_set(inode, FI_ACL_MODE)) {
1188 			if (!err)
1189 				inode->i_mode = fi->i_acl_mode;
1190 			clear_inode_flag(inode, FI_ACL_MODE);
1191 		}
1192 	}
1193 
1194 	/* file size may changed here */
1195 	f2fs_mark_inode_dirty_sync(inode, true);
1196 
1197 	/* inode change will produce dirty node pages flushed by checkpoint */
1198 	f2fs_balance_fs(sbi, true);
1199 
1200 	return err;
1201 }
1202 
1203 const struct inode_operations f2fs_file_inode_operations = {
1204 	.getattr	= f2fs_getattr,
1205 	.setattr	= f2fs_setattr,
1206 	.get_inode_acl	= f2fs_get_acl,
1207 	.set_acl	= f2fs_set_acl,
1208 	.listxattr	= f2fs_listxattr,
1209 	.fiemap		= f2fs_fiemap,
1210 	.fileattr_get	= f2fs_fileattr_get,
1211 	.fileattr_set	= f2fs_fileattr_set,
1212 };
1213 
/*
 * Zero @len bytes starting at byte @start inside the data folio at page
 * @index.  The folio is obtained with f2fs_get_new_data_folio() under
 * f2fs_lock_op(), zeroed after f2fs_folio_wait_writeback(), and marked
 * dirty.
 *
 * Return: 0 on success (a zero @len is a no-op), or the error carried by
 * the folio pointer.
 */
static int fill_zero(struct inode *inode, pgoff_t index,
					loff_t start, loff_t len)
{
	struct f2fs_sb_info *sbi;
	struct f2fs_lock_context lc;
	struct folio *target;

	if (len == 0)
		return 0;

	sbi = F2FS_I_SB(inode);
	f2fs_balance_fs(sbi, true);

	f2fs_lock_op(sbi, &lc);
	target = f2fs_get_new_data_folio(inode, NULL, index, false);
	f2fs_unlock_op(sbi, &lc);
	if (IS_ERR(target))
		return PTR_ERR(target);

	f2fs_folio_wait_writeback(target, DATA, true, true);
	folio_zero_range(target, start, len);
	folio_mark_dirty(target);
	f2fs_folio_put(target, true);

	return 0;
}
1239 
/*
 * Truncate the data block slots for page indexes [@pg_start, @pg_end).
 *
 * The range is walked one dnode at a time.  When the dnode lookup reports
 * -ENOENT the walk jumps to the offset returned by
 * f2fs_get_next_page_offset(); any other lookup error aborts the walk.
 *
 * Return: 0 on success, otherwise the lookup error.
 */
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
	pgoff_t cur = pg_start;

	while (cur < pg_end) {
		struct dnode_of_data dn;
		pgoff_t end_offset, count;
		int err;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, cur, LOOKUP_NODE);
		if (err == -ENOENT) {
			cur = f2fs_get_next_page_offset(&dn, cur);
			continue;
		}
		if (err)
			return err;

		/* never go past this dnode's slots or the requested range */
		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		count = min(end_offset - dn.ofs_in_node, pg_end - cur);

		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);

		f2fs_truncate_data_blocks_range(&dn, count);
		f2fs_put_dnode(&dn);

		cur += count;
	}

	return 0;
}
1271 
/*
 * Punch a hole over bytes [@offset, @offset + @len).
 *
 * Partially covered pages at either end are zeroed with fill_zero().
 * Pages lying fully inside the range are dropped from the page cache and
 * their blocks are released with f2fs_truncate_hole().
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct address_space *mapping = inode->i_mapping;
	struct f2fs_lock_context lc;
	pgoff_t first_pg, last_pg;
	loff_t head_off, tail_off;
	int ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_zero_post_eof_page(inode, offset + len, true);

	first_pg = ((unsigned long long) offset) >> PAGE_SHIFT;
	last_pg = ((unsigned long long) offset + len) >> PAGE_SHIFT;
	head_off = offset & (PAGE_SIZE - 1);
	tail_off = (offset + len) & (PAGE_SIZE - 1);

	/* the whole range sits inside a single page */
	if (first_pg == last_pg)
		return fill_zero(inode, first_pg, head_off,
						tail_off - head_off);

	/* partial page at the front */
	if (head_off) {
		ret = fill_zero(inode, first_pg, head_off,
					PAGE_SIZE - head_off);
		if (ret)
			return ret;
		first_pg++;
	}

	/* partial page at the back */
	if (tail_off) {
		ret = fill_zero(inode, last_pg, 0, tail_off);
		if (ret)
			return ret;
	}

	/* no fully covered page in between */
	if (first_pg >= last_pg)
		return 0;

	f2fs_balance_fs(sbi, true);

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);

	truncate_pagecache_range(inode, (loff_t)first_pg << PAGE_SHIFT,
				 ((loff_t)last_pg << PAGE_SHIFT) - 1);

	f2fs_lock_op(sbi, &lc);
	ret = f2fs_truncate_hole(inode, first_pg, last_pg);
	f2fs_unlock_op(sbi, &lc);

	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return ret;
}
1334 
/*
 * Read the block addresses of @len pages starting at page @off of @inode
 * into @blkaddr, one dnode at a time.
 *
 * For every block that f2fs_is_checkpointed_data() rejects, the source
 * slot is set to NULL_ADDR and the matching @do_replace entry is set to 1.
 * Entries belonging to a missing dnode (-ENOENT) are skipped and left
 * untouched in both arrays.
 *
 * Return: 0 on success; -ENOENT when the lookup fails with -ENOENT and
 * dn.max_level is 0; -EFSCORRUPTED for an invalid block address;
 * -EOPNOTSUPP for a non-checkpointed block in LFS mode; or another lookup
 * error.
 */
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

	do {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);

		if (ret == -ENOENT) {
			if (dn.max_level == 0)
				return -ENOENT;

			/* hole: step over the slots of the missing dnode */
			done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
						dn.ofs_in_node, len);
			blkaddr += done;
			do_replace += done;
		} else if (ret) {
			return ret;
		} else {
			done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_folio, inode) -
							dn.ofs_in_node, len);

			for (i = 0; i < done; i++, blkaddr++, do_replace++,
							dn.ofs_in_node++) {
				block_t addr = f2fs_data_blkaddr(&dn);

				*blkaddr = addr;

				if (__is_valid_data_blkaddr(addr) &&
				    !f2fs_is_valid_blkaddr(sbi, addr,
						DATA_GENERIC_ENHANCE)) {
					f2fs_put_dnode(&dn);
					return -EFSCORRUPTED;
				}

				if (f2fs_is_checkpointed_data(sbi, addr))
					continue;

				if (f2fs_lfs_mode(sbi)) {
					f2fs_put_dnode(&dn);
					return -EOPNOTSUPP;
				}

				/* do not invalidate this block address */
				f2fs_update_data_blkaddr(&dn, NULL_ADDR);
				*do_replace = 1;
			}
			f2fs_put_dnode(&dn);
		}

		len -= done;
		off += done;
	} while (len);

	return 0;
}
1389 
/*
 * Undo __read_out_blkaddrs() for the @len pages starting at page @off.
 *
 * Every entry still flagged in @do_replace gets its saved address from
 * @blkaddr written back into the inode.  If the dnode cannot be looked up,
 * the block is dropped instead: the valid block count is decremented and
 * the block address is invalidated.
 *
 * Return: always 0.
 */
static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, int len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int i;

	for (i = 0; i < len; i++) {
		struct dnode_of_data dn;
		int ret;

		if (!do_replace[i])
			continue;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
		if (!ret) {
			f2fs_update_data_blkaddr(&dn, blkaddr[i]);
		} else {
			dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, blkaddr[i], 1);
		}
		f2fs_put_dnode(&dn);
	}

	return 0;
}
1413 
/*
 * Move the @len pages described by @blkaddr/@do_replace from page @src of
 * @src_inode to page @dst of @dst_inode.
 *
 * Each entry is handled in one of three ways:
 *  - NULL_ADDR and !@full: skipped.
 *  - flagged in @do_replace, or NULL_ADDR with @full: the destination slot
 *    is truncated; for flagged entries the block accounting moves from
 *    @src_inode to @dst_inode and the destination is pointed at the saved
 *    address with f2fs_replace_block().  Consecutive entries of this kind
 *    within one dnode are processed in a single pass.
 *  - anything else: the page content is copied into a new destination
 *    folio and the source page is released with f2fs_truncate_hole().
 *
 * @dst_inode's i_size is extended as destination slots are filled in.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
			block_t *blkaddr, int *do_replace,
			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
	pgoff_t i = 0;
	int ret;

	while (i < len) {
		if (blkaddr[i] == NULL_ADDR && !full) {
			i++;
			continue;
		}

		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
			struct dnode_of_data dn;
			struct node_info ni;
			/*
			 * Byte offset within the file: this has to be loff_t.
			 * A size_t would truncate offsets at or above 4 GiB
			 * on 32-bit builds and break the i_size check below.
			 */
			loff_t new_size;
			pgoff_t ilen;

			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
			ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
			if (ret)
				return ret;

			ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				return ret;
			}

			/* stay inside this dnode and inside the request */
			ilen = min((pgoff_t)
				ADDRS_PER_PAGE(dn.node_folio, dst_inode) -
						dn.ofs_in_node, len - i);
			do {
				dn.data_blkaddr = f2fs_data_blkaddr(&dn);
				f2fs_truncate_data_blocks_range(&dn, 1);

				if (do_replace[i]) {
					/* the block changes owner: src -> dst */
					f2fs_i_blocks_write(src_inode,
							1, false, false);
					f2fs_i_blocks_write(dst_inode,
							1, true, false);
					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					blkaddr[i], ni.version, true, false);

					do_replace[i] = 0;
				}
				dn.ofs_in_node++;
				i++;
				new_size = (loff_t)(dst + i) << PAGE_SHIFT;
				if (dst_inode->i_size < new_size)
					f2fs_i_size_write(dst_inode, new_size);
			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));

			f2fs_put_dnode(&dn);
		} else {
			struct folio *fsrc, *fdst;

			fsrc = f2fs_get_lock_data_folio(src_inode,
							src + i, true);
			if (IS_ERR(fsrc))
				return PTR_ERR(fsrc);
			fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i,
								true);
			if (IS_ERR(fdst)) {
				f2fs_folio_put(fsrc, true);
				return PTR_ERR(fdst);
			}

			f2fs_folio_wait_writeback(fdst, DATA, true, true);

			memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE);
			folio_mark_dirty(fdst);
			folio_set_f2fs_gcing(fdst);
			f2fs_folio_put(fdst, true);
			f2fs_folio_put(fsrc, true);

			/* the data now lives in dst; release the source page */
			ret = f2fs_truncate_hole(src_inode,
						src + i, src + i + 1);
			if (ret)
				return ret;
			i++;
		}
	}
	return 0;
}
1501 
/*
 * Move @len pages from page @src of @src_inode to page @dst of @dst_inode.
 *
 * The work is split into chunks of at most 4 * ADDRS_PER_BLOCK() pages.
 * For each chunk the source addresses are read out and then cloned into
 * the destination.  If either step fails, __roll_back_blkaddrs() is run
 * for that chunk and the error is returned.
 *
 * Return: 0 on success, -ENOMEM if a scratch array cannot be allocated, or
 * the error from the failing step.
 */
static int __exchange_data_block(struct inode *src_inode,
			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
			pgoff_t len, bool full)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);

	while (len) {
		pgoff_t chunk = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
		block_t *addrs;
		int *replace;
		int ret;

		addrs = f2fs_kvzalloc(sbi, array_size(chunk, sizeof(block_t)),
				      GFP_NOFS);
		if (!addrs)
			return -ENOMEM;

		replace = f2fs_kvzalloc(sbi, array_size(chunk, sizeof(int)),
					GFP_NOFS);
		if (!replace) {
			kvfree(addrs);
			return -ENOMEM;
		}

		ret = __read_out_blkaddrs(src_inode, addrs, replace,
					  src, chunk);
		if (!ret)
			ret = __clone_blkaddrs(src_inode, dst_inode, addrs,
					       replace, src, dst, chunk, full);

		/* put back whatever is still marked for replacement */
		if (ret)
			__roll_back_blkaddrs(src_inode, addrs, replace,
					     src, chunk);

		kvfree(addrs);
		kvfree(replace);
		if (ret)
			return ret;

		src += chunk;
		dst += chunk;
		len -= chunk;
	}

	return 0;
}
1553 
/*
 * Block-moving step of collapse-range.  The pages from the end of the
 * collapsed range up to the last page of the file are handed to
 * __exchange_data_block() with the start of the range as destination.
 * This runs under i_gc_rwsem[WRITE], the mapping invalidate lock and
 * f2fs_lock_op().
 *
 * Return: the result of __exchange_data_block().
 */
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t total_pgs = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t dst_pg = offset >> PAGE_SHIFT;
	pgoff_t src_pg = (offset + len) >> PAGE_SHIFT;
	struct f2fs_lock_context lc;
	int ret;

	f2fs_balance_fs(sbi, true);

	/* keep GC away while blocks are being exchanged */
	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);

	f2fs_zero_post_eof_page(inode, offset + len, false);

	f2fs_lock_op(sbi, &lc);
	f2fs_drop_extent_tree(inode);
	truncate_pagecache(inode, offset);
	ret = __exchange_data_block(inode, inode, src_pg, dst_pg,
				    total_pgs - src_pg, true);
	f2fs_unlock_op(sbi, &lc);

	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return ret;
}
1581 
/*
 * Remove bytes [@offset, @offset + @len) from the file and shift the
 * remainder down (collapse-range mode).
 *
 * The range must end before i_size, and both @offset and @len must be
 * multiples of F2FS_BLKSIZE.  On success i_size shrinks by @len.
 *
 * Return: 0 on success, -EINVAL for an unsupported range, or another
 * negative errno.
 */
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t shrunk_size;
	int ret;

	if (offset + len >= i_size_read(inode))
		return -EINVAL;

	/* the collapsed range has to be f2fs block aligned */
	if ((offset | len) & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	/* flush every dirty page from @offset to the end of the file */
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	ret = f2fs_do_collapse(inode, offset, len);
	if (ret)
		return ret;

	filemap_invalidate_lock(mapping);

	/* best effort: write out the moved pages, then drop them */
	filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);

	shrunk_size = i_size_read(inode) - len;
	ret = f2fs_truncate_blocks(inode, shrunk_size, true);

	filemap_invalidate_unlock(mapping);

	if (ret)
		return ret;

	f2fs_i_size_write(inode, shrunk_size);
	return 0;
}
1619 
/*
 * Turn the slots for page indexes [@start, @end) of dnode @dn into
 * NEW_ADDR.
 *
 * The first pass counts the NULL_ADDR slots and reserves that many blocks.
 * The second pass invalidates each existing block and replaces its address
 * with NEW_ADDR.  The read and age extent caches are updated for whatever
 * part of the range was processed, even when the second pass stops early.
 *
 * Return: 0 on success; the reservation error; -ENOSPC if a slot is still
 * NULL_ADDR after the reservation; -EFSCORRUPTED for an invalid block
 * address.
 */
static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
								pgoff_t end)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	const unsigned int first_ofs = dn->ofs_in_node;
	blkcnt_t holes = 0;
	pgoff_t pos;
	int ret;

	/* pass 1: count the slots that have no block yet */
	for (pos = start; pos < end; pos++, dn->ofs_in_node++)
		if (f2fs_data_blkaddr(dn) == NULL_ADDR)
			holes++;

	dn->ofs_in_node = first_ofs;
	ret = f2fs_reserve_new_blocks(dn, holes);
	if (ret)
		return ret;

	/* pass 2: rewind and convert every slot */
	dn->ofs_in_node = first_ofs;
	for (pos = start; pos < end; pos++, dn->ofs_in_node++) {
		dn->data_blkaddr = f2fs_data_blkaddr(dn);

		/*
		 * f2fs_reserve_new_blocks() does not guarantee that the
		 * whole request was allocated.
		 */
		if (dn->data_blkaddr == NULL_ADDR) {
			ret = -ENOSPC;
			break;
		}

		/* already reserved, nothing to invalidate */
		if (dn->data_blkaddr == NEW_ADDR)
			continue;

		if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
					DATA_GENERIC_ENHANCE)) {
			ret = -EFSCORRUPTED;
			break;
		}

		f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1);
		f2fs_set_data_blkaddr(dn, NEW_ADDR);
	}

	if (pos > start) {
		const pgoff_t handled = pos - start;

		f2fs_update_read_extent_cache_range(dn, start, 0, handled);
		f2fs_update_age_extent_cache_range(dn, start, handled);
	}

	return ret;
}
1672 
/*
 * Zero bytes [@offset, @offset + @len) of the file (zero-range mode).
 *
 * Partially covered pages at either end are zeroed with fill_zero().
 * Fully covered pages are handled one dnode at a time by
 * f2fs_do_zero_range().  When the zeroed range extends past i_size, the
 * size is either written or, with FALLOC_FL_KEEP_SIZE in @mode, the inode
 * is marked with file_set_keep_isize().
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
								int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct address_space *mapping = inode->i_mapping;
	loff_t new_size = i_size_read(inode);
	const loff_t range_end = offset + len;
	pgoff_t cur, first_pg, last_pg;
	loff_t head_off, tail_off;
	int ret;

	ret = inode_newsize_ok(inode, range_end);
	if (ret)
		return ret;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(mapping, offset, range_end - 1);
	if (ret)
		return ret;

	f2fs_zero_post_eof_page(inode, range_end, true);

	first_pg = ((unsigned long long) offset) >> PAGE_SHIFT;
	last_pg = ((unsigned long long) offset + len) >> PAGE_SHIFT;
	head_off = offset & (PAGE_SIZE - 1);
	tail_off = range_end & (PAGE_SIZE - 1);

	if (first_pg == last_pg) {
		/* the whole range sits inside a single page */
		ret = fill_zero(inode, first_pg, head_off,
						tail_off - head_off);
		if (ret)
			return ret;

		new_size = max_t(loff_t, new_size, range_end);
		goto out;
	}

	/* partial page at the front */
	if (head_off) {
		ret = fill_zero(inode, first_pg, head_off,
					PAGE_SIZE - head_off);
		if (ret)
			return ret;

		first_pg++;
		new_size = max_t(loff_t, new_size,
				(loff_t)first_pg << PAGE_SHIFT);
	}

	/* fully covered pages, one dnode per iteration */
	cur = first_pg;
	while (cur < last_pg) {
		struct dnode_of_data dn;
		struct f2fs_lock_context lc;
		unsigned int end_offset;
		pgoff_t end;

		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
		filemap_invalidate_lock(mapping);

		truncate_pagecache_range(inode,
			(loff_t)cur << PAGE_SHIFT,
			((loff_t)last_pg << PAGE_SHIFT) - 1);

		f2fs_lock_op(sbi, &lc);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, cur, ALLOC_NODE);
		if (ret) {
			f2fs_unlock_op(sbi, &lc);
			filemap_invalidate_unlock(mapping);
			f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
			goto out;
		}

		/* stop at the end of this dnode or of the range */
		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		end = min(last_pg, end_offset - dn.ofs_in_node + cur);

		ret = f2fs_do_zero_range(&dn, cur, end);
		f2fs_put_dnode(&dn);

		f2fs_unlock_op(sbi, &lc);
		filemap_invalidate_unlock(mapping);
		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

		f2fs_balance_fs(sbi, dn.node_changed);

		if (ret)
			goto out;

		cur = end;
		new_size = max_t(loff_t, new_size,
				(loff_t)cur << PAGE_SHIFT);
	}

	/* partial page at the back */
	if (tail_off) {
		ret = fill_zero(inode, last_pg, 0, tail_off);
		if (ret)
			goto out;

		new_size = max_t(loff_t, new_size, range_end);
	}

out:
	/* publish whatever part of the range was zeroed, even on error */
	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}
	return ret;
}
1783 
/*
 * Insert a hole of @len bytes at @offset and shift the data that follows
 * towards the end of the file (insert-range mode).
 *
 * @offset must lie inside the file, and both @offset and @len must be
 * multiples of F2FS_BLKSIZE.  Blocks are moved with
 * __exchange_data_block() in chunks of at most the inserted page count,
 * starting from the last page of the file and working back to @offset.
 * On success i_size grows by @len.
 *
 * Return: 0 on success, -EINVAL for an unsupported range, or another
 * negative errno.
 */
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t first_pg, last_pg, shift, cursor;
	loff_t grown_size;
	int ret;

	grown_size = i_size_read(inode) + len;
	ret = inode_newsize_ok(inode, grown_size);
	if (ret)
		return ret;

	if (offset >= i_size_read(inode))
		return -EINVAL;

	/* the inserted range has to be f2fs block aligned */
	if ((offset | len) & (F2FS_BLKSIZE - 1))
		return -EINVAL;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	f2fs_balance_fs(sbi, true);

	filemap_invalidate_lock(mapping);
	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
	filemap_invalidate_unlock(mapping);
	if (ret)
		return ret;

	/* flush every dirty page from @offset to the end of the file */
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	first_pg = offset >> PAGE_SHIFT;
	last_pg = (offset + len) >> PAGE_SHIFT;
	shift = last_pg - first_pg;
	cursor = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* keep GC away while blocks are being exchanged */
	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);

	f2fs_zero_post_eof_page(inode, offset + len, false);
	truncate_pagecache(inode, offset);

	/* walk from the tail towards @offset, one chunk per iteration */
	while (!ret && cursor > first_pg) {
		struct f2fs_lock_context lc;
		pgoff_t chunk = min(cursor - first_pg, shift);

		cursor -= chunk;

		f2fs_lock_op(sbi, &lc);
		f2fs_drop_extent_tree(inode);
		ret = __exchange_data_block(inode, inode, cursor,
					    cursor + shift, chunk, false);
		f2fs_unlock_op(sbi, &lc);
	}

	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	if (ret)
		return ret;

	/* write out the moved pages, then drop them from the cache */
	filemap_invalidate_lock(mapping);
	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
	truncate_pagecache(inode, offset);
	filemap_invalidate_unlock(mapping);

	if (ret)
		return ret;

	f2fs_i_size_write(inode, grown_size);
	return 0;
}
1863 
/*
 * Preallocate blocks for bytes [@offset, @offset + @len) (default
 * fallocate mode).
 *
 * Regular files are mapped with one f2fs_map_blocks() call.  Pinned files
 * are expanded CAP_BLKS_PER_SEC() blocks at a time under sbi->pin_sem,
 * running foreground GC first when has_not_enough_free_secs() reports a
 * shortage against sbi->reserved_pin_section.
 *
 * If the expansion stops early but some blocks were mapped, the size is
 * still advanced to cover the mapped part.  With FALLOC_FL_KEEP_SIZE in
 * @mode the inode is marked with file_set_keep_isize() instead of having
 * i_size written.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_map_blocks map = {
		.m_next_pgofs = NULL,
		.m_next_extent = NULL,
		.m_seg_type = NO_CHECK_TYPE,
		.m_may_create = true,
	};
	struct f2fs_gc_control gc_control = {
		.victim_segno = NULL_SEGNO,
		.init_gc_type = FG_GC,
		.should_migrate_blocks = false,
		.err_gc_skipped = true,
		.nr_free_secs = 0,
	};
	pgoff_t pg_start, pg_end;
	loff_t new_size, off_end;
	block_t expanded = 0;
	int err;

	err = inode_newsize_ok(inode, (len + offset));
	if (err)
		return err;

	err = f2fs_convert_inline_inode(inode);
	if (err)
		return err;

	f2fs_zero_post_eof_page(inode, offset + len, true);

	f2fs_balance_fs(sbi, true);

	pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
	off_end = (offset + len) & (PAGE_SIZE - 1);

	/* a trailing partial page needs a block as well */
	map.m_lblk = pg_start;
	map.m_len = pg_end - pg_start;
	if (off_end)
		map.m_len++;

	if (!map.m_len)
		return 0;

	if (!f2fs_is_pinned_file(inode)) {
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
		expanded = map.m_len;
		goto out_err;
	}

	{
		block_t blks_per_sec = CAP_BLKS_PER_SEC(sbi);
		block_t remaining = roundup(map.m_len, blks_per_sec);

		map.m_len = blks_per_sec;

		/* one pass per f2fs_map_blocks() call, until done or failed */
		do {
			f2fs_down_write(&sbi->pin_sem);

			if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)) &&
			    has_not_enough_free_secs(sbi, 0, 0)) {
				f2fs_up_write(&sbi->pin_sem);
				err = -ENOSPC;
				f2fs_warn_ratelimited(sbi,
					"ino:%llu, start:%lu, end:%lu, need to trigger GC to "
					"reclaim enough free segment when checkpoint is enabled",
					inode->i_ino, pg_start, pg_end);
				goto out_err;
			}

			if (has_not_enough_free_secs(sbi, 0,
					sbi->reserved_pin_section)) {
				f2fs_down_write_trace(&sbi->gc_lock,
						      &gc_control.lc);
				stat_inc_gc_call_count(sbi, FOREGROUND);
				err = f2fs_gc(sbi, &gc_control);
				/* -ENODATA from GC does not abort the expansion */
				if (err && err != -ENODATA) {
					f2fs_up_write(&sbi->pin_sem);
					goto out_err;
				}
			}

			err = f2fs_allocate_pinning_section(sbi);
			if (err) {
				f2fs_up_write(&sbi->pin_sem);
				goto out_err;
			}

			map.m_seg_type = CURSEG_COLD_DATA_PINNED;
			err = f2fs_map_blocks(inode, &map,
					      F2FS_GET_BLOCK_PRE_DIO);
			file_dont_truncate(inode);

			f2fs_up_write(&sbi->pin_sem);

			expanded += map.m_len;
			remaining -= map.m_len;
			map.m_lblk += map.m_len;
		} while (!err && remaining);

		map.m_len = expanded;
	}

out_err:
	if (!err) {
		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
	} else {
		pgoff_t last_off;

		if (!expanded)
			return err;

		last_off = pg_start + expanded - 1;

		/* update new size to the failed position */
		if (last_off == pg_end)
			new_size = offset + len;
		else
			new_size = (loff_t)(last_off + 1) << PAGE_SHIFT;
	}

	if (new_size > i_size_read(inode)) {
		if (mode & FALLOC_FL_KEEP_SIZE)
			file_set_keep_isize(inode);
		else
			f2fs_i_size_write(inode, new_size);
	}

	return err;
}
1985 
/*
 * f2fs_fallocate - dispatch a fallocate request to the matching handler
 *
 * The filesystem state, file type and @mode are validated first.  The
 * inode lock is then taken, in-flight direct I/O is waited for, and the
 * request is dispatched: punch hole, collapse range, zero range, insert
 * range, or plain expansion when none of those flags is set.  The
 * timestamps are refreshed only when the handler succeeds.
 *
 * Return: 0 on success, otherwise a negative errno.
 */
static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const int range_ops = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
			      FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE;
	long ret = 0;

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;
	if (!f2fs_is_checkpoint_ready(sbi))
		return -ENOSPC;
	if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode))
		return -EOPNOTSUPP;

	/* f2fs only supports ->fallocate for regular files */
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* encrypted files cannot have their blocks shifted */
	if (IS_ENCRYPTED(inode) &&
	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
		return -EOPNOTSUPP;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | range_ops))
		return -EOPNOTSUPP;

	inode_lock(inode);

	/*
	 * Compressed and pinned files only support plain expansion.  A
	 * pinned file must not be partially truncated because its blocks
	 * can be used by applications.
	 */
	if ((mode & range_ops) &&
	    (f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode))) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = file_modified(file);
	if (ret)
		goto out;

	/* blocks may only be removed once in-flight DIO completed */
	inode_dio_wait(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		/* nothing to punch at or past EOF; report success as is */
		if (offset >= inode->i_size)
			goto out;

		ret = f2fs_punch_hole(inode, offset, len);
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		ret = f2fs_collapse_range(inode, offset, len);
	} else if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = f2fs_zero_range(inode, offset, len, mode);
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		ret = f2fs_insert_range(inode, offset, len);
	} else {
		ret = f2fs_expand_inode_data(inode, offset, len, mode);
	}

	if (ret)
		goto out;

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	f2fs_mark_inode_dirty_sync(inode, false);
	f2fs_update_time(sbi, REQ_TIME);

out:
	inode_unlock(inode);

	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	return ret;
}
2062 
f2fs_release_file(struct inode * inode,struct file * filp)2063 static int f2fs_release_file(struct inode *inode, struct file *filp)
2064 {
2065 	if (atomic_dec_and_test(&F2FS_I(inode)->open_count))
2066 		f2fs_remove_donate_inode(inode);
2067 
2068 	/*
2069 	 * f2fs_release_file is called at every close calls. So we should
2070 	 * not drop any inmemory pages by close called by other process.
2071 	 */
2072 	if (!(filp->f_mode & FMODE_WRITE) ||
2073 			atomic_read(&inode->i_writecount) != 1)
2074 		return 0;
2075 
2076 	inode_lock(inode);
2077 	f2fs_abort_atomic_write(inode, true);
2078 	inode_unlock(inode);
2079 
2080 	return 0;
2081 }
2082 
/*
 * ->flush handler.
 *
 * When the task that started the atomic write is exiting (PF_EXITING), the
 * transaction has to be rolled back here: otherwise other readers/writers
 * could observe a corrupted database until every writer has closed the
 * file.  The roll-back must happen before the file lock is dropped, which
 * is why it is done in ->flush.
 *
 * Always returns 0.
 */
static int f2fs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	bool owner_exiting;

	owner_exiting = F2FS_I(inode)->atomic_write_task == current &&
				(current->flags & PF_EXITING);
	if (!owner_exiting)
		return 0;

	inode_lock(inode);
	f2fs_abort_atomic_write(inode, true);
	inode_unlock(inode);

	return 0;
}
2102 
/*
 * Apply a new set of f2fs on-disk flags to an inode.
 *
 * @inode:  inode to update
 * @iflags: requested flag values (only bits inside @mask are honoured)
 * @mask:   bits the caller is allowed to change
 *
 * Returns 0 on success, or:
 *   -EPERM       the inode is a quota file (IS_NOQUOTA)
 *   -EOPNOTSUPP  casefold/compression requested without the sb feature
 *   -ENOTEMPTY   casefold toggled while f2fs_empty_dir() reports non-empty
 *   -EINVAL      COMPR and NOCOMP both requested, or compression cannot be
 *                switched on/off for this inode
 *   other        error from f2fs_convert_inline_inode() or
 *                set_compress_context()
 */
static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	u32 cur_flags = fi->i_flags & mask;
	u32 toggled;

	/* mask can be shrunk by flags_valid selector */
	iflags &= mask;
	toggled = iflags ^ cur_flags;

	/* quota files are off limits to users */
	if (IS_NOQUOTA(inode))
		return -EPERM;

	if (toggled & F2FS_CASEFOLD_FL) {
		if (!f2fs_sb_has_casefold(sbi))
			return -EOPNOTSUPP;
		if (!f2fs_empty_dir(inode))
			return -ENOTEMPTY;
	}

	if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
		if (!f2fs_sb_has_compression(sbi))
			return -EOPNOTSUPP;
		/* the two compression flags are mutually exclusive */
		if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
			return -EINVAL;
	}

	if (toggled & F2FS_COMPR_FL) {
		if (cur_flags & F2FS_COMPR_FL) {
			/* compression is being switched off */
			if (!f2fs_disable_compressed_file(inode))
				return -EINVAL;
		} else {
			int rc;

			/* inline data must be converted before compressing */
			rc = f2fs_convert_inline_inode(inode);
			if (rc)
				return rc;

			f2fs_down_write(&fi->i_sem);
			if (!f2fs_may_compress(inode) ||
			    atomic_read(&fi->writeback) ||
			    (S_ISREG(inode->i_mode) &&
			     F2FS_HAS_BLOCKS(inode)))
				rc = -EINVAL;
			else
				rc = set_compress_context(inode);
			f2fs_up_write(&fi->i_sem);

			if (rc)
				return rc;
		}
	}

	/* replace the masked bits, keep everything outside the mask */
	fi->i_flags = iflags | (fi->i_flags & ~mask);
	f2fs_bug_on(sbi, (fi->i_flags & F2FS_COMPR_FL) &&
					(fi->i_flags & F2FS_NOCOMP_FL));

	/* mirror the project-inherit bit into the in-memory inode flag */
	if (fi->i_flags & F2FS_PROJINHERIT_FL)
		set_inode_flag(inode, FI_PROJ_INHERIT);
	else
		clear_inode_flag(inode, FI_PROJ_INHERIT);

	inode_set_ctime_current(inode);
	f2fs_set_inode_flags(inode);
	f2fs_mark_inode_dirty_sync(inode, true);
	return 0;
}
2169 
2170 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
2171 
2172 /*
2173  * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
2174  * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
2175  * F2FS_GETTABLE_FS_FL.  To also make it settable via FS_IOC_SETFLAGS, also add
2176  * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
2177  *
2178  * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
2179  * FS_IOC_FSSETXATTR is done by the VFS.
2180  */
2181 
/*
 * Translation table between f2fs on-disk i_flags bits and the FS_*_FL bits
 * exchanged through FS_IOC_{GET,SET}FLAGS.  Each entry pairs one on-disk
 * bit with its FS_*_FL equivalent; the table is walked in both directions
 * by f2fs_iflags_to_fsflags() and f2fs_fsflags_to_iflags().
 */
2182 static const struct {
	/* f2fs on-disk i_flags bit */
2183 	u32 iflag;
	/* matching FS_*_FL bit */
2184 	u32 fsflag;
2185 } f2fs_fsflags_map[] = {
2186 	{ F2FS_COMPR_FL,	FS_COMPR_FL },
2187 	{ F2FS_SYNC_FL,		FS_SYNC_FL },
2188 	{ F2FS_IMMUTABLE_FL,	FS_IMMUTABLE_FL },
2189 	{ F2FS_APPEND_FL,	FS_APPEND_FL },
2190 	{ F2FS_NODUMP_FL,	FS_NODUMP_FL },
2191 	{ F2FS_NOATIME_FL,	FS_NOATIME_FL },
2192 	{ F2FS_NOCOMP_FL,	FS_NOCOMP_FL },
2193 	{ F2FS_INDEX_FL,	FS_INDEX_FL },
2194 	{ F2FS_DIRSYNC_FL,	FS_DIRSYNC_FL },
2195 	{ F2FS_PROJINHERIT_FL,	FS_PROJINHERIT_FL },
2196 	{ F2FS_CASEFOLD_FL,	FS_CASEFOLD_FL },
2197 };
2198 
/*
 * FS_*_FL bits that may be reported via FS_IOC_GETFLAGS.
 *
 * NOTE(review): FS_ENCRYPT_FL, FS_INLINE_DATA_FL, FS_NOCOW_FL and
 * FS_VERITY_FL are listed here but have no f2fs_fsflags_map[] entry, so
 * they are presumably derived from other inode state by the getter; confirm
 * against the fileattr_get implementation.
 */
2199 #define F2FS_GETTABLE_FS_FL (		\
2200 		FS_COMPR_FL |		\
2201 		FS_SYNC_FL |		\
2202 		FS_IMMUTABLE_FL |	\
2203 		FS_APPEND_FL |		\
2204 		FS_NODUMP_FL |		\
2205 		FS_NOATIME_FL |		\
2206 		FS_NOCOMP_FL |		\
2207 		FS_INDEX_FL |		\
2208 		FS_DIRSYNC_FL |		\
2209 		FS_PROJINHERIT_FL |	\
2210 		FS_ENCRYPT_FL |		\
2211 		FS_INLINE_DATA_FL |	\
2212 		FS_NOCOW_FL |		\
2213 		FS_VERITY_FL |		\
2214 		FS_CASEFOLD_FL)
2215 
/*
 * FS_*_FL bits that may be changed via FS_IOC_SETFLAGS.  This is a subset
 * of F2FS_GETTABLE_FS_FL: FS_INDEX_FL, FS_ENCRYPT_FL, FS_INLINE_DATA_FL,
 * FS_NOCOW_FL and FS_VERITY_FL are gettable but not settable.
 */
2216 #define F2FS_SETTABLE_FS_FL (		\
2217 		FS_COMPR_FL |		\
2218 		FS_SYNC_FL |		\
2219 		FS_IMMUTABLE_FL |	\
2220 		FS_APPEND_FL |		\
2221 		FS_NODUMP_FL |		\
2222 		FS_NOATIME_FL |		\
2223 		FS_NOCOMP_FL |		\
2224 		FS_DIRSYNC_FL |		\
2225 		FS_PROJINHERIT_FL |	\
2226 		FS_CASEFOLD_FL)
2227 
2228 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
f2fs_iflags_to_fsflags(u32 iflags)2229 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
2230 {
2231 	u32 fsflags = 0;
2232 	int i;
2233 
2234 	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2235 		if (iflags & f2fs_fsflags_map[i].iflag)
2236 			fsflags |= f2fs_fsflags_map[i].fsflag;
2237 
2238 	return fsflags;
2239 }
2240 
2241 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
f2fs_fsflags_to_iflags(u32 fsflags)2242 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
2243 {
2244 	u32 iflags = 0;
2245 	int i;
2246 
2247 	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2248 		if (fsflags & f2fs_fsflags_map[i].fsflag)
2249 			iflags |= f2fs_fsflags_map[i].iflag;
2250 
2251 	return iflags;
2252 }
2253 
f2fs_ioc_getversion(struct file * filp,unsigned long arg)2254 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
2255 {
2256 	struct inode *inode = file_inode(filp);
2257 
2258 	return put_user(inode->i_generation, (int __user *)arg);
2259 }
2260 
/*
 * Start an atomic write session on a regular file.
 *
 * @filp:     file the ioctl was issued on; must be open for writing and
 *            must not use O_DIRECT
 * @truncate: when true, FI_ATOMIC_REPLACE is set, the page cache is
 *            truncated and i_size is reset to 0 for the session
 *
 * Returns 0 on success (including when the inode is already an atomic
 * file), -EBADF / -EACCES / -EINVAL when a precondition fails, or the error
 * returned by one of the helpers.
 */
static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
{
	struct inode *inode = file_inode(filp);
	struct mnt_idmap *idmap = file_mnt_idmap(filp);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t cow_size;
	int ret;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!inode_owner_or_capable(idmap, inode))
		return -EACCES;

	/* regular files only, and no direct I/O descriptors */
	if (!S_ISREG(inode->i_mode) || (filp->f_flags & O_DIRECT))
		return -EINVAL;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;

	inode_lock(inode);

	/* compression must be switched off and the file must not be pinned */
	if (!f2fs_disable_compressed_file(inode) ||
	    f2fs_is_pinned_file(inode)) {
		ret = -EINVAL;
		goto out;
	}

	/* already in an atomic session: nothing to do, ret is still 0 */
	if (f2fs_is_atomic_file(inode))
		goto out;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		goto out;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_down_write(&fi->i_gc_rwsem[READ]);

	/*
	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by
	 * f2fs_is_atomic_file.
	 */
	if (get_dirty_pages(inode))
		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%llu, npages=%u",
			  inode->i_ino, get_dirty_pages(inode));
	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (ret)
		goto out_unlock;

	if (!fi->cow_inode) {
		/* no COW inode yet: create one as a tmpfile in the parent dir */
		struct inode *dir = d_inode(file_dentry(filp)->d_parent);

		ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
		if (ret)
			goto out_unlock;

		set_inode_flag(fi->cow_inode, FI_COW_FILE);
		clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);

		/* link the COW inode back to the inode being written */
		F2FS_I(fi->cow_inode)->atomic_inode = inode;
	} else {
		/* recycle the existing COW inode after emptying it */
		f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));

		invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);

		ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
		if (ret)
			goto out_unlock;
	}

	f2fs_write_inode(inode, NULL);

	stat_inc_atomic_inode(inode);

	set_inode_flag(inode, FI_ATOMIC_FILE);

	/* remember the size the file had when the session began */
	cow_size = i_size_read(inode);
	fi->original_i_size = cow_size;
	if (truncate) {
		set_inode_flag(inode, FI_ATOMIC_REPLACE);
		truncate_inode_pages_final(inode->i_mapping);
		f2fs_i_size_write(inode, 0);
		cow_size = 0;
	}
	f2fs_i_size_write(fi->cow_inode, cow_size);

out_unlock:
	f2fs_up_write(&fi->i_gc_rwsem[READ]);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
	if (!ret) {
		f2fs_update_time(sbi, REQ_TIME);
		/* record which task owns the session */
		fi->atomic_write_task = current;
		stat_update_max_atomic_write(inode);
		fi->atomic_write_cnt = 0;
	}
out:
	inode_unlock(inode);
	mnt_drop_write_file(filp);
	return ret;
}
2372 
f2fs_ioc_commit_atomic_write(struct file * filp)2373 static int f2fs_ioc_commit_atomic_write(struct file *filp)
2374 {
2375 	struct inode *inode = file_inode(filp);
2376 	struct mnt_idmap *idmap = file_mnt_idmap(filp);
2377 	int ret;
2378 
2379 	if (!(filp->f_mode & FMODE_WRITE))
2380 		return -EBADF;
2381 
2382 	if (!inode_owner_or_capable(idmap, inode))
2383 		return -EACCES;
2384 
2385 	ret = mnt_want_write_file(filp);
2386 	if (ret)
2387 		return ret;
2388 
2389 	f2fs_balance_fs(F2FS_I_SB(inode), true);
2390 
2391 	inode_lock(inode);
2392 
2393 	if (f2fs_is_atomic_file(inode)) {
2394 		ret = f2fs_commit_atomic_write(inode);
2395 		if (!ret)
2396 			ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
2397 
2398 		f2fs_abort_atomic_write(inode, ret);
2399 	} else {
2400 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
2401 	}
2402 
2403 	inode_unlock(inode);
2404 	mnt_drop_write_file(filp);
2405 	return ret;
2406 }
2407 
f2fs_ioc_abort_atomic_write(struct file * filp)2408 static int f2fs_ioc_abort_atomic_write(struct file *filp)
2409 {
2410 	struct inode *inode = file_inode(filp);
2411 	struct mnt_idmap *idmap = file_mnt_idmap(filp);
2412 	int ret;
2413 
2414 	if (!(filp->f_mode & FMODE_WRITE))
2415 		return -EBADF;
2416 
2417 	if (!inode_owner_or_capable(idmap, inode))
2418 		return -EACCES;
2419 
2420 	ret = mnt_want_write_file(filp);
2421 	if (ret)
2422 		return ret;
2423 
2424 	inode_lock(inode);
2425 
2426 	f2fs_abort_atomic_write(inode, true);
2427 
2428 	inode_unlock(inode);
2429 
2430 	mnt_drop_write_file(filp);
2431 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2432 	return ret;
2433 }
2434 
/*
 * Shut the filesystem down according to @flag.
 *
 * @sbi:       filesystem instance
 * @flag:      one of the F2FS_GOING_DOWN_* modes
 * @readonly:  when true, the GC/discard teardown below is skipped
 * @need_lock: when true, sb->s_umount is taken for writing around that
 *             teardown
 *
 * Returns 0 on success, -EINVAL for an unknown @flag, or the error from
 * bdev_freeze() / f2fs_sync_fs() (an -EIO from f2fs_sync_fs() is turned
 * into 0).  The result is always reported through trace_f2fs_shutdown().
 */
int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
						bool readonly, bool need_lock)
{
	struct super_block *sb = sbi->sb;
	int ret = 0;

	switch (flag) {
	case F2FS_GOING_DOWN_FULLSYNC:
		ret = bdev_freeze(sb->s_bdev);
		if (ret)
			goto out;
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		bdev_thaw(sb->s_bdev);
		break;
	case F2FS_GOING_DOWN_METASYNC:
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret == -EIO) {
			ret = 0;
			goto out;
		}
		if (ret)
			goto out;
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NOSYNC:
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_METAFLUSH:
		f2fs_sync_meta_pages(sbi, LONG_MAX, FS_META_IO);
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
		break;
	case F2FS_GOING_DOWN_NEED_FSCK:
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
		set_sbi_flag(sbi, SBI_IS_DIRTY);
		/* do checkpoint only */
		ret = f2fs_sync_fs(sb, 1);
		if (ret == -EIO)
			ret = 0;
		goto out;
	default:
		ret = -EINVAL;
		goto out;
	}

	if (!readonly) {
		/*
		 * grab sb->s_umount to avoid racing w/ remount() and other
		 * shutdown paths.
		 */
		if (need_lock)
			down_write(&sb->s_umount);

		f2fs_stop_gc_thread(sbi);
		f2fs_stop_discard_thread(sbi);

		f2fs_drop_discard_cmd(sbi);
		clear_opt(sbi, DISCARD);

		if (need_lock)
			up_write(&sb->s_umount);

		f2fs_update_time(sbi, REQ_TIME);
	}
out:

	trace_f2fs_shutdown(sbi, flag, ret);

	return ret;
}
2506 
f2fs_ioc_shutdown(struct file * filp,unsigned long arg)2507 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
2508 {
2509 	struct inode *inode = file_inode(filp);
2510 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2511 	__u32 in;
2512 	int ret;
2513 	bool need_drop = false, readonly = false;
2514 
2515 	if (!capable(CAP_SYS_ADMIN))
2516 		return -EPERM;
2517 
2518 	if (get_user(in, (__u32 __user *)arg))
2519 		return -EFAULT;
2520 
2521 	if (in != F2FS_GOING_DOWN_FULLSYNC) {
2522 		ret = mnt_want_write_file(filp);
2523 		if (ret) {
2524 			if (ret != -EROFS)
2525 				return ret;
2526 
2527 			/* fallback to nosync shutdown for readonly fs */
2528 			in = F2FS_GOING_DOWN_NOSYNC;
2529 			readonly = true;
2530 		} else {
2531 			need_drop = true;
2532 		}
2533 	}
2534 
2535 	ret = f2fs_do_shutdown(sbi, in, readonly, true);
2536 
2537 	if (need_drop)
2538 		mnt_drop_write_file(filp);
2539 
2540 	return ret;
2541 }
2542 
/*
 * Register, update or remove the donate range of a regular file.
 *
 * @inode:  target inode
 * @offset: byte offset of the range
 * @len:    byte length of the range; 0 removes the inode from the donate
 *          list instead of adding/updating it
 *
 * Non-regular files, ranges beyond the maximum file size and atomic files
 * are silently ignored (return 0).
 *
 * Returns 0 on success; for @len == 0, -ENOENT when the inode is not on the
 * donate list and -EALREADY when FI_DONATE_FINISHED was already set.
 */
static int f2fs_keep_noreuse_range(struct inode *inode,
				loff_t offset, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
	u64 first_page, page_limit;
	int ret = 0;

	if (!S_ISREG(inode->i_mode))
		return 0;

	if (offset >= max_bytes || len > max_bytes ||
	    (offset + len) > max_bytes)
		return 0;

	/* page index of the first byte, and one past the last page */
	first_page = offset >> PAGE_SHIFT;
	page_limit = DIV_ROUND_UP(offset + len, PAGE_SIZE);

	inode_lock(inode);
	if (f2fs_is_atomic_file(inode))
		goto unlock;

	spin_lock(&sbi->inode_lock[DONATE_INODE]);
	if (len) {
		/* (re)queue the inode at the tail of the donate list */
		if (list_empty(&fi->gdonate_list)) {
			list_add_tail(&fi->gdonate_list,
					&sbi->inode_list[DONATE_INODE]);
			sbi->donate_files++;
		} else {
			list_move_tail(&fi->gdonate_list,
					&sbi->inode_list[DONATE_INODE]);
		}
		fi->donate_start = first_page;
		fi->donate_end = page_limit - 1;
		clear_inode_flag(inode, FI_DONATE_FINISHED);
	} else if (list_empty(&fi->gdonate_list)) {
		/* len == 0 asks for removal, but nothing is registered */
		ret = -ENOENT;
	} else {
		/* let's remove the range, if len = 0 */
		list_del_init(&fi->gdonate_list);
		sbi->donate_files--;
		if (is_inode_flag_set(inode, FI_DONATE_FINISHED))
			ret = -EALREADY;
		else
			set_inode_flag(inode, FI_DONATE_FINISHED);
	}
	spin_unlock(&sbi->inode_lock[DONATE_INODE]);
unlock:
	inode_unlock(inode);

	return ret;
}
2597 
f2fs_ioc_fitrim(struct file * filp,unsigned long arg)2598 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
2599 {
2600 	struct inode *inode = file_inode(filp);
2601 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2602 	struct fstrim_range range;
2603 	int ret;
2604 
2605 	if (!capable(CAP_SYS_ADMIN))
2606 		return -EPERM;
2607 
2608 	if (!f2fs_hw_support_discard(sbi))
2609 		return -EOPNOTSUPP;
2610 
2611 	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
2612 				sizeof(range)))
2613 		return -EFAULT;
2614 
2615 	ret = mnt_want_write_file(filp);
2616 	if (ret)
2617 		return ret;
2618 
2619 	range.minlen = max_t(unsigned int, range.minlen,
2620 			f2fs_hw_discard_granularity(sbi));
2621 	ret = f2fs_trim_fs(sbi, &range);
2622 	mnt_drop_write_file(filp);
2623 	if (ret < 0)
2624 		return ret;
2625 
2626 	if (copy_to_user((struct fstrim_range __user *)arg, &range,
2627 				sizeof(range)))
2628 		return -EFAULT;
2629 	f2fs_update_time(sbi, REQ_TIME);
2630 	return 0;
2631 }
2632 
/* Return true if any of the 16 bytes of @u is non-zero, false otherwise. */
static bool uuid_is_nonzero(__u8 u[16])
{
	const __u8 *cur = u;
	const __u8 *end = u + 16;

	while (cur < end) {
		if (*cur++)
			return true;
	}

	return false;
}
2642 
f2fs_ioc_set_encryption_policy(struct file * filp,unsigned long arg)2643 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2644 {
2645 	struct inode *inode = file_inode(filp);
2646 	int ret;
2647 
2648 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
2649 		return -EOPNOTSUPP;
2650 
2651 	ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2652 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2653 	return ret;
2654 }
2655 
f2fs_ioc_get_encryption_policy(struct file * filp,unsigned long arg)2656 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2657 {
2658 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2659 		return -EOPNOTSUPP;
2660 	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2661 }
2662 
f2fs_ioc_get_encryption_pwsalt(struct file * filp,unsigned long arg)2663 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2664 {
2665 	struct inode *inode = file_inode(filp);
2666 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2667 	u8 encrypt_pw_salt[16];
2668 	int err;
2669 
2670 	if (!f2fs_sb_has_encrypt(sbi))
2671 		return -EOPNOTSUPP;
2672 
2673 	err = mnt_want_write_file(filp);
2674 	if (err)
2675 		return err;
2676 
2677 	f2fs_down_write(&sbi->sb_lock);
2678 
2679 	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2680 		goto got_it;
2681 
2682 	/* update superblock with uuid */
2683 	generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2684 
2685 	err = f2fs_commit_super(sbi, false);
2686 	if (err) {
2687 		/* undo new data */
2688 		memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2689 		goto out_err;
2690 	}
2691 got_it:
2692 	memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
2693 out_err:
2694 	f2fs_up_write(&sbi->sb_lock);
2695 	mnt_drop_write_file(filp);
2696 
2697 	if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
2698 		err = -EFAULT;
2699 
2700 	return err;
2701 }
2702 
f2fs_ioc_get_encryption_policy_ex(struct file * filp,unsigned long arg)2703 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
2704 					     unsigned long arg)
2705 {
2706 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2707 		return -EOPNOTSUPP;
2708 
2709 	return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
2710 }
2711 
f2fs_ioc_add_encryption_key(struct file * filp,unsigned long arg)2712 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
2713 {
2714 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2715 		return -EOPNOTSUPP;
2716 
2717 	return fscrypt_ioctl_add_key(filp, (void __user *)arg);
2718 }
2719 
f2fs_ioc_remove_encryption_key(struct file * filp,unsigned long arg)2720 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
2721 {
2722 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2723 		return -EOPNOTSUPP;
2724 
2725 	return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
2726 }
2727 
f2fs_ioc_remove_encryption_key_all_users(struct file * filp,unsigned long arg)2728 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
2729 						    unsigned long arg)
2730 {
2731 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2732 		return -EOPNOTSUPP;
2733 
2734 	return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
2735 }
2736 
f2fs_ioc_get_encryption_key_status(struct file * filp,unsigned long arg)2737 static int f2fs_ioc_get_encryption_key_status(struct file *filp,
2738 					      unsigned long arg)
2739 {
2740 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2741 		return -EOPNOTSUPP;
2742 
2743 	return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
2744 }
2745 
f2fs_ioc_get_encryption_nonce(struct file * filp,unsigned long arg)2746 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
2747 {
2748 	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2749 		return -EOPNOTSUPP;
2750 
2751 	return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
2752 }
2753 
f2fs_ioc_gc(struct file * filp,unsigned long arg)2754 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2755 {
2756 	struct inode *inode = file_inode(filp);
2757 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2758 	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
2759 			.no_bg_gc = false,
2760 			.should_migrate_blocks = false,
2761 			.nr_free_secs = 0 };
2762 	__u32 sync;
2763 	int ret;
2764 
2765 	if (!capable(CAP_SYS_ADMIN))
2766 		return -EPERM;
2767 
2768 	if (get_user(sync, (__u32 __user *)arg))
2769 		return -EFAULT;
2770 
2771 	if (f2fs_readonly(sbi->sb))
2772 		return -EROFS;
2773 
2774 	ret = mnt_want_write_file(filp);
2775 	if (ret)
2776 		return ret;
2777 
2778 	if (!sync) {
2779 		if (!f2fs_down_write_trylock_trace(&sbi->gc_lock,
2780 						&gc_control.lc)) {
2781 			ret = -EBUSY;
2782 			goto out;
2783 		}
2784 	} else {
2785 		f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc);
2786 	}
2787 
2788 	gc_control.init_gc_type = sync ? FG_GC : BG_GC;
2789 	gc_control.err_gc_skipped = sync;
2790 	stat_inc_gc_call_count(sbi, FOREGROUND);
2791 	ret = f2fs_gc(sbi, &gc_control);
2792 out:
2793 	mnt_drop_write_file(filp);
2794 	return ret;
2795 }
2796 
__f2fs_ioc_gc_range(struct file * filp,struct f2fs_gc_range * range)2797 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
2798 {
2799 	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
2800 	struct f2fs_gc_control gc_control = {
2801 			.init_gc_type = range->sync ? FG_GC : BG_GC,
2802 			.no_bg_gc = false,
2803 			.should_migrate_blocks = false,
2804 			.err_gc_skipped = range->sync,
2805 			.nr_free_secs = 0 };
2806 	u64 end;
2807 	int ret;
2808 
2809 	if (!capable(CAP_SYS_ADMIN))
2810 		return -EPERM;
2811 	if (f2fs_readonly(sbi->sb))
2812 		return -EROFS;
2813 
2814 	end = range->start + range->len;
2815 	if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
2816 					end >= MAX_BLKADDR(sbi))
2817 		return -EINVAL;
2818 
2819 	ret = mnt_want_write_file(filp);
2820 	if (ret)
2821 		return ret;
2822 
2823 do_more:
2824 	if (!range->sync) {
2825 		if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) {
2826 			ret = -EBUSY;
2827 			goto out;
2828 		}
2829 	} else {
2830 		f2fs_down_write_trace(&sbi->gc_lock, &gc_control.lc);
2831 	}
2832 
2833 	gc_control.victim_segno = GET_SEGNO(sbi, range->start);
2834 	stat_inc_gc_call_count(sbi, FOREGROUND);
2835 	ret = f2fs_gc(sbi, &gc_control);
2836 	if (ret) {
2837 		if (ret == -EBUSY)
2838 			ret = -EAGAIN;
2839 		goto out;
2840 	}
2841 	range->start += CAP_BLKS_PER_SEC(sbi);
2842 	if (range->start <= end)
2843 		goto do_more;
2844 out:
2845 	mnt_drop_write_file(filp);
2846 	return ret;
2847 }
2848 
f2fs_ioc_gc_range(struct file * filp,unsigned long arg)2849 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2850 {
2851 	struct f2fs_gc_range range;
2852 
2853 	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2854 							sizeof(range)))
2855 		return -EFAULT;
2856 	return __f2fs_ioc_gc_range(filp, &range);
2857 }
2858 
f2fs_ioc_write_checkpoint(struct file * filp)2859 static int f2fs_ioc_write_checkpoint(struct file *filp)
2860 {
2861 	struct inode *inode = file_inode(filp);
2862 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2863 	int ret;
2864 
2865 	if (!capable(CAP_SYS_ADMIN))
2866 		return -EPERM;
2867 
2868 	if (f2fs_readonly(sbi->sb))
2869 		return -EROFS;
2870 
2871 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2872 		f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
2873 		return -EINVAL;
2874 	}
2875 
2876 	ret = mnt_want_write_file(filp);
2877 	if (ret)
2878 		return ret;
2879 
2880 	ret = f2fs_sync_fs(sbi->sb, 1);
2881 
2882 	mnt_drop_write_file(filp);
2883 	return ret;
2884 }
2885 
/*
 * Defragment a byte range of a file by re-dirtying its pages so they get
 * written out again.
 *
 * @sbi:   filesystem instance
 * @filp:  file to defragment
 * @range: byte range (start/len); on success range->len is overwritten
 *         with the number of bytes that were queued for rewriting (0 when
 *         the range was found to be physically contiguous)
 *
 * The work is done under the inode lock with FI_OPU_WRITE set:
 *   1. write back dirty pages in the range;
 *   2. first pass over the block mappings: detect whether consecutive
 *      mapped extents are physically discontinuous and count the blocks;
 *   3. second pass: lock each mapped folio, mark it dirty and tag it via
 *      folio_set_f2fs_gcing(), in batches of at most BLKS_PER_SEG() folios
 *      with FI_SKIP_WRITES set, then kick filemap_fdatawrite() per batch.
 *
 * Returns 0 on success, -EINVAL when the inode is atomic, has
 * FI_COMPRESS_RELEASED set, or f2fs_should_update_inplace() is true,
 * -EAGAIN when has_not_enough_free_secs() reports too few free sections,
 * or an error from the mapping / writeback helpers.
 */
f2fs_defragment_range(struct f2fs_sb_info * sbi,struct file * filp,struct f2fs_defragment * range)2886 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
2887 					struct file *filp,
2888 					struct f2fs_defragment *range)
2889 {
2890 	struct inode *inode = file_inode(filp);
2891 	struct f2fs_map_blocks map = { .m_next_extent = NULL,
2892 					.m_seg_type = NO_CHECK_TYPE,
2893 					.m_may_create = false };
2894 	struct extent_info ei = {};
2895 	pgoff_t pg_start, pg_end, next_pgofs;
2896 	unsigned int total = 0, sec_num;
2897 	block_t blk_end = 0;
2898 	bool fragmented = false;
2899 	int err;
2900 
2901 	f2fs_balance_fs(sbi, true);
2902 
2903 	inode_lock(inode);
	/* page-index bounds of the request; the end is clamped to i_size */
2904 	pg_start = range->start >> PAGE_SHIFT;
2905 	pg_end = min_t(pgoff_t,
2906 				(range->start + range->len) >> PAGE_SHIFT,
2907 				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
2908 
2909 	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
2910 		f2fs_is_atomic_file(inode)) {
2911 		err = -EINVAL;
2912 		goto unlock_out;
2913 	}
2914 
2915 	/* if in-place-update policy is enabled, don't waste time here */
2916 	set_inode_flag(inode, FI_OPU_WRITE);
2917 	if (f2fs_should_update_inplace(inode, NULL)) {
2918 		err = -EINVAL;
2919 		goto out;
2920 	}
2921 
2922 	/* writeback all dirty pages in the range */
2923 	err = filemap_write_and_wait_range(inode->i_mapping,
2924 						pg_start << PAGE_SHIFT,
2925 						(pg_end << PAGE_SHIFT) - 1);
2926 	if (err)
2927 		goto out;
2928 
2929 	/*
2930 	 * lookup mapping info in extent cache, skip defragmenting if physical
2931 	 * block addresses are continuous.
2932 	 */
2933 	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
2934 		if ((pgoff_t)ei.fofs + ei.len >= pg_end)
2935 			goto out;
2936 	}
2937 
2938 	map.m_lblk = pg_start;
2939 	map.m_next_pgofs = &next_pgofs;
2940 
2941 	/*
2942 	 * lookup mapping info in dnode page cache, skip defragmenting if all
2943 	 * physical block addresses are continuous even if there are hole(s)
2944 	 * in logical blocks.
2945 	 */
2946 	while (map.m_lblk < pg_end) {
2947 		map.m_len = pg_end - map.m_lblk;
2948 		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2949 		if (err)
2950 			goto out;
2951 
		/* nothing mapped here: jump to the next offset reported */
2952 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2953 			map.m_lblk = next_pgofs;
2954 			continue;
2955 		}
2956 
		/* this extent does not start where the previous one ended */
2957 		if (blk_end && blk_end != map.m_pblk)
2958 			fragmented = true;
2959 
2960 		/* record total count of block that we're going to move */
2961 		total += map.m_len;
2962 
2963 		blk_end = map.m_pblk + map.m_len;
2964 
2965 		map.m_lblk += map.m_len;
2966 	}
2967 
	/* already contiguous: report 0 bytes moved */
2968 	if (!fragmented) {
2969 		total = 0;
2970 		goto out;
2971 	}
2972 
2973 	sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
2974 
2975 	/*
2976 	 * make sure there are enough free section for LFS allocation, this can
2977 	 * avoid defragment running in SSR mode when free section are allocated
2978 	 * intensively
2979 	 */
2980 	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
2981 		err = -EAGAIN;
2982 		goto out;
2983 	}
2984 
	/* second pass: restart from pg_start and recount in 'total' */
2985 	map.m_lblk = pg_start;
2986 	map.m_len = pg_end - pg_start;
2987 	total = 0;
2988 
2989 	while (map.m_lblk < pg_end) {
2990 		pgoff_t idx;
		/* folios dirtied in the current batch */
2991 		int cnt = 0;
2992 
2993 do_map:
2994 		map.m_len = pg_end - map.m_lblk;
2995 		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2996 		if (err)
2997 			goto clear_out;
2998 
2999 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3000 			map.m_lblk = next_pgofs;
3001 			goto check;
3002 		}
3003 
3004 		set_inode_flag(inode, FI_SKIP_WRITES);
3005 
3006 		idx = map.m_lblk;
3007 		while (idx < map.m_lblk + map.m_len &&
3008 						cnt < BLKS_PER_SEG(sbi)) {
3009 			struct folio *folio;
3010 
3011 			folio = f2fs_get_lock_data_folio(inode, idx, true);
3012 			if (IS_ERR(folio)) {
3013 				err = PTR_ERR(folio);
3014 				goto clear_out;
3015 			}
3016 
3017 			f2fs_folio_wait_writeback(folio, DATA, true, true);
3018 
3019 			folio_mark_dirty(folio);
3020 			folio_set_f2fs_gcing(folio);
3021 			f2fs_folio_put(folio, true);
3022 
3023 			idx++;
3024 			cnt++;
3025 			total++;
3026 		}
3027 
3028 		map.m_lblk = idx;
3029 check:
		/* keep filling the batch until the range or the batch limit ends */
3030 		if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
3031 			goto do_map;
3032 
3033 		clear_inode_flag(inode, FI_SKIP_WRITES);
3034 
		/* start writeback of the batch that was just dirtied */
3035 		err = filemap_fdatawrite(inode->i_mapping);
3036 		if (err)
3037 			goto out;
3038 	}
3039 clear_out:
3040 	clear_inode_flag(inode, FI_SKIP_WRITES);
3041 out:
3042 	clear_inode_flag(inode, FI_OPU_WRITE);
3043 unlock_out:
3044 	inode_unlock(inode);
	/* on success, hand the amount of queued data back in bytes */
3045 	if (!err)
3046 		range->len = (u64)total << PAGE_SHIFT;
3047 	return err;
3048 }
3049 
f2fs_ioc_defragment(struct file * filp,unsigned long arg)3050 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
3051 {
3052 	struct inode *inode = file_inode(filp);
3053 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3054 	struct f2fs_defragment range;
3055 	int err;
3056 
3057 	if (!capable(CAP_SYS_ADMIN))
3058 		return -EPERM;
3059 
3060 	if (!S_ISREG(inode->i_mode))
3061 		return -EINVAL;
3062 
3063 	if (f2fs_readonly(sbi->sb))
3064 		return -EROFS;
3065 
3066 	if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
3067 							sizeof(range)))
3068 		return -EFAULT;
3069 
3070 	/* verify alignment of offset & size */
3071 	if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
3072 		return -EINVAL;
3073 
3074 	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
3075 					max_file_blocks(inode)))
3076 		return -EINVAL;
3077 
3078 	err = mnt_want_write_file(filp);
3079 	if (err)
3080 		return err;
3081 
3082 	err = f2fs_defragment_range(sbi, filp, &range);
3083 	mnt_drop_write_file(filp);
3084 
3085 	if (range.len)
3086 		f2fs_update_time(sbi, REQ_TIME);
3087 	if (err < 0)
3088 		return err;
3089 
3090 	if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
3091 							sizeof(range)))
3092 		return -EFAULT;
3093 
3094 	return 0;
3095 }
3096 
/*
 * Move a byte range from @file_in (at @pos_in) to @file_out (at @pos_out).
 *
 * Both files must be regular, unencrypted files on the same mount and
 * superblock; compressed, pinned and atomic files are rejected.  @pos_in,
 * @pos_out and the end of the source range must be F2FS_BLKSIZE aligned; a
 * range that ends exactly at the source i_size is rounded up to the next
 * block boundary.  A @len of zero means "from @pos_in up to the source
 * i_size".
 *
 * Dirty pages of both ranges are written back first, then the block ranges
 * are handed to __exchange_data_block().  On success the destination i_size
 * is set to @pos_out + requested length when that lies beyond the old size,
 * otherwise it is put back to the value sampled before the exchange.
 *
 * Returns 0 on success or a negative errno (-EXDEV, -EROFS, -EINVAL,
 * -EOPNOTSUPP, -EBUSY, or whatever a helper reported).
 */
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
			struct file *file_out, loff_t pos_out, size_t len)
{
	struct inode *src = file_inode(file_in);
	struct inode *dst = file_inode(file_out);
	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
	struct f2fs_lock_context lc;
	size_t olen = len;
	/*
	 * These two hold file sizes/offsets, so they must be loff_t: with a
	 * 32-bit size_t a value >= 4GiB would be truncated and later written
	 * back as the destination i_size.
	 */
	loff_t dst_max_i_size = 0;
	loff_t dst_osize;
	int ret;

	if (file_in->f_path.mnt != file_out->f_path.mnt ||
				src->i_sb != dst->i_sb)
		return -EXDEV;

	if (unlikely(f2fs_readonly(src->i_sb)))
		return -EROFS;

	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
		return -EINVAL;

	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
		return -EOPNOTSUPP;

	if (pos_out < 0 || pos_in < 0)
		return -EINVAL;

	if (src == dst) {
		/* moving a range onto itself is a no-op */
		if (pos_in == pos_out)
			return 0;
		/* destination may not start inside the source range */
		if (pos_out > pos_in && pos_out < pos_in + len)
			return -EINVAL;
	}

	inode_lock(src);
	if (src != dst) {
		/* only try-lock the second inode; report -EBUSY on contention */
		ret = -EBUSY;
		if (!inode_trylock(dst))
			goto out;
	}

	if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
		f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* ret stays -EINVAL for the range and alignment checks below */
	ret = -EINVAL;
	if (pos_in + len > src->i_size || pos_in + len < pos_in)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - pos_in;
	/* a range reaching source EOF covers the whole last block */
	if (pos_in + len == src->i_size)
		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
	if (len == 0) {
		ret = 0;
		goto out_unlock;
	}

	/* olen (not the block-rounded len) decides the new destination size */
	dst_osize = dst->i_size;
	if (pos_out + olen > dst->i_size)
		dst_max_i_size = pos_out + olen;

	/* verify the end result is block aligned */
	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
		goto out_unlock;

	ret = f2fs_convert_inline_inode(src);
	if (ret)
		goto out_unlock;

	ret = f2fs_convert_inline_inode(dst);
	if (ret)
		goto out_unlock;

	/* write out all dirty pages from offset */
	ret = filemap_write_and_wait_range(src->i_mapping,
					pos_in, pos_in + len);
	if (ret)
		goto out_unlock;

	ret = filemap_write_and_wait_range(dst->i_mapping,
					pos_out, pos_out + len);
	if (ret)
		goto out_unlock;

	f2fs_balance_fs(sbi, true);

	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (src != dst) {
		/* same try-lock policy as for the inode locks above */
		ret = -EBUSY;
		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
			goto out_src;
	}

	f2fs_lock_op(sbi, &lc);
	ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
				F2FS_BYTES_TO_BLK(pos_out),
				F2FS_BYTES_TO_BLK(len), false);

	if (!ret) {
		if (dst_max_i_size)
			f2fs_i_size_write(dst, dst_max_i_size);
		else if (dst_osize != dst->i_size)
			f2fs_i_size_write(dst, dst_osize);
	}
	f2fs_unlock_op(sbi, &lc);

	if (src != dst)
		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
	if (ret)
		goto out_unlock;

	/* success: refresh mtime/ctime on every inode that was touched */
	inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
	f2fs_mark_inode_dirty_sync(src, false);
	if (src != dst) {
		inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
		f2fs_mark_inode_dirty_sync(dst, false);
	}
	f2fs_update_time(sbi, REQ_TIME);

out_unlock:
	if (src != dst)
		inode_unlock(dst);
out:
	inode_unlock(src);
	return ret;
}
3234 
__f2fs_ioc_move_range(struct file * filp,struct f2fs_move_range * range)3235 static int __f2fs_ioc_move_range(struct file *filp,
3236 				struct f2fs_move_range *range)
3237 {
3238 	int err;
3239 
3240 	if (!(filp->f_mode & FMODE_READ) ||
3241 			!(filp->f_mode & FMODE_WRITE))
3242 		return -EBADF;
3243 
3244 	CLASS(fd, dst)(range->dst_fd);
3245 	if (fd_empty(dst))
3246 		return -EBADF;
3247 
3248 	if (!(fd_file(dst)->f_mode & FMODE_WRITE))
3249 		return -EBADF;
3250 
3251 	err = mnt_want_write_file(filp);
3252 	if (err)
3253 		return err;
3254 
3255 	err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
3256 					range->pos_out, range->len);
3257 
3258 	mnt_drop_write_file(filp);
3259 	return err;
3260 }
3261 
f2fs_ioc_move_range(struct file * filp,unsigned long arg)3262 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
3263 {
3264 	struct f2fs_move_range range;
3265 
3266 	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
3267 							sizeof(range)))
3268 		return -EFAULT;
3269 	return __f2fs_ioc_move_range(filp, &range);
3270 }
3271 
f2fs_ioc_flush_device(struct file * filp,unsigned long arg)3272 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
3273 {
3274 	struct inode *inode = file_inode(filp);
3275 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3276 	struct sit_info *sm = SIT_I(sbi);
3277 	unsigned int start_segno = 0, end_segno = 0;
3278 	unsigned int dev_start_segno = 0, dev_end_segno = 0;
3279 	struct f2fs_flush_device range;
3280 	struct f2fs_gc_control gc_control = {
3281 			.init_gc_type = FG_GC,
3282 			.should_migrate_blocks = true,
3283 			.err_gc_skipped = true,
3284 			.nr_free_secs = 0 };
3285 	int ret;
3286 
3287 	if (!capable(CAP_SYS_ADMIN))
3288 		return -EPERM;
3289 
3290 	if (f2fs_readonly(sbi->sb))
3291 		return -EROFS;
3292 
3293 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3294 		return -EINVAL;
3295 
3296 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
3297 							sizeof(range)))
3298 		return -EFAULT;
3299 
3300 	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
3301 			__is_large_section(sbi)) {
3302 		f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
3303 			  range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
3304 		return -EINVAL;
3305 	}
3306 
3307 	ret = mnt_want_write_file(filp);
3308 	if (ret)
3309 		return ret;
3310 
3311 	if (range.dev_num != 0)
3312 		dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
3313 	dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
3314 
3315 	start_segno = sm->last_victim[FLUSH_DEVICE];
3316 	if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
3317 		start_segno = dev_start_segno;
3318 	end_segno = min(start_segno + range.segments, dev_end_segno);
3319 
3320 	while (start_segno < end_segno) {
3321 		if (!f2fs_down_write_trylock_trace(&sbi->gc_lock, &gc_control.lc)) {
3322 			ret = -EBUSY;
3323 			goto out;
3324 		}
3325 		sm->last_victim[GC_CB] = end_segno + 1;
3326 		sm->last_victim[GC_GREEDY] = end_segno + 1;
3327 		sm->last_victim[ALLOC_NEXT] = end_segno + 1;
3328 
3329 		gc_control.victim_segno = start_segno;
3330 		stat_inc_gc_call_count(sbi, FOREGROUND);
3331 		ret = f2fs_gc(sbi, &gc_control);
3332 		if (ret == -EAGAIN)
3333 			ret = 0;
3334 		else if (ret < 0)
3335 			break;
3336 		start_segno++;
3337 	}
3338 out:
3339 	mnt_drop_write_file(filp);
3340 	return ret;
3341 }
3342 
f2fs_ioc_get_features(struct file * filp,unsigned long arg)3343 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
3344 {
3345 	struct inode *inode = file_inode(filp);
3346 	u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
3347 
3348 	/* Must validate to set it with SQLite behavior in Android. */
3349 	sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
3350 
3351 	return put_user(sb_feature, (u32 __user *)arg);
3352 }
3353 
3354 #ifdef CONFIG_QUOTA
/*
 * Transfer @inode's project quota to the dquot of @kprojid.
 *
 * Returns 0 on success, the dqget() error if the target dquot cannot be
 * obtained, or the __dquot_transfer() error, in which case the superblock is
 * also flagged SBI_QUOTA_NEED_REPAIR.
 */
int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dquot *transfer_to[MAXQUOTAS] = {};
	struct dquot *prj_dquot;
	int ret;

	prj_dquot = dqget(sbi->sb, make_kqid_projid(kprojid));
	if (IS_ERR(prj_dquot))
		return PTR_ERR(prj_dquot);

	/* only the project slot is populated; the other types stay NULL */
	transfer_to[PRJQUOTA] = prj_dquot;

	ret = __dquot_transfer(inode, transfer_to);
	if (ret)
		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);

	dqput(prj_dquot);
	return ret;
}
3372 
/*
 * Change the project ID of @inode to @projid (CONFIG_QUOTA build).
 *
 * Without the project-quota feature only F2FS_DEF_PROJID is accepted.
 * Otherwise the inode needs extra attributes with room for i_projid, must
 * not be a quota file, and the quota is transferred under f2fs_lock_op()
 * before the new ID and ctime are recorded.
 *
 * Returns 0 on success (including "already has this ID") or a negative
 * errno: -EOPNOTSUPP, -EPERM, -EOVERFLOW, or a quota helper's error.
 */
static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	/* only handed to F2FS_FITS_IN_INODE(); never dereferenced here */
	struct f2fs_inode *ri = NULL;
	struct f2fs_lock_context lc;
	kprojid_t new_projid;
	int ret;

	if (!f2fs_sb_has_project_quota(sbi))
		return projid == F2FS_DEF_PROJID ? 0 : -EOPNOTSUPP;

	if (!f2fs_has_extra_attr(inode))
		return -EOPNOTSUPP;

	new_projid = make_kprojid(&init_user_ns, (projid_t)projid);

	/* nothing to do when the ID does not change */
	if (projid_eq(new_projid, fi->i_projid))
		return 0;

	/* Is it quota file? Do not allow user to mess with it */
	if (IS_NOQUOTA(inode))
		return -EPERM;

	if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
		return -EOVERFLOW;

	ret = f2fs_dquot_initialize(inode);
	if (ret)
		return ret;

	f2fs_lock_op(sbi, &lc);
	ret = f2fs_transfer_project_quota(inode, new_projid);
	if (!ret) {
		fi->i_projid = new_projid;
		inode_set_ctime_current(inode);
		f2fs_mark_inode_dirty_sync(inode, true);
	}
	f2fs_unlock_op(sbi, &lc);

	return ret;
}
3421 #else
/*
 * Variant for builds without CONFIG_QUOTA: there is nothing to transfer, so
 * this always reports success.
 */
int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
{
	return 0;
}
3426 
/*
 * Variant for builds without CONFIG_QUOTA: only the default project ID is
 * accepted; any other value yields -EOPNOTSUPP.
 */
static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
{
	return projid == F2FS_DEF_PROJID ? 0 : -EOPNOTSUPP;
}
3433 #endif
3434 
f2fs_fileattr_get(struct dentry * dentry,struct file_kattr * fa)3435 int f2fs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
3436 {
3437 	struct inode *inode = d_inode(dentry);
3438 	struct f2fs_inode_info *fi = F2FS_I(inode);
3439 	u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
3440 
3441 	if (IS_ENCRYPTED(inode))
3442 		fsflags |= FS_ENCRYPT_FL;
3443 	if (IS_VERITY(inode))
3444 		fsflags |= FS_VERITY_FL;
3445 	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
3446 		fsflags |= FS_INLINE_DATA_FL;
3447 	if (is_inode_flag_set(inode, FI_PIN_FILE))
3448 		fsflags |= FS_NOCOW_FL;
3449 
3450 	fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
3451 
3452 	if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
3453 		fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
3454 
3455 	return 0;
3456 }
3457 
f2fs_fileattr_set(struct mnt_idmap * idmap,struct dentry * dentry,struct file_kattr * fa)3458 int f2fs_fileattr_set(struct mnt_idmap *idmap,
3459 		      struct dentry *dentry, struct file_kattr *fa)
3460 {
3461 	struct inode *inode = d_inode(dentry);
3462 	u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
3463 	u32 iflags;
3464 	int err;
3465 
3466 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3467 		return -EIO;
3468 	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
3469 		return -ENOSPC;
3470 	if (fsflags & ~F2FS_GETTABLE_FS_FL)
3471 		return -EOPNOTSUPP;
3472 	fsflags &= F2FS_SETTABLE_FS_FL;
3473 	if (!fa->flags_valid)
3474 		mask &= FS_COMMON_FL;
3475 
3476 	iflags = f2fs_fsflags_to_iflags(fsflags);
3477 	if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
3478 		return -EOPNOTSUPP;
3479 
3480 	err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
3481 	if (!err)
3482 		err = f2fs_ioc_setproject(inode, fa->fsx_projid);
3483 
3484 	return err;
3485 }
3486 
/*
 * Check whether @inode may stay pinned and optionally count a GC failure.
 *
 * Returns -EINVAL for device-aliasing inodes.  Once i_gc_failures has
 * reached sbi->gc_pin_file_threshold, the FI_PIN_FILE flag is cleared, a
 * warning is logged and -EAGAIN is returned.  Otherwise returns 0, after
 * incrementing i_gc_failures when @inc is true.
 */
int f2fs_pin_file_control(struct inode *inode, bool inc)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (IS_DEVICE_ALIASING(inode))
		return -EINVAL;

	if (fi->i_gc_failures < sbi->gc_pin_file_threshold) {
		/* Use i_gc_failures for normal file as a risk signal. */
		if (inc)
			f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
		return 0;
	}

	/* threshold reached: unpin the file */
	f2fs_warn(sbi, "%s: Enable GC = ino %llx after %x GC trials",
		  __func__, inode->i_ino, fi->i_gc_failures);
	clear_inode_flag(inode, FI_PIN_FILE);

	return -EAGAIN;
}
3508 
f2fs_ioc_set_pin_file(struct file * filp,unsigned long arg)3509 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
3510 {
3511 	struct inode *inode = file_inode(filp);
3512 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3513 	__u32 pin;
3514 	int ret = 0;
3515 
3516 	if (get_user(pin, (__u32 __user *)arg))
3517 		return -EFAULT;
3518 
3519 	if (!S_ISREG(inode->i_mode))
3520 		return -EINVAL;
3521 
3522 	if (f2fs_readonly(sbi->sb))
3523 		return -EROFS;
3524 
3525 	if (!pin && IS_DEVICE_ALIASING(inode))
3526 		return -EOPNOTSUPP;
3527 
3528 	ret = mnt_want_write_file(filp);
3529 	if (ret)
3530 		return ret;
3531 
3532 	inode_lock(inode);
3533 
3534 	if (f2fs_is_atomic_file(inode)) {
3535 		ret = -EINVAL;
3536 		goto out;
3537 	}
3538 
3539 	if (!pin) {
3540 		clear_inode_flag(inode, FI_PIN_FILE);
3541 		f2fs_i_gc_failures_write(inode, 0);
3542 		goto done;
3543 	} else if (f2fs_is_pinned_file(inode)) {
3544 		goto done;
3545 	}
3546 
3547 	if (F2FS_HAS_BLOCKS(inode)) {
3548 		ret = -EFBIG;
3549 		goto out;
3550 	}
3551 
3552 	/* Let's allow file pinning on zoned device. */
3553 	if (!f2fs_sb_has_blkzoned(sbi) &&
3554 	    f2fs_should_update_outplace(inode, NULL)) {
3555 		ret = -EINVAL;
3556 		goto out;
3557 	}
3558 
3559 	if (f2fs_pin_file_control(inode, false)) {
3560 		ret = -EAGAIN;
3561 		goto out;
3562 	}
3563 
3564 	ret = f2fs_convert_inline_inode(inode);
3565 	if (ret)
3566 		goto out;
3567 
3568 	if (!f2fs_disable_compressed_file(inode)) {
3569 		ret = -EOPNOTSUPP;
3570 		goto out;
3571 	}
3572 
3573 	set_inode_flag(inode, FI_PIN_FILE);
3574 	ret = F2FS_I(inode)->i_gc_failures;
3575 done:
3576 	f2fs_update_time(sbi, REQ_TIME);
3577 out:
3578 	inode_unlock(inode);
3579 	mnt_drop_write_file(filp);
3580 	return ret;
3581 }
3582 
f2fs_ioc_get_pin_file(struct file * filp,unsigned long arg)3583 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
3584 {
3585 	struct inode *inode = file_inode(filp);
3586 	__u32 pin = 0;
3587 
3588 	if (is_inode_flag_set(inode, FI_PIN_FILE))
3589 		pin = F2FS_I(inode)->i_gc_failures;
3590 	return put_user(pin, (u32 __user *)arg);
3591 }
3592 
f2fs_ioc_get_dev_alias_file(struct file * filp,unsigned long arg)3593 static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg)
3594 {
3595 	return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0,
3596 			(u32 __user *)arg);
3597 }
3598 
f2fs_ioc_io_prio(struct file * filp,unsigned long arg)3599 static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg)
3600 {
3601 	struct inode *inode = file_inode(filp);
3602 	__u32 level;
3603 
3604 	if (get_user(level, (__u32 __user *)arg))
3605 		return -EFAULT;
3606 
3607 	if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX)
3608 		return -EINVAL;
3609 
3610 	inode_lock(inode);
3611 	F2FS_I(inode)->ioprio_hint = level;
3612 	inode_unlock(inode);
3613 	return 0;
3614 }
3615 
f2fs_precache_extents(struct inode * inode)3616 int f2fs_precache_extents(struct inode *inode)
3617 {
3618 	struct f2fs_inode_info *fi = F2FS_I(inode);
3619 	struct f2fs_map_blocks map;
3620 	pgoff_t m_next_extent;
3621 	loff_t end;
3622 	int err;
3623 
3624 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
3625 		return -EOPNOTSUPP;
3626 
3627 	map.m_lblk = 0;
3628 	map.m_pblk = 0;
3629 	map.m_next_pgofs = NULL;
3630 	map.m_next_extent = &m_next_extent;
3631 	map.m_seg_type = NO_CHECK_TYPE;
3632 	map.m_may_create = false;
3633 	end = F2FS_BLK_ALIGN(i_size_read(inode));
3634 
3635 	while (map.m_lblk < end) {
3636 		map.m_len = end - map.m_lblk;
3637 
3638 		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3639 		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
3640 		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3641 		if (err || !map.m_len)
3642 			return err;
3643 
3644 		map.m_lblk = m_next_extent;
3645 	}
3646 
3647 	return 0;
3648 }
3649 
/* ioctl entry point: run f2fs_precache_extents() on the file's inode. */
static int f2fs_ioc_precache_extents(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	return f2fs_precache_extents(inode);
}
3654 
f2fs_ioc_resize_fs(struct file * filp,unsigned long arg)3655 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3656 {
3657 	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
3658 	__u64 block_count;
3659 
3660 	if (!capable(CAP_SYS_ADMIN))
3661 		return -EPERM;
3662 
3663 	if (f2fs_readonly(sbi->sb))
3664 		return -EROFS;
3665 
3666 	if (copy_from_user(&block_count, (void __user *)arg,
3667 			   sizeof(block_count)))
3668 		return -EFAULT;
3669 
3670 	return f2fs_resize_fs(filp, block_count);
3671 }
3672 
f2fs_ioc_enable_verity(struct file * filp,unsigned long arg)3673 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3674 {
3675 	struct inode *inode = file_inode(filp);
3676 
3677 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3678 
3679 	if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3680 		f2fs_warn(F2FS_I_SB(inode),
3681 			  "Can't enable fs-verity on inode %llu: the verity feature is not enabled on this filesystem",
3682 			  inode->i_ino);
3683 		return -EOPNOTSUPP;
3684 	}
3685 
3686 	return fsverity_ioctl_enable(filp, (const void __user *)arg);
3687 }
3688 
f2fs_ioc_measure_verity(struct file * filp,unsigned long arg)3689 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3690 {
3691 	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3692 		return -EOPNOTSUPP;
3693 
3694 	return fsverity_ioctl_measure(filp, (void __user *)arg);
3695 }
3696 
f2fs_ioc_read_verity_metadata(struct file * filp,unsigned long arg)3697 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
3698 {
3699 	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3700 		return -EOPNOTSUPP;
3701 
3702 	return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
3703 }
3704 
f2fs_ioc_getfslabel(struct file * filp,unsigned long arg)3705 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
3706 {
3707 	struct inode *inode = file_inode(filp);
3708 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3709 	char *vbuf;
3710 	int count;
3711 	int err = 0;
3712 
3713 	vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
3714 	if (!vbuf)
3715 		return -ENOMEM;
3716 
3717 	f2fs_down_read(&sbi->sb_lock);
3718 	count = utf16s_to_utf8s(sbi->raw_super->volume_name,
3719 			ARRAY_SIZE(sbi->raw_super->volume_name),
3720 			UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
3721 	f2fs_up_read(&sbi->sb_lock);
3722 
3723 	if (copy_to_user((char __user *)arg, vbuf,
3724 				min(FSLABEL_MAX, count)))
3725 		err = -EFAULT;
3726 
3727 	kfree(vbuf);
3728 	return err;
3729 }
3730 
f2fs_ioc_setfslabel(struct file * filp,unsigned long arg)3731 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
3732 {
3733 	struct inode *inode = file_inode(filp);
3734 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3735 	char *vbuf;
3736 	int err = 0;
3737 
3738 	if (!capable(CAP_SYS_ADMIN))
3739 		return -EPERM;
3740 
3741 	vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
3742 	if (IS_ERR(vbuf))
3743 		return PTR_ERR(vbuf);
3744 
3745 	err = mnt_want_write_file(filp);
3746 	if (err)
3747 		goto out;
3748 
3749 	f2fs_down_write(&sbi->sb_lock);
3750 
3751 	memset(sbi->raw_super->volume_name, 0,
3752 			sizeof(sbi->raw_super->volume_name));
3753 	utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
3754 			sbi->raw_super->volume_name,
3755 			ARRAY_SIZE(sbi->raw_super->volume_name));
3756 
3757 	err = f2fs_commit_super(sbi, false);
3758 
3759 	f2fs_up_write(&sbi->sb_lock);
3760 
3761 	mnt_drop_write_file(filp);
3762 out:
3763 	kfree(vbuf);
3764 	return err;
3765 }
3766 
/*
 * Store the inode's i_compr_blocks counter in *@blocks.
 *
 * Returns -EOPNOTSUPP when the filesystem lacks the compression feature and
 * -EINVAL when @inode is not a compressed file; *@blocks is left untouched
 * in both cases.  Returns 0 on success.
 */
static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!f2fs_sb_has_compression(sbi))
		return -EOPNOTSUPP;

	if (!f2fs_compressed_file(inode))
		return -EINVAL;

	*blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
	return 0;
}
3779 
f2fs_ioc_get_compress_blocks(struct file * filp,unsigned long arg)3780 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
3781 {
3782 	struct inode *inode = file_inode(filp);
3783 	__u64 blocks;
3784 	int ret;
3785 
3786 	ret = f2fs_get_compress_blocks(inode, &blocks);
3787 	if (ret < 0)
3788 		return ret;
3789 
3790 	return put_user(blocks, (u64 __user *)arg);
3791 }
3792 
/*
 * Release the reserved (NEW_ADDR) slots of the clusters covered by @count
 * block addresses starting at @dn->ofs_in_node.
 *
 * A first pass validates every valid data block address in the range and
 * returns -EFSCORRUPTED on a bad one, before anything is modified.  The
 * second pass works cluster by cluster: a cluster whose first slot is not
 * COMPRESS_ADDR is skipped; otherwise NEW_ADDR slots become NULL_ADDR and
 * the inode's compressed-block and valid-block counters are adjusted.
 * @dn->ofs_in_node is advanced as the range is walked.
 *
 * Returns the number of released blocks, or a negative errno.
 */
static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	unsigned int nr_released = 0;
	block_t addr;
	int i;

	/* sanity-check the whole range up front */
	for (i = 0; i < count; i++) {
		addr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

		if (__is_valid_data_blkaddr(addr) &&
		    unlikely(!f2fs_is_valid_blkaddr(sbi, addr,
					DATA_GENERIC_ENHANCE)))
			return -EFSCORRUPTED;
	}

	while (count) {
		int nr_compr = 0;

		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
			addr = f2fs_data_blkaddr(dn);

			if (i == 0) {
				/* no COMPRESS_ADDR header: skip this cluster */
				if (addr != COMPRESS_ADDR) {
					dn->ofs_in_node += cluster_size;
					goto next_cluster;
				}
				continue;
			}

			if (__is_valid_data_blkaddr(addr))
				nr_compr++;

			if (addr == NEW_ADDR)
				f2fs_set_data_blkaddr(dn, NULL_ADDR);
		}

		f2fs_i_compr_blocks_update(dn->inode, nr_compr, false);
		dec_valid_block_count(sbi, dn->inode,
					cluster_size - nr_compr);

		nr_released += cluster_size - nr_compr;
next_cluster:
		count -= cluster_size;
	}

	return nr_released;
}
3845 
f2fs_release_compress_blocks(struct file * filp,unsigned long arg)3846 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
3847 {
3848 	struct inode *inode = file_inode(filp);
3849 	struct f2fs_inode_info *fi = F2FS_I(inode);
3850 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3851 	struct f2fs_lock_context lc;
3852 	pgoff_t page_idx = 0, last_idx;
3853 	unsigned int released_blocks = 0;
3854 	int ret;
3855 	int writecount;
3856 
3857 	if (!f2fs_sb_has_compression(sbi))
3858 		return -EOPNOTSUPP;
3859 
3860 	if (f2fs_readonly(sbi->sb))
3861 		return -EROFS;
3862 
3863 	ret = mnt_want_write_file(filp);
3864 	if (ret)
3865 		return ret;
3866 
3867 	f2fs_balance_fs(sbi, true);
3868 
3869 	inode_lock(inode);
3870 
3871 	writecount = atomic_read(&inode->i_writecount);
3872 	if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
3873 			(!(filp->f_mode & FMODE_WRITE) && writecount)) {
3874 		ret = -EBUSY;
3875 		goto out;
3876 	}
3877 
3878 	if (!f2fs_compressed_file(inode) ||
3879 		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3880 		ret = -EINVAL;
3881 		goto out;
3882 	}
3883 
3884 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
3885 	if (ret)
3886 		goto out;
3887 
3888 	if (!atomic_read(&fi->i_compr_blocks)) {
3889 		ret = -EPERM;
3890 		goto out;
3891 	}
3892 
3893 	set_inode_flag(inode, FI_COMPRESS_RELEASED);
3894 	inode_set_ctime_current(inode);
3895 	f2fs_mark_inode_dirty_sync(inode, true);
3896 
3897 	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3898 	filemap_invalidate_lock(inode->i_mapping);
3899 
3900 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3901 
3902 	while (page_idx < last_idx) {
3903 		struct dnode_of_data dn;
3904 		pgoff_t end_offset, count;
3905 
3906 		f2fs_lock_op(sbi, &lc);
3907 
3908 		set_new_dnode(&dn, inode, NULL, NULL, 0);
3909 		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3910 		if (ret) {
3911 			f2fs_unlock_op(sbi, &lc);
3912 			if (ret == -ENOENT) {
3913 				page_idx = f2fs_get_next_page_offset(&dn,
3914 								page_idx);
3915 				ret = 0;
3916 				continue;
3917 			}
3918 			break;
3919 		}
3920 
3921 		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
3922 		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3923 		count = round_up(count, fi->i_cluster_size);
3924 
3925 		ret = release_compress_blocks(&dn, count);
3926 
3927 		f2fs_put_dnode(&dn);
3928 
3929 		f2fs_unlock_op(sbi, &lc);
3930 
3931 		if (ret < 0)
3932 			break;
3933 
3934 		page_idx += count;
3935 		released_blocks += ret;
3936 	}
3937 
3938 	filemap_invalidate_unlock(inode->i_mapping);
3939 	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3940 out:
3941 	if (released_blocks)
3942 		f2fs_update_time(sbi, REQ_TIME);
3943 	inode_unlock(inode);
3944 
3945 	mnt_drop_write_file(filp);
3946 
3947 	if (ret >= 0) {
3948 		ret = put_user(released_blocks, (u64 __user *)arg);
3949 	} else if (released_blocks &&
3950 			atomic_read(&fi->i_compr_blocks)) {
3951 		set_sbi_flag(sbi, SBI_NEED_FSCK);
3952 		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%llx "
3953 			"iblocks=%llu, released=%u, compr_blocks=%u, "
3954 			"run fsck to fix.",
3955 			__func__, inode->i_ino, inode->i_blocks,
3956 			released_blocks,
3957 			atomic_read(&fi->i_compr_blocks));
3958 	}
3959 
3960 	return ret;
3961 }
3962 
/*
 * Re-reserve blocks for the clusters covered by @count block addresses
 * starting at @dn->ofs_in_node.
 *
 * A first pass validates every valid data block address in the range and
 * returns -EFSCORRUPTED on a bad one.  The second pass works cluster by
 * cluster: clusters whose first slot is not COMPRESS_ADDR are skipped;
 * otherwise the missing blocks are charged via inc_valid_block_count(),
 * NULL_ADDR slots become NEW_ADDR, and the inode's compressed-block counter
 * is raised.  The number of newly reserved blocks is added to
 * *@reserved_blocks and @dn->ofs_in_node is advanced.
 *
 * Returns 0 on success or a negative errno.
 */
static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
		unsigned int *reserved_blocks)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
	block_t addr;
	int i;

	/* sanity-check the whole range up front */
	for (i = 0; i < count; i++) {
		addr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

		if (__is_valid_data_blkaddr(addr) &&
		    unlikely(!f2fs_is_valid_blkaddr(sbi, addr,
					DATA_GENERIC_ENHANCE)))
			return -EFSCORRUPTED;
	}

	while (count) {
		int nr_compr = 0;
		blkcnt_t nr_already = 0;
		blkcnt_t nr_needed;
		int err;

		/* classify the slots; ofs_in_node is not moved in this scan */
		for (i = 0; i < cluster_size; i++) {
			addr = data_blkaddr(dn->inode, dn->node_folio,
						dn->ofs_in_node + i);

			if (i == 0) {
				if (addr == COMPRESS_ADDR)
					continue;
				/* no COMPRESS_ADDR header: skip this cluster */
				dn->ofs_in_node += cluster_size;
				goto next_cluster;
			}

			/*
			 * compressed cluster was not released due to it
			 * fails in release_compress_blocks(), so NEW_ADDR
			 * is a possible case.
			 */
			if (addr == NEW_ADDR)
				nr_already++;
			else if (__is_valid_data_blkaddr(addr))
				nr_compr++;
		}

		nr_needed = cluster_size - nr_compr - nr_already;

		/* for the case all blocks in cluster were reserved */
		if (nr_already && nr_needed == 1) {
			dn->ofs_in_node += cluster_size;
			goto next_cluster;
		}

		err = inc_valid_block_count(sbi, dn->inode,
						&nr_needed, false);
		if (unlikely(err))
			return err;

		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
			if (f2fs_data_blkaddr(dn) == NULL_ADDR)
				f2fs_set_data_blkaddr(dn, NEW_ADDR);
		}

		f2fs_i_compr_blocks_update(dn->inode, nr_compr, true);

		*reserved_blocks += nr_needed;
next_cluster:
		count -= cluster_size;
	}

	return 0;
}
4042 
f2fs_reserve_compress_blocks(struct file * filp,unsigned long arg)4043 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
4044 {
4045 	struct inode *inode = file_inode(filp);
4046 	struct f2fs_inode_info *fi = F2FS_I(inode);
4047 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4048 	pgoff_t page_idx = 0, last_idx;
4049 	unsigned int reserved_blocks = 0;
4050 	int ret;
4051 
4052 	if (!f2fs_sb_has_compression(sbi))
4053 		return -EOPNOTSUPP;
4054 
4055 	if (f2fs_readonly(sbi->sb))
4056 		return -EROFS;
4057 
4058 	ret = mnt_want_write_file(filp);
4059 	if (ret)
4060 		return ret;
4061 
4062 	f2fs_balance_fs(sbi, true);
4063 
4064 	inode_lock(inode);
4065 
4066 	if (!f2fs_compressed_file(inode) ||
4067 		!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4068 		ret = -EINVAL;
4069 		goto unlock_inode;
4070 	}
4071 
4072 	if (atomic_read(&fi->i_compr_blocks))
4073 		goto unlock_inode;
4074 
4075 	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
4076 	filemap_invalidate_lock(inode->i_mapping);
4077 
4078 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4079 
4080 	while (page_idx < last_idx) {
4081 		struct dnode_of_data dn;
4082 		struct f2fs_lock_context lc;
4083 		pgoff_t end_offset, count;
4084 
4085 		f2fs_lock_op(sbi, &lc);
4086 
4087 		set_new_dnode(&dn, inode, NULL, NULL, 0);
4088 		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
4089 		if (ret) {
4090 			f2fs_unlock_op(sbi, &lc);
4091 			if (ret == -ENOENT) {
4092 				page_idx = f2fs_get_next_page_offset(&dn,
4093 								page_idx);
4094 				ret = 0;
4095 				continue;
4096 			}
4097 			break;
4098 		}
4099 
4100 		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
4101 		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
4102 		count = round_up(count, fi->i_cluster_size);
4103 
4104 		ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
4105 
4106 		f2fs_put_dnode(&dn);
4107 
4108 		f2fs_unlock_op(sbi, &lc);
4109 
4110 		if (ret < 0)
4111 			break;
4112 
4113 		page_idx += count;
4114 	}
4115 
4116 	filemap_invalidate_unlock(inode->i_mapping);
4117 	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
4118 
4119 	if (!ret) {
4120 		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
4121 		inode_set_ctime_current(inode);
4122 		f2fs_mark_inode_dirty_sync(inode, true);
4123 	}
4124 unlock_inode:
4125 	if (reserved_blocks)
4126 		f2fs_update_time(sbi, REQ_TIME);
4127 	inode_unlock(inode);
4128 	mnt_drop_write_file(filp);
4129 
4130 	if (!ret) {
4131 		ret = put_user(reserved_blocks, (u64 __user *)arg);
4132 	} else if (reserved_blocks &&
4133 			atomic_read(&fi->i_compr_blocks)) {
4134 		set_sbi_flag(sbi, SBI_NEED_FSCK);
4135 		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%llx "
4136 			"iblocks=%llu, reserved=%u, compr_blocks=%u, "
4137 			"run fsck to fix.",
4138 			__func__, inode->i_ino, inode->i_blocks,
4139 			reserved_blocks,
4140 			atomic_read(&fi->i_compr_blocks));
4141 	}
4142 
4143 	return ret;
4144 }
4145 
/*
 * Erase @len blocks starting at block @block on @bdev according to @flags.
 *
 * F2FS_TRIM_FILE_DISCARD issues a secure erase when the device reports
 * secure-erase sectors, else a plain discard.  F2FS_TRIM_FILE_ZEROOUT then
 * zeroes the range, but only if the discard step did not fail; encrypted
 * inodes go through fscrypt_zeroout_range() using file offset @off, others
 * through blkdev_issue_zeroout().
 *
 * Returns 0 on success (or when no flag is set) or the first error.
 */
static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
		pgoff_t off, block_t block, block_t len, u32 flags)
{
	sector_t start_sect = SECTOR_FROM_BLOCK(block);
	sector_t nr_sects = SECTOR_FROM_BLOCK(len);
	int err;

	if (flags & F2FS_TRIM_FILE_DISCARD) {
		if (bdev_max_secure_erase_sectors(bdev))
			err = blkdev_issue_secure_erase(bdev, start_sect,
					nr_sects, GFP_NOFS);
		else
			err = blkdev_issue_discard(bdev, start_sect,
					nr_sects, GFP_NOFS);
		if (err)
			return err;
	}

	if (!(flags & F2FS_TRIM_FILE_ZEROOUT))
		return 0;

	if (IS_ENCRYPTED(inode))
		return fscrypt_zeroout_range(inode,
				(loff_t)off << inode->i_blkbits, start_sect,
				(u64)len << inode->i_blkbits);

	return blkdev_issue_zeroout(bdev, start_sect, nr_sects,
			GFP_NOFS, 0);
}
4174 
/*
 * F2FS_IOC_SEC_TRIM_FILE handler: discard and/or zero out the on-disk blocks
 * that back a byte range of a regular file.
 *
 * @filp: target file; must be open for writing
 * @arg:  user pointer to a struct f2fs_sectrim_range (start, len, flags)
 *
 * range.start must be block aligned and below i_size.  When the range reaches
 * or passes EOF it is treated as "to the end" (up to i_size, or s_maxbytes if
 * len is (u64)-1) and its end need not be aligned; otherwise start + len must
 * be block aligned too.  A zero len succeeds without doing anything.
 *
 * Runs of blocks that are consecutive both in the file and on the same device
 * are merged and passed to f2fs_secure_erase() as one extent.
 *
 * Returns 0 on success or a negative errno (-EBADF, -EFAULT, -EINVAL,
 * -EOPNOTSUPP, -EFSCORRUPTED, -EINTR, or an error from a callee).
 */
static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct block_device *prev_bdev = NULL;
	struct f2fs_sectrim_range range;
	pgoff_t index, pg_end, prev_index = 0;
	block_t prev_block = 0, len = 0;
	loff_t end_addr;
	bool to_end = false;
	int ret = 0;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
				sizeof(range)))
		return -EFAULT;

	/* At least one known flag, no unknown flags, regular files only. */
	if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
			!S_ISREG(inode->i_mode))
		return -EINVAL;

	/*
	 * Discard needs device support; zeroout of an encrypted file is not
	 * offered on multi-device filesystems.
	 */
	if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
			!f2fs_hw_support_discard(sbi)) ||
			((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
			 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(filp);
	if (ret)
		return ret;
	inode_lock(inode);

	if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
			range.start >= inode->i_size) {
		ret = -EINVAL;
		goto err;
	}

	/* Nothing requested: leave with ret still 0. */
	if (range.len == 0)
		goto err;

	/* Clamp the range to EOF unless the caller asked for "everything". */
	if (inode->i_size - range.start > range.len) {
		end_addr = range.start + range.len;
	} else {
		end_addr = range.len == (u64)-1 ?
			sbi->sb->s_maxbytes : inode->i_size;
		to_end = true;
	}

	if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
			(!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
		ret = -EINVAL;
		goto err;
	}

	/* Block range to walk: [index, pg_end). */
	index = F2FS_BYTES_TO_BLK(range.start);
	pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		goto err;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(mapping);

	/* Write back dirty data in the range, then drop it from the cache. */
	ret = filemap_write_and_wait_range(mapping, range.start,
			to_end ? LLONG_MAX : end_addr - 1);
	if (ret)
		goto out;

	truncate_inode_pages_range(mapping, range.start,
			to_end ? -1 : end_addr - 1);

	while (index < pg_end) {
		struct dnode_of_data dn;
		pgoff_t end_offset, count;
		int i;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (ret) {
			/* No node here: jump to the next candidate offset. */
			if (ret == -ENOENT) {
				index = f2fs_get_next_page_offset(&dn, index);
				continue;
			}
			goto out;
		}

		/* Visit the addresses left in this node, bounded by pg_end. */
		end_offset = ADDRS_PER_PAGE(dn.node_folio, inode);
		count = min(end_offset - dn.ofs_in_node, pg_end - index);
		for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
			struct block_device *cur_bdev;
			block_t blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr))
				continue;

			if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
						DATA_GENERIC_ENHANCE)) {
				ret = -EFSCORRUPTED;
				f2fs_put_dnode(&dn);
				goto out;
			}

			/* Make blkaddr relative to the device that holds it. */
			cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
			if (f2fs_is_multi_device(sbi)) {
				int di = f2fs_target_device_index(sbi, blkaddr);

				blkaddr -= FDEV(di).start_blk;
			}

			if (len) {
				/*
				 * Extend the pending extent only if this block
				 * follows it in the file and on the same device.
				 */
				if (prev_bdev == cur_bdev &&
						index == prev_index + len &&
						blkaddr == prev_block + len) {
					len++;
				} else {
					/* Flush the pending extent. */
					ret = f2fs_secure_erase(prev_bdev,
						inode, prev_index, prev_block,
						len, range.flags);
					if (ret) {
						f2fs_put_dnode(&dn);
						goto out;
					}

					len = 0;
				}
			}

			/* Start a new pending extent at the current block. */
			if (!len) {
				prev_bdev = cur_bdev;
				prev_index = index;
				prev_block = blkaddr;
				len = 1;
			}
		}

		f2fs_put_dnode(&dn);

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			goto out;
		}
		cond_resched();
	}

	/* Flush whatever extent is still pending after the walk. */
	if (len)
		ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
				prev_block, len, range.flags);
	f2fs_update_time(sbi, REQ_TIME);
out:
	filemap_invalidate_unlock(mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
	inode_unlock(inode);
	mnt_drop_write_file(filp);

	return ret;
}
4337 
f2fs_ioc_get_compress_option(struct file * filp,unsigned long arg)4338 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
4339 {
4340 	struct inode *inode = file_inode(filp);
4341 	struct f2fs_comp_option option;
4342 
4343 	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
4344 		return -EOPNOTSUPP;
4345 
4346 	inode_lock_shared(inode);
4347 
4348 	if (!f2fs_compressed_file(inode)) {
4349 		inode_unlock_shared(inode);
4350 		return -ENODATA;
4351 	}
4352 
4353 	option.algorithm = F2FS_I(inode)->i_compress_algorithm;
4354 	option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
4355 
4356 	inode_unlock_shared(inode);
4357 
4358 	if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
4359 				sizeof(option)))
4360 		return -EFAULT;
4361 
4362 	return 0;
4363 }
4364 
f2fs_ioc_set_compress_option(struct file * filp,unsigned long arg)4365 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
4366 {
4367 	struct inode *inode = file_inode(filp);
4368 	struct f2fs_inode_info *fi = F2FS_I(inode);
4369 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4370 	struct f2fs_comp_option option;
4371 	int ret = 0;
4372 
4373 	if (!f2fs_sb_has_compression(sbi))
4374 		return -EOPNOTSUPP;
4375 
4376 	if (!(filp->f_mode & FMODE_WRITE))
4377 		return -EBADF;
4378 
4379 	if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
4380 				sizeof(option)))
4381 		return -EFAULT;
4382 
4383 	if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
4384 		option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
4385 		option.algorithm >= COMPRESS_MAX)
4386 		return -EINVAL;
4387 
4388 	ret = mnt_want_write_file(filp);
4389 	if (ret)
4390 		return ret;
4391 	inode_lock(inode);
4392 
4393 	f2fs_down_write(&F2FS_I(inode)->i_sem);
4394 	if (!f2fs_compressed_file(inode)) {
4395 		ret = -EINVAL;
4396 		goto out;
4397 	}
4398 
4399 	if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
4400 		ret = -EBUSY;
4401 		goto out;
4402 	}
4403 
4404 	if (F2FS_HAS_BLOCKS(inode)) {
4405 		ret = -EFBIG;
4406 		goto out;
4407 	}
4408 
4409 	fi->i_compress_algorithm = option.algorithm;
4410 	fi->i_log_cluster_size = option.log_cluster_size;
4411 	fi->i_cluster_size = BIT(option.log_cluster_size);
4412 	/* Set default level */
4413 	if (fi->i_compress_algorithm == COMPRESS_ZSTD)
4414 		fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
4415 	else
4416 		fi->i_compress_level = 0;
4417 	/* Adjust mount option level */
4418 	if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
4419 	    F2FS_OPTION(sbi).compress_level)
4420 		fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
4421 	f2fs_mark_inode_dirty_sync(inode, true);
4422 
4423 	if (!f2fs_is_compress_backend_ready(inode))
4424 		f2fs_warn(sbi, "compression algorithm is successfully set, "
4425 			"but current kernel doesn't support this algorithm.");
4426 out:
4427 	f2fs_up_write(&fi->i_sem);
4428 	inode_unlock(inode);
4429 	mnt_drop_write_file(filp);
4430 
4431 	return ret;
4432 }
4433 
/*
 * Read @len pages of @inode starting at @page_idx into the page cache and
 * mark every folio covering them dirty, also setting the f2fs "gcing" flag on
 * each folio.
 *
 * @inode:    file whose pages are re-dirtied
 * @page_idx: index of the first page
 * @len:      number of pages to cover
 *
 * Returns 0 on success or the error from read_cache_folio().  Folios that
 * were read successfully before the failure are still re-dirtied and
 * released.
 */
static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
{
	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t redirty_idx = page_idx;
	int page_len = 0, ret = 0;

	filemap_invalidate_lock_shared(mapping);
	page_cache_ra_unbounded(&ractl, len, 0);
	filemap_invalidate_unlock_shared(mapping);

	/*
	 * Pass 1: read the folios and keep the reference each read returns.
	 * page_idx ends up one past the last folio that was read, so
	 * [redirty_idx, page_idx) is exactly the pinned range.
	 */
	do {
		folio = read_cache_folio(mapping, page_idx, NULL, NULL);
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			break;
		}
		page_len += folio_nr_pages(folio) - (page_idx - folio->index);
		page_idx = folio_next_index(folio);
	} while (page_len < len);

	/*
	 * Pass 2: re-dirty only what pass 1 pinned.  This must be a
	 * pre-checked loop: if the very first read failed, the pinned range
	 * is empty, and looking up an unpinned index could fail (tripping the
	 * f2fs_bug_on and dereferencing an error pointer) or drop a reference
	 * that was never taken.
	 */
	while (redirty_idx < page_idx) {
		folio = filemap_lock_folio(mapping, redirty_idx);

		/* It will never fail, when folio has pinned above */
		f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio));

		f2fs_folio_wait_writeback(folio, DATA, true, true);

		folio_mark_dirty(folio);
		folio_set_f2fs_gcing(folio);
		redirty_idx = folio_next_index(folio);
		folio_unlock(folio);
		/* Drop the pass-1 reference and the one from the lookup. */
		folio_put_refs(folio, 2);
	}

	return ret;
}
4473 
f2fs_ioc_decompress_file(struct file * filp)4474 static int f2fs_ioc_decompress_file(struct file *filp)
4475 {
4476 	struct inode *inode = file_inode(filp);
4477 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4478 	struct f2fs_inode_info *fi = F2FS_I(inode);
4479 	pgoff_t page_idx = 0, last_idx, cluster_idx;
4480 	int ret;
4481 
4482 	if (!f2fs_sb_has_compression(sbi) ||
4483 			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4484 		return -EOPNOTSUPP;
4485 
4486 	if (!(filp->f_mode & FMODE_WRITE))
4487 		return -EBADF;
4488 
4489 	f2fs_balance_fs(sbi, true);
4490 
4491 	ret = mnt_want_write_file(filp);
4492 	if (ret)
4493 		return ret;
4494 	inode_lock(inode);
4495 
4496 	if (!f2fs_is_compress_backend_ready(inode)) {
4497 		ret = -EOPNOTSUPP;
4498 		goto out;
4499 	}
4500 
4501 	if (!f2fs_compressed_file(inode) ||
4502 		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4503 		ret = -EINVAL;
4504 		goto out;
4505 	}
4506 
4507 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4508 	if (ret)
4509 		goto out;
4510 
4511 	if (!atomic_read(&fi->i_compr_blocks))
4512 		goto out;
4513 
4514 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4515 	last_idx >>= fi->i_log_cluster_size;
4516 
4517 	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4518 		page_idx = cluster_idx << fi->i_log_cluster_size;
4519 
4520 		if (!f2fs_is_compressed_cluster(inode, page_idx))
4521 			continue;
4522 
4523 		ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4524 		if (ret < 0)
4525 			break;
4526 
4527 		if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4528 			ret = filemap_fdatawrite(inode->i_mapping);
4529 			if (ret < 0)
4530 				break;
4531 		}
4532 
4533 		cond_resched();
4534 		if (fatal_signal_pending(current)) {
4535 			ret = -EINTR;
4536 			break;
4537 		}
4538 	}
4539 
4540 	if (!ret)
4541 		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4542 							LLONG_MAX);
4543 
4544 	if (ret)
4545 		f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4546 			  __func__, ret);
4547 	f2fs_update_time(sbi, REQ_TIME);
4548 out:
4549 	inode_unlock(inode);
4550 	mnt_drop_write_file(filp);
4551 
4552 	return ret;
4553 }
4554 
f2fs_ioc_compress_file(struct file * filp)4555 static int f2fs_ioc_compress_file(struct file *filp)
4556 {
4557 	struct inode *inode = file_inode(filp);
4558 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4559 	struct f2fs_inode_info *fi = F2FS_I(inode);
4560 	pgoff_t page_idx = 0, last_idx, cluster_idx;
4561 	int ret;
4562 
4563 	if (!f2fs_sb_has_compression(sbi) ||
4564 			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4565 		return -EOPNOTSUPP;
4566 
4567 	if (!(filp->f_mode & FMODE_WRITE))
4568 		return -EBADF;
4569 
4570 	f2fs_balance_fs(sbi, true);
4571 
4572 	ret = mnt_want_write_file(filp);
4573 	if (ret)
4574 		return ret;
4575 	inode_lock(inode);
4576 
4577 	if (!f2fs_is_compress_backend_ready(inode)) {
4578 		ret = -EOPNOTSUPP;
4579 		goto out;
4580 	}
4581 
4582 	if (!f2fs_compressed_file(inode) ||
4583 		is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4584 		ret = -EINVAL;
4585 		goto out;
4586 	}
4587 
4588 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4589 	if (ret)
4590 		goto out;
4591 
4592 	set_inode_flag(inode, FI_ENABLE_COMPRESS);
4593 
4594 	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4595 	last_idx >>= fi->i_log_cluster_size;
4596 
4597 	for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4598 		page_idx = cluster_idx << fi->i_log_cluster_size;
4599 
4600 		if (f2fs_is_sparse_cluster(inode, page_idx))
4601 			continue;
4602 
4603 		ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4604 		if (ret < 0)
4605 			break;
4606 
4607 		if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4608 			ret = filemap_fdatawrite(inode->i_mapping);
4609 			if (ret < 0)
4610 				break;
4611 		}
4612 
4613 		cond_resched();
4614 		if (fatal_signal_pending(current)) {
4615 			ret = -EINTR;
4616 			break;
4617 		}
4618 	}
4619 
4620 	if (!ret)
4621 		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4622 							LLONG_MAX);
4623 
4624 	clear_inode_flag(inode, FI_ENABLE_COMPRESS);
4625 
4626 	if (ret)
4627 		f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4628 			  __func__, ret);
4629 	f2fs_update_time(sbi, REQ_TIME);
4630 out:
4631 	inode_unlock(inode);
4632 	mnt_drop_write_file(filp);
4633 
4634 	return ret;
4635 }
4636 
__f2fs_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)4637 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4638 {
4639 	switch (cmd) {
4640 	case FS_IOC_GETVERSION:
4641 		return f2fs_ioc_getversion(filp, arg);
4642 	case F2FS_IOC_START_ATOMIC_WRITE:
4643 		return f2fs_ioc_start_atomic_write(filp, false);
4644 	case F2FS_IOC_START_ATOMIC_REPLACE:
4645 		return f2fs_ioc_start_atomic_write(filp, true);
4646 	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
4647 		return f2fs_ioc_commit_atomic_write(filp);
4648 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
4649 		return f2fs_ioc_abort_atomic_write(filp);
4650 	case F2FS_IOC_START_VOLATILE_WRITE:
4651 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
4652 		return -EOPNOTSUPP;
4653 	case F2FS_IOC_SHUTDOWN:
4654 		return f2fs_ioc_shutdown(filp, arg);
4655 	case FITRIM:
4656 		return f2fs_ioc_fitrim(filp, arg);
4657 	case FS_IOC_SET_ENCRYPTION_POLICY:
4658 		return f2fs_ioc_set_encryption_policy(filp, arg);
4659 	case FS_IOC_GET_ENCRYPTION_POLICY:
4660 		return f2fs_ioc_get_encryption_policy(filp, arg);
4661 	case FS_IOC_GET_ENCRYPTION_PWSALT:
4662 		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
4663 	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
4664 		return f2fs_ioc_get_encryption_policy_ex(filp, arg);
4665 	case FS_IOC_ADD_ENCRYPTION_KEY:
4666 		return f2fs_ioc_add_encryption_key(filp, arg);
4667 	case FS_IOC_REMOVE_ENCRYPTION_KEY:
4668 		return f2fs_ioc_remove_encryption_key(filp, arg);
4669 	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
4670 		return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
4671 	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
4672 		return f2fs_ioc_get_encryption_key_status(filp, arg);
4673 	case FS_IOC_GET_ENCRYPTION_NONCE:
4674 		return f2fs_ioc_get_encryption_nonce(filp, arg);
4675 	case F2FS_IOC_GARBAGE_COLLECT:
4676 		return f2fs_ioc_gc(filp, arg);
4677 	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
4678 		return f2fs_ioc_gc_range(filp, arg);
4679 	case F2FS_IOC_WRITE_CHECKPOINT:
4680 		return f2fs_ioc_write_checkpoint(filp);
4681 	case F2FS_IOC_DEFRAGMENT:
4682 		return f2fs_ioc_defragment(filp, arg);
4683 	case F2FS_IOC_MOVE_RANGE:
4684 		return f2fs_ioc_move_range(filp, arg);
4685 	case F2FS_IOC_FLUSH_DEVICE:
4686 		return f2fs_ioc_flush_device(filp, arg);
4687 	case F2FS_IOC_GET_FEATURES:
4688 		return f2fs_ioc_get_features(filp, arg);
4689 	case F2FS_IOC_GET_PIN_FILE:
4690 		return f2fs_ioc_get_pin_file(filp, arg);
4691 	case F2FS_IOC_SET_PIN_FILE:
4692 		return f2fs_ioc_set_pin_file(filp, arg);
4693 	case F2FS_IOC_PRECACHE_EXTENTS:
4694 		return f2fs_ioc_precache_extents(filp);
4695 	case F2FS_IOC_RESIZE_FS:
4696 		return f2fs_ioc_resize_fs(filp, arg);
4697 	case FS_IOC_ENABLE_VERITY:
4698 		return f2fs_ioc_enable_verity(filp, arg);
4699 	case FS_IOC_MEASURE_VERITY:
4700 		return f2fs_ioc_measure_verity(filp, arg);
4701 	case FS_IOC_READ_VERITY_METADATA:
4702 		return f2fs_ioc_read_verity_metadata(filp, arg);
4703 	case FS_IOC_GETFSLABEL:
4704 		return f2fs_ioc_getfslabel(filp, arg);
4705 	case FS_IOC_SETFSLABEL:
4706 		return f2fs_ioc_setfslabel(filp, arg);
4707 	case F2FS_IOC_GET_COMPRESS_BLOCKS:
4708 		return f2fs_ioc_get_compress_blocks(filp, arg);
4709 	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
4710 		return f2fs_release_compress_blocks(filp, arg);
4711 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
4712 		return f2fs_reserve_compress_blocks(filp, arg);
4713 	case F2FS_IOC_SEC_TRIM_FILE:
4714 		return f2fs_sec_trim_file(filp, arg);
4715 	case F2FS_IOC_GET_COMPRESS_OPTION:
4716 		return f2fs_ioc_get_compress_option(filp, arg);
4717 	case F2FS_IOC_SET_COMPRESS_OPTION:
4718 		return f2fs_ioc_set_compress_option(filp, arg);
4719 	case F2FS_IOC_DECOMPRESS_FILE:
4720 		return f2fs_ioc_decompress_file(filp);
4721 	case F2FS_IOC_COMPRESS_FILE:
4722 		return f2fs_ioc_compress_file(filp);
4723 	case F2FS_IOC_GET_DEV_ALIAS_FILE:
4724 		return f2fs_ioc_get_dev_alias_file(filp, arg);
4725 	case F2FS_IOC_IO_PRIO:
4726 		return f2fs_ioc_io_prio(filp, arg);
4727 	default:
4728 		return -ENOTTY;
4729 	}
4730 }
4731 
f2fs_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)4732 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4733 {
4734 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
4735 		return -EIO;
4736 	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
4737 		return -ENOSPC;
4738 
4739 	return __f2fs_ioctl(filp, cmd, arg);
4740 }
4741 
4742 /*
4743  * Return %true if the given read or write request should use direct I/O, or
4744  * %false if it should use buffered I/O.
4745  */
f2fs_should_use_dio(struct inode * inode,struct kiocb * iocb,struct iov_iter * iter)4746 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
4747 				struct iov_iter *iter)
4748 {
4749 	unsigned int align;
4750 
4751 	if (!(iocb->ki_flags & IOCB_DIRECT))
4752 		return false;
4753 
4754 	if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
4755 		return false;
4756 
4757 	/*
4758 	 * Direct I/O not aligned to the disk's logical_block_size will be
4759 	 * attempted, but will fail with -EINVAL.
4760 	 *
4761 	 * f2fs additionally requires that direct I/O be aligned to the
4762 	 * filesystem block size, which is often a stricter requirement.
4763 	 * However, f2fs traditionally falls back to buffered I/O on requests
4764 	 * that are logical_block_size-aligned but not fs-block aligned.
4765 	 *
4766 	 * The below logic implements this behavior.
4767 	 */
4768 	align = iocb->ki_pos | iov_iter_alignment(iter);
4769 	if (!IS_ALIGNED(align, i_blocksize(inode)) &&
4770 	    IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
4771 		return false;
4772 
4773 	return true;
4774 }
4775 
f2fs_dio_read_end_io(struct kiocb * iocb,ssize_t size,int error,unsigned int flags)4776 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
4777 				unsigned int flags)
4778 {
4779 	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4780 
4781 	dec_page_count(sbi, F2FS_DIO_READ);
4782 	if (error)
4783 		return error;
4784 	f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
4785 	return 0;
4786 }
4787 
/* iomap direct-I/O hooks for reads: only a completion callback is set. */
static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
	.end_io = f2fs_dio_read_end_io,
};
4791 
f2fs_dio_read_iter(struct kiocb * iocb,struct iov_iter * to)4792 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
4793 {
4794 	struct file *file = iocb->ki_filp;
4795 	struct inode *inode = file_inode(file);
4796 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4797 	struct f2fs_inode_info *fi = F2FS_I(inode);
4798 	const loff_t pos = iocb->ki_pos;
4799 	const size_t count = iov_iter_count(to);
4800 	struct iomap_dio *dio;
4801 	ssize_t ret;
4802 
4803 	if (count == 0)
4804 		return 0; /* skip atime update */
4805 
4806 	trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
4807 
4808 	if (iocb->ki_flags & IOCB_NOWAIT) {
4809 		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4810 			ret = -EAGAIN;
4811 			goto out;
4812 		}
4813 	} else {
4814 		f2fs_down_read(&fi->i_gc_rwsem[READ]);
4815 	}
4816 
4817 	/* dio is not compatible w/ atomic file */
4818 	if (f2fs_is_atomic_file(inode)) {
4819 		f2fs_up_read(&fi->i_gc_rwsem[READ]);
4820 		ret = -EOPNOTSUPP;
4821 		goto out;
4822 	}
4823 
4824 	/*
4825 	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4826 	 * the higher-level function iomap_dio_rw() in order to ensure that the
4827 	 * F2FS_DIO_READ counter will be decremented correctly in all cases.
4828 	 */
4829 	inc_page_count(sbi, F2FS_DIO_READ);
4830 	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
4831 			     &f2fs_iomap_dio_read_ops, 0, NULL, 0);
4832 	if (IS_ERR_OR_NULL(dio)) {
4833 		ret = PTR_ERR_OR_ZERO(dio);
4834 		if (ret != -EIOCBQUEUED)
4835 			dec_page_count(sbi, F2FS_DIO_READ);
4836 	} else {
4837 		ret = iomap_dio_complete(dio);
4838 	}
4839 
4840 	f2fs_up_read(&fi->i_gc_rwsem[READ]);
4841 
4842 	file_accessed(file);
4843 out:
4844 	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
4845 	return ret;
4846 }
4847 
/*
 * Emit the datawrite_start or dataread_start tracepoint (chosen by @rw) for
 * @file, including its path, the calling task's pid and its comm.
 *
 * Nothing is traced if the name buffer cannot be obtained or the path cannot
 * be built.
 */
static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
				    int rw)
{
	struct inode *inode = file_inode(file);
	char *name_buf;
	char *path;

	name_buf = f2fs_getname(F2FS_I_SB(inode));
	if (!name_buf)
		return;

	path = dentry_path_raw(file_dentry(file), name_buf, PATH_MAX);
	if (!IS_ERR(path)) {
		if (rw == WRITE)
			trace_f2fs_datawrite_start(inode, pos, count,
					current->pid, path, current->comm);
		else
			trace_f2fs_dataread_start(inode, pos, count,
					current->pid, path, current->comm);
	}

	f2fs_putname(name_buf);
}
4869 
f2fs_file_read_iter(struct kiocb * iocb,struct iov_iter * to)4870 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
4871 {
4872 	struct inode *inode = file_inode(iocb->ki_filp);
4873 	const loff_t pos = iocb->ki_pos;
4874 	ssize_t ret;
4875 	bool dio;
4876 
4877 	if (!f2fs_is_compress_backend_ready(inode))
4878 		return -EOPNOTSUPP;
4879 
4880 	if (trace_f2fs_dataread_start_enabled())
4881 		f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
4882 					iov_iter_count(to), READ);
4883 
4884 	dio = f2fs_should_use_dio(inode, iocb, to);
4885 
4886 	/* In LFS mode, if there is inflight dio, wait for its completion */
4887 	if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
4888 	    get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE) &&
4889 		(!f2fs_is_pinned_file(inode) || !dio))
4890 		inode_dio_wait(inode);
4891 
4892 	if (dio) {
4893 		ret = f2fs_dio_read_iter(iocb, to);
4894 	} else {
4895 		ret = filemap_read(iocb, to, 0);
4896 		if (ret > 0)
4897 			f2fs_update_iostat(F2FS_I_SB(inode), inode,
4898 						APP_BUFFERED_READ_IO, ret);
4899 	}
4900 	trace_f2fs_dataread_end(inode, pos, ret);
4901 	return ret;
4902 }
4903 
f2fs_file_splice_read(struct file * in,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)4904 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
4905 				     struct pipe_inode_info *pipe,
4906 				     size_t len, unsigned int flags)
4907 {
4908 	struct inode *inode = file_inode(in);
4909 	const loff_t pos = *ppos;
4910 	ssize_t ret;
4911 
4912 	if (!f2fs_is_compress_backend_ready(inode))
4913 		return -EOPNOTSUPP;
4914 
4915 	if (trace_f2fs_dataread_start_enabled())
4916 		f2fs_trace_rw_file_path(in, pos, len, READ);
4917 
4918 	ret = filemap_splice_read(in, ppos, pipe, len, flags);
4919 	if (ret > 0)
4920 		f2fs_update_iostat(F2FS_I_SB(inode), inode,
4921 				   APP_BUFFERED_READ_IO, ret);
4922 
4923 	trace_f2fs_dataread_end(inode, pos, ret);
4924 	return ret;
4925 }
4926 
f2fs_write_checks(struct kiocb * iocb,struct iov_iter * from)4927 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
4928 {
4929 	struct file *file = iocb->ki_filp;
4930 	struct inode *inode = file_inode(file);
4931 	ssize_t count;
4932 	int err;
4933 
4934 	if (IS_IMMUTABLE(inode))
4935 		return -EPERM;
4936 
4937 	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
4938 		return -EPERM;
4939 
4940 	count = generic_write_checks(iocb, from);
4941 	if (count <= 0)
4942 		return count;
4943 
4944 	err = file_modified(file);
4945 	if (err)
4946 		return err;
4947 
4948 	f2fs_zero_post_eof_page(inode,
4949 		iocb->ki_pos + iov_iter_count(from), true);
4950 	return count;
4951 }
4952 
4953 /*
4954  * Preallocate blocks for a write request, if it is possible and helpful to do
4955  * so.  Returns a positive number if blocks may have been preallocated, 0 if no
4956  * blocks were preallocated, or a negative errno value if something went
4957  * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4958  * requested blocks (not just some of them) have been allocated.
4959  */
f2fs_preallocate_blocks(struct kiocb * iocb,struct iov_iter * iter,bool dio)4960 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
4961 				   bool dio)
4962 {
4963 	struct inode *inode = file_inode(iocb->ki_filp);
4964 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4965 	const loff_t pos = iocb->ki_pos;
4966 	const size_t count = iov_iter_count(iter);
4967 	struct f2fs_map_blocks map = {};
4968 	int flag;
4969 	int ret;
4970 
4971 	/* If it will be an out-of-place direct write, don't bother. */
4972 	if (dio && f2fs_lfs_mode(sbi))
4973 		return 0;
4974 	/*
4975 	 * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
4976 	 * buffered IO, if DIO meets any holes.
4977 	 */
4978 	if (dio && i_size_read(inode) &&
4979 		(F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
4980 		return 0;
4981 
4982 	/* No-wait I/O can't allocate blocks. */
4983 	if (iocb->ki_flags & IOCB_NOWAIT)
4984 		return 0;
4985 
4986 	/* If it will be a short write, don't bother. */
4987 	if (fault_in_iov_iter_readable(iter, count))
4988 		return 0;
4989 
4990 	if (f2fs_has_inline_data(inode)) {
4991 		/* If the data will fit inline, don't bother. */
4992 		if (pos + count <= MAX_INLINE_DATA(inode))
4993 			return 0;
4994 		ret = f2fs_convert_inline_inode(inode);
4995 		if (ret)
4996 			return ret;
4997 	}
4998 
4999 	/* Do not preallocate blocks that will be written partially in 4KB. */
5000 	map.m_lblk = F2FS_BLK_ALIGN(pos);
5001 	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
5002 	if (map.m_len > map.m_lblk)
5003 		map.m_len -= map.m_lblk;
5004 	else
5005 		return 0;
5006 
5007 	if (!IS_DEVICE_ALIASING(inode))
5008 		map.m_may_create = true;
5009 	if (dio) {
5010 		map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
5011 						inode->i_write_hint);
5012 		flag = F2FS_GET_BLOCK_PRE_DIO;
5013 	} else {
5014 		map.m_seg_type = NO_CHECK_TYPE;
5015 		flag = F2FS_GET_BLOCK_PRE_AIO;
5016 	}
5017 
5018 	ret = f2fs_map_blocks(inode, &map, flag);
5019 	/* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
5020 	if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
5021 		return ret;
5022 	if (ret == 0)
5023 		set_inode_flag(inode, FI_PREALLOCATED_ALL);
5024 	return map.m_len;
5025 }
5026 
f2fs_buffered_write_iter(struct kiocb * iocb,struct iov_iter * from)5027 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
5028 					struct iov_iter *from)
5029 {
5030 	struct file *file = iocb->ki_filp;
5031 	struct inode *inode = file_inode(file);
5032 	ssize_t ret;
5033 
5034 	if (iocb->ki_flags & IOCB_NOWAIT)
5035 		return -EOPNOTSUPP;
5036 
5037 	ret = generic_perform_write(iocb, from);
5038 
5039 	if (ret > 0) {
5040 		f2fs_update_iostat(F2FS_I_SB(inode), inode,
5041 						APP_BUFFERED_IO, ret);
5042 	}
5043 	return ret;
5044 }
5045 
f2fs_dio_write_end_io(struct kiocb * iocb,ssize_t size,int error,unsigned int flags)5046 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
5047 				 unsigned int flags)
5048 {
5049 	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
5050 
5051 	dec_page_count(sbi, F2FS_DIO_WRITE);
5052 	if (error)
5053 		return error;
5054 	f2fs_update_time(sbi, REQ_TIME);
5055 	f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
5056 	return 0;
5057 }
5058 
/*
 * submit_io hook for direct writes: derive the bio's write hint from the
 * inode's write hint (inode hint -> segment type -> temperature -> bio hint)
 * and submit the bio through blk_crypto_submit_bio().
 */
static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
					struct bio *bio, loff_t file_offset)
{
	struct inode *inode = iter->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum log_type seg_type;
	enum temp_type seg_temp;

	seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
	seg_temp = f2fs_get_segment_temp(sbi, seg_type);

	bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, seg_temp);
	blk_crypto_submit_bio(bio);
}
5070 
/*
 * iomap direct-I/O hooks for writes: a completion callback plus a custom bio
 * submission hook.
 */
static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
	.end_io		= f2fs_dio_write_end_io,
	.submit_io	= f2fs_dio_write_submit_io,
};
5075 
/*
 * Write back the byte range [@start_pos, @end_pos] of @mapping, wait for it,
 * and then invalidate the page-cache pages covering that range.  The pages
 * are left in place if the writeback fails.
 */
static void f2fs_flush_buffered_write(struct address_space *mapping,
				      loff_t start_pos, loff_t end_pos)
{
	if (filemap_write_and_wait_range(mapping, start_pos, end_pos) < 0)
		return;

	invalidate_mapping_pages(mapping,
				 start_pos >> PAGE_SHIFT,
				 end_pos >> PAGE_SHIFT);
}
5088 
/*
 * Perform a direct write through iomap, falling back to a buffered write for
 * whatever part of the request the direct path did not consume.
 *
 * @iocb:          the write request
 * @from:          source data
 * @may_need_sync: set to false when the whole request went through the
 *                 direct path; left untouched otherwise
 *
 * Takes i_gc_rwsem[WRITE] for reading around the direct I/O, plus
 * i_gc_rwsem[READ] in LFS mode; IOCB_NOWAIT requests use trylocks and fail
 * with -EAGAIN instead of blocking.
 *
 * Returns the number of bytes written (direct plus buffered fallback) or a
 * negative errno.
 */
static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
				   bool *may_need_sync)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const bool do_opu = f2fs_lfs_mode(sbi);
	const loff_t pos = iocb->ki_pos;
	const ssize_t count = iov_iter_count(from);
	unsigned int dio_flags;
	struct iomap_dio *dio;
	ssize_t ret;

	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* f2fs_convert_inline_inode() and block allocation can block */
		if (f2fs_has_inline_data(inode) ||
		    !f2fs_overwrite_io(inode, pos, count)) {
			ret = -EAGAIN;
			goto out;
		}

		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
			ret = -EAGAIN;
			goto out;
		}
		/* Back out of the first lock if the second is contended. */
		if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
			f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
			ret = -EAGAIN;
			goto out;
		}
	} else {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			goto out;

		f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
		if (do_opu)
			f2fs_down_read(&fi->i_gc_rwsem[READ]);
	}

	/*
	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
	 * the higher-level function iomap_dio_rw() in order to ensure that the
	 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
	 */
	inc_page_count(sbi, F2FS_DIO_WRITE);
	dio_flags = 0;
	/* A write that goes past the current i_size is forced to wait. */
	if (pos + count > inode->i_size)
		dio_flags |= IOMAP_DIO_FORCE_WAIT;
	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
	if (IS_ERR_OR_NULL(dio)) {
		ret = PTR_ERR_OR_ZERO(dio);
		/* -ENOTBLK is not an error: nothing written, use fallback. */
		if (ret == -ENOTBLK)
			ret = 0;
		/* When the request was queued, the counter is left alone. */
		if (ret != -EIOCBQUEUED)
			dec_page_count(sbi, F2FS_DIO_WRITE);
	} else {
		ret = iomap_dio_complete(dio);
	}

	/* Release in the reverse order of acquisition. */
	if (do_opu)
		f2fs_up_read(&fi->i_gc_rwsem[READ]);
	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);

	if (ret < 0)
		goto out;
	if (pos + ret > inode->i_size)
		f2fs_i_size_write(inode, pos + ret);
	if (!do_opu)
		set_inode_flag(inode, FI_UPDATE_WRITE);

	if (iov_iter_count(from)) {
		ssize_t ret2;
		loff_t bufio_start_pos = iocb->ki_pos;

		/*
		 * The direct write was partial, so we need to fall back to a
		 * buffered write for the remainder.
		 */

		ret2 = f2fs_buffered_write_iter(iocb, from);
		if (iov_iter_count(from))
			f2fs_write_failed(inode, iocb->ki_pos);
		/* On fallback failure, report what the direct part wrote. */
		if (ret2 < 0)
			goto out;

		/*
		 * Ensure that the pagecache pages are written to disk and
		 * invalidated to preserve the expected O_DIRECT semantics.
		 */
		if (ret2 > 0) {
			loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;

			ret += ret2;

			f2fs_flush_buffered_write(file->f_mapping,
						  bufio_start_pos,
						  bufio_end_pos);
		}
	} else {
		/* iomap_dio_rw() already handled the generic_write_sync(). */
		*may_need_sync = false;
	}
out:
	trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
	return ret;
}
5200 
f2fs_file_write_iter(struct kiocb * iocb,struct iov_iter * from)5201 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
5202 {
5203 	struct inode *inode = file_inode(iocb->ki_filp);
5204 	const loff_t orig_pos = iocb->ki_pos;
5205 	const size_t orig_count = iov_iter_count(from);
5206 	loff_t target_size;
5207 	bool dio;
5208 	bool may_need_sync = true;
5209 	int preallocated;
5210 	const loff_t pos = iocb->ki_pos;
5211 	const ssize_t count = iov_iter_count(from);
5212 	ssize_t ret;
5213 
5214 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
5215 		ret = -EIO;
5216 		goto out;
5217 	}
5218 
5219 	if (!f2fs_is_compress_backend_ready(inode)) {
5220 		ret = -EOPNOTSUPP;
5221 		goto out;
5222 	}
5223 
5224 	if (iocb->ki_flags & IOCB_NOWAIT) {
5225 		if (!inode_trylock(inode)) {
5226 			ret = -EAGAIN;
5227 			goto out;
5228 		}
5229 	} else {
5230 		inode_lock(inode);
5231 	}
5232 
5233 	if (f2fs_is_pinned_file(inode) &&
5234 	    !f2fs_overwrite_io(inode, pos, count)) {
5235 		ret = -EIO;
5236 		goto out_unlock;
5237 	}
5238 
5239 	ret = f2fs_write_checks(iocb, from);
5240 	if (ret <= 0)
5241 		goto out_unlock;
5242 
5243 	/* Determine whether we will do a direct write or a buffered write. */
5244 	dio = f2fs_should_use_dio(inode, iocb, from);
5245 
5246 	/* dio is not compatible w/ atomic write */
5247 	if (dio && f2fs_is_atomic_file(inode)) {
5248 		ret = -EOPNOTSUPP;
5249 		goto out_unlock;
5250 	}
5251 
5252 	/* Possibly preallocate the blocks for the write. */
5253 	target_size = iocb->ki_pos + iov_iter_count(from);
5254 	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
5255 	if (preallocated < 0) {
5256 		ret = preallocated;
5257 	} else {
5258 		if (trace_f2fs_datawrite_start_enabled())
5259 			f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
5260 						orig_count, WRITE);
5261 
5262 		/* Do the actual write. */
5263 		ret = dio ?
5264 			f2fs_dio_write_iter(iocb, from, &may_need_sync) :
5265 			f2fs_buffered_write_iter(iocb, from);
5266 
5267 		trace_f2fs_datawrite_end(inode, orig_pos, ret);
5268 	}
5269 
5270 	/* Don't leave any preallocated blocks around past i_size. */
5271 	if (preallocated && i_size_read(inode) < target_size) {
5272 		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5273 		filemap_invalidate_lock(inode->i_mapping);
5274 		if (!f2fs_truncate(inode))
5275 			file_dont_truncate(inode);
5276 		filemap_invalidate_unlock(inode->i_mapping);
5277 		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5278 	} else {
5279 		file_dont_truncate(inode);
5280 	}
5281 
5282 	clear_inode_flag(inode, FI_PREALLOCATED_ALL);
5283 out_unlock:
5284 	inode_unlock(inode);
5285 out:
5286 	trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
5287 
5288 	if (ret > 0 && may_need_sync)
5289 		ret = generic_write_sync(iocb, ret);
5290 
5291 	/* If buffered IO was forced, flush and drop the data from
5292 	 * the page cache to preserve O_DIRECT semantics
5293 	 */
5294 	if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
5295 		f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
5296 					  orig_pos,
5297 					  orig_pos + ret - 1);
5298 
5299 	return ret;
5300 }
5301 
/*
 * f2fs_file_fadvise - ->fadvise entry point
 * @filp:   file the advice applies to
 * @offset: start of the advised range
 * @len:    length of the advised range
 * @advice: POSIX_FADV_* value
 *
 * POSIX_FADV_SEQUENTIAL is handled entirely here.  Every other advice is
 * forwarded to generic_fadvise(), with f2fs-specific work before
 * (POSIX_FADV_WILLNEED at offset 0) or after (POSIX_FADV_DONTNEED,
 * POSIX_FADV_NOREUSE) the generic call.
 *
 * Return: 0 on success or a negative errno.
 */
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
		int advice)
{
	struct inode *inode = file_inode(filp);
	int ret;

	trace_f2fs_fadvise(inode, offset, len, advice);

	if (advice == POSIX_FADV_SEQUENTIAL) {
		struct address_space *mapping;
		struct backing_dev_info *bdi;

		if (S_ISFIFO(inode->i_mode))
			return -ESPIPE;

		mapping = filp->f_mapping;
		if (!mapping || len < 0)
			return -EINVAL;

		/*
		 * Size the readahead window as the bdi's ra_pages scaled by
		 * the superblock's seq_file_ra_mul.
		 */
		bdi = inode_to_bdi(mapping->host);
		filp->f_ra.ra_pages = bdi->ra_pages *
			F2FS_I_SB(inode)->seq_file_ra_mul;

		/* f_mode is updated under f_lock. */
		spin_lock(&filp->f_lock);
		filp->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&filp->f_lock);
		return 0;
	}

	/* Load extent cache at the first readahead. */
	if (advice == POSIX_FADV_WILLNEED && offset == 0)
		f2fs_precache_extents(inode);

	ret = generic_fadvise(filp, offset, len, advice);
	if (ret)
		return ret;

	/* f2fs-specific follow-up once the generic handling succeeded. */
	switch (advice) {
	case POSIX_FADV_DONTNEED:
		/* Only with COMPRESS_CACHE enabled and a compressed file. */
		if (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
		    f2fs_compressed_file(inode))
			f2fs_invalidate_compress_pages(F2FS_I_SB(inode),
						       inode->i_ino);
		break;
	case POSIX_FADV_NOREUSE:
		ret = f2fs_keep_noreuse_range(inode, offset, len);
		break;
	default:
		break;
	}

	return ret;
}
5344 
5345 #ifdef CONFIG_COMPAT
5346 struct compat_f2fs_gc_range {
5347 	u32 sync;
5348 	compat_u64 start;
5349 	compat_u64 len;
5350 };
5351 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,\
5352 						struct compat_f2fs_gc_range)
5353 
f2fs_compat_ioc_gc_range(struct file * file,unsigned long arg)5354 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
5355 {
5356 	struct compat_f2fs_gc_range __user *urange;
5357 	struct f2fs_gc_range range;
5358 	int err;
5359 
5360 	urange = compat_ptr(arg);
5361 	err = get_user(range.sync, &urange->sync);
5362 	err |= get_user(range.start, &urange->start);
5363 	err |= get_user(range.len, &urange->len);
5364 	if (err)
5365 		return -EFAULT;
5366 
5367 	return __f2fs_ioc_gc_range(file, &range);
5368 }
5369 
5370 struct compat_f2fs_move_range {
5371 	u32 dst_fd;
5372 	compat_u64 pos_in;
5373 	compat_u64 pos_out;
5374 	compat_u64 len;
5375 };
5376 #define F2FS_IOC32_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
5377 					struct compat_f2fs_move_range)
5378 
f2fs_compat_ioc_move_range(struct file * file,unsigned long arg)5379 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
5380 {
5381 	struct compat_f2fs_move_range __user *urange;
5382 	struct f2fs_move_range range;
5383 	int err;
5384 
5385 	urange = compat_ptr(arg);
5386 	err = get_user(range.dst_fd, &urange->dst_fd);
5387 	err |= get_user(range.pos_in, &urange->pos_in);
5388 	err |= get_user(range.pos_out, &urange->pos_out);
5389 	err |= get_user(range.len, &urange->len);
5390 	if (err)
5391 		return -EFAULT;
5392 
5393 	return __f2fs_ioc_move_range(file, &range);
5394 }
5395 
f2fs_compat_ioctl(struct file * file,unsigned int cmd,unsigned long arg)5396 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5397 {
5398 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
5399 		return -EIO;
5400 	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
5401 		return -ENOSPC;
5402 
5403 	switch (cmd) {
5404 	case FS_IOC32_GETVERSION:
5405 		cmd = FS_IOC_GETVERSION;
5406 		break;
5407 	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
5408 		return f2fs_compat_ioc_gc_range(file, arg);
5409 	case F2FS_IOC32_MOVE_RANGE:
5410 		return f2fs_compat_ioc_move_range(file, arg);
5411 	case F2FS_IOC_START_ATOMIC_WRITE:
5412 	case F2FS_IOC_START_ATOMIC_REPLACE:
5413 	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
5414 	case F2FS_IOC_START_VOLATILE_WRITE:
5415 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
5416 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
5417 	case F2FS_IOC_SHUTDOWN:
5418 	case FITRIM:
5419 	case FS_IOC_SET_ENCRYPTION_POLICY:
5420 	case FS_IOC_GET_ENCRYPTION_PWSALT:
5421 	case FS_IOC_GET_ENCRYPTION_POLICY:
5422 	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
5423 	case FS_IOC_ADD_ENCRYPTION_KEY:
5424 	case FS_IOC_REMOVE_ENCRYPTION_KEY:
5425 	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
5426 	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
5427 	case FS_IOC_GET_ENCRYPTION_NONCE:
5428 	case F2FS_IOC_GARBAGE_COLLECT:
5429 	case F2FS_IOC_WRITE_CHECKPOINT:
5430 	case F2FS_IOC_DEFRAGMENT:
5431 	case F2FS_IOC_FLUSH_DEVICE:
5432 	case F2FS_IOC_GET_FEATURES:
5433 	case F2FS_IOC_GET_PIN_FILE:
5434 	case F2FS_IOC_SET_PIN_FILE:
5435 	case F2FS_IOC_PRECACHE_EXTENTS:
5436 	case F2FS_IOC_RESIZE_FS:
5437 	case FS_IOC_ENABLE_VERITY:
5438 	case FS_IOC_MEASURE_VERITY:
5439 	case FS_IOC_READ_VERITY_METADATA:
5440 	case FS_IOC_GETFSLABEL:
5441 	case FS_IOC_SETFSLABEL:
5442 	case F2FS_IOC_GET_COMPRESS_BLOCKS:
5443 	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
5444 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
5445 	case F2FS_IOC_SEC_TRIM_FILE:
5446 	case F2FS_IOC_GET_COMPRESS_OPTION:
5447 	case F2FS_IOC_SET_COMPRESS_OPTION:
5448 	case F2FS_IOC_DECOMPRESS_FILE:
5449 	case F2FS_IOC_COMPRESS_FILE:
5450 	case F2FS_IOC_GET_DEV_ALIAS_FILE:
5451 	case F2FS_IOC_IO_PRIO:
5452 		break;
5453 	default:
5454 		return -ENOIOCTLCMD;
5455 	}
5456 	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
5457 }
5458 #endif
5459 
5460 const struct file_operations f2fs_file_operations = {
5461 	.llseek		= f2fs_llseek,
5462 	.read_iter	= f2fs_file_read_iter,
5463 	.write_iter	= f2fs_file_write_iter,
5464 	.iopoll		= iocb_bio_iopoll,
5465 	.open		= f2fs_file_open,
5466 	.release	= f2fs_release_file,
5467 	.mmap_prepare	= f2fs_file_mmap_prepare,
5468 	.flush		= f2fs_file_flush,
5469 	.fsync		= f2fs_sync_file,
5470 	.fallocate	= f2fs_fallocate,
5471 	.unlocked_ioctl	= f2fs_ioctl,
5472 #ifdef CONFIG_COMPAT
5473 	.compat_ioctl	= f2fs_compat_ioctl,
5474 #endif
5475 	.splice_read	= f2fs_file_splice_read,
5476 	.splice_write	= iter_file_splice_write,
5477 	.fadvise	= f2fs_file_fadvise,
5478 	.fop_flags	= FOP_BUFFER_RASYNC,
5479 	.setlease	= generic_setlease,
5480 };
5481