// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext2/file.c
 *
 *  Copyright (C) 1992, 1993, 1994, 1995
 *  Remy Card (card@masi.ibp.fr)
 *  Laboratoire MASI - Institut Blaise Pascal
 *  Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext2 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */
21
22 #include <linux/time.h>
23 #include <linux/pagemap.h>
24 #include <linux/dax.h>
25 #include <linux/filelock.h>
26 #include <linux/quotaops.h>
27 #include <linux/iomap.h>
28 #include <linux/uio.h>
29 #include <linux/buffer_head.h>
30 #include "ext2.h"
31 #include "xattr.h"
32 #include "acl.h"
33 #include "trace.h"
34
35 #ifdef CONFIG_FS_DAX
ext2_dax_read_iter(struct kiocb * iocb,struct iov_iter * to)36 static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
37 {
38 struct inode *inode = iocb->ki_filp->f_mapping->host;
39 ssize_t ret;
40
41 if (!iov_iter_count(to))
42 return 0; /* skip atime */
43
44 inode_lock_shared(inode);
45 ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
46 inode_unlock_shared(inode);
47
48 file_accessed(iocb->ki_filp);
49 return ret;
50 }
51
ext2_dax_write_iter(struct kiocb * iocb,struct iov_iter * from)52 static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
53 {
54 struct file *file = iocb->ki_filp;
55 struct inode *inode = file->f_mapping->host;
56 ssize_t ret;
57
58 inode_lock(inode);
59 ret = generic_write_checks(iocb, from);
60 if (ret <= 0)
61 goto out_unlock;
62 ret = file_remove_privs(file);
63 if (ret)
64 goto out_unlock;
65 ret = file_update_time(file);
66 if (ret)
67 goto out_unlock;
68
69 ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
70 if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
71 i_size_write(inode, iocb->ki_pos);
72 mark_inode_dirty(inode);
73 }
74
75 out_unlock:
76 inode_unlock(inode);
77 if (ret > 0)
78 ret = generic_write_sync(iocb, ret);
79 return ret;
80 }
81
/*
 * The lock ordering for ext2 DAX fault paths is:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault (vfs, freeze)
 *     address_space->invalidate_lock
 *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
 *         ext2_inode_info->truncate_mutex
 *
 * The default page_lock and i_size verification done by non-DAX fault paths
 * is sufficient because ext2 doesn't support hole punching.
 */
ext2_dax_fault(struct vm_fault * vmf)94 static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
95 {
96 struct inode *inode = file_inode(vmf->vma->vm_file);
97 vm_fault_t ret;
98 bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
99 (vmf->vma->vm_flags & VM_SHARED);
100
101 if (write) {
102 sb_start_pagefault(inode->i_sb);
103 file_update_time(vmf->vma->vm_file);
104 }
105 filemap_invalidate_lock_shared(inode->i_mapping);
106
107 ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);
108
109 filemap_invalidate_unlock_shared(inode->i_mapping);
110 if (write)
111 sb_end_pagefault(inode->i_sb);
112 return ret;
113 }
114
115 static const struct vm_operations_struct ext2_dax_vm_ops = {
116 .fault = ext2_dax_fault,
117 /*
118 * .huge_fault is not supported for DAX because allocation in ext2
119 * cannot be reliably aligned to huge page sizes and so pmd faults
120 * will always fail and fail back to regular faults.
121 */
122 .page_mkwrite = ext2_dax_fault,
123 .pfn_mkwrite = ext2_dax_fault,
124 };
125
ext2_file_mmap_prepare(struct vm_area_desc * desc)126 static int ext2_file_mmap_prepare(struct vm_area_desc *desc)
127 {
128 struct file *file = desc->file;
129
130 if (!IS_DAX(file_inode(file)))
131 return generic_file_mmap_prepare(desc);
132
133 file_accessed(file);
134 desc->vm_ops = &ext2_dax_vm_ops;
135 return 0;
136 }
137 #else
138 #define ext2_file_mmap_prepare generic_file_mmap_prepare
139 #endif
140
/*
 * Called when filp is released. This happens when all file descriptors
 * for a single struct file are closed. Note that different open() calls
 * for the same file yield different struct file structures.
 */
ext2_release_file(struct inode * inode,struct file * filp)146 static int ext2_release_file (struct inode * inode, struct file * filp)
147 {
148 if (filp->f_mode & FMODE_WRITE) {
149 mutex_lock(&EXT2_I(inode)->truncate_mutex);
150 ext2_discard_reservation(inode);
151 mutex_unlock(&EXT2_I(inode)->truncate_mutex);
152 }
153 return 0;
154 }
155
/*
 * Flush a file's data and metadata to disk.
 *
 * generic_buffers_fsync() writes back the data range and the metadata
 * buffers (e.g. indirect blocks) tracked on the inode's private buffer
 * list. The original code called a nonexistent mmb_fsync() with an
 * EXT2_I(inode)->i_metadata_bhs argument; the VFS helper for buffer-list
 * filesystems is generic_buffers_fsync(file, start, end, datasync).
 * On -EIO we cannot tell which buffer failed, so report a generic
 * metadata write error through ext2_error().
 */
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int ret;
	struct inode *inode = file->f_mapping->host;
	struct super_block *sb = inode->i_sb;

	ret = generic_buffers_fsync(file, start, end, datasync);
	if (ret == -EIO)
		/* We don't really know where the IO error happened... */
		ext2_error(sb, __func__,
			   "detected IO error when writing metadata buffers");
	return ret;
}
170
ext2_dio_read_iter(struct kiocb * iocb,struct iov_iter * to)171 static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
172 {
173 struct file *file = iocb->ki_filp;
174 struct inode *inode = file->f_mapping->host;
175 ssize_t ret;
176
177 trace_ext2_dio_read_begin(iocb, to, 0);
178 inode_lock_shared(inode);
179 ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
180 inode_unlock_shared(inode);
181 trace_ext2_dio_read_end(iocb, to, ret);
182
183 return ret;
184 }
185
static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t end = iocb->ki_pos + size;

	if (!error) {
		/*
		 * If we are extending the file, we have to update i_size
		 * here before the page cache gets invalidated in
		 * iomap_dio_rw(). This prevents racing buffered reads from
		 * zeroing out too much from page cache pages. All extending
		 * writes happen synchronously with the inode lock held by
		 * ext2_dio_write_iter(), so updating i_size here is safe.
		 */
		if (end > i_size_read(inode)) {
			i_size_write(inode, end);
			mark_inode_dirty(inode);
		}
	}

	trace_ext2_dio_write_endio(iocb, size, error);
	return error;
}
212
213 static const struct iomap_dio_ops ext2_dio_write_ops = {
214 .end_io = ext2_dio_write_end_io,
215 };
216
/*
 * Direct-IO write path. Runs fully under the exclusive inode lock.
 *
 * Unaligned or size-extending writes are forced synchronous
 * (IOMAP_DIO_FORCE_WAIT) so that ext2_dio_write_end_io() can safely
 * update i_size under the inode lock. If the DIO layer cannot complete
 * the whole request (partial write, or -ENOTBLK fallback), the
 * remainder is written through the page cache and the affected range
 * is then flushed and invalidated to keep cache and disk coherent.
 */
static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;
	unsigned int flags = 0;
	unsigned long blocksize = inode->i_sb->s_blocksize;
	loff_t offset = iocb->ki_pos;
	loff_t count = iov_iter_count(from);
	ssize_t status = 0;	/* bytes written by the buffered fallback */

	trace_ext2_dio_write_begin(iocb, from, 0);
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	ret = kiocb_modified(iocb);
	if (ret)
		goto out_unlock;

	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
	   (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
		flags |= IOMAP_DIO_FORCE_WAIT;

	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
			   flags, NULL, 0);

	/* ENOTBLK is magic return value for fallback to buffered-io */
	if (ret == -ENOTBLK)
		ret = 0;

	/* Trim blocks allocated beyond what was actually written. */
	if (ret < 0 && ret != -EIOCBQUEUED)
		ext2_write_failed(inode->i_mapping, offset + count);

	/* handle case for partial write and for fallback to buffered write */
	if (ret >= 0 && iov_iter_count(from)) {
		loff_t pos, endbyte;
		int ret2;

		iocb->ki_flags &= ~IOCB_DIRECT;
		pos = iocb->ki_pos;
		status = generic_perform_write(iocb, from);
		if (unlikely(status < 0)) {
			ret = status;
			goto out_unlock;
		}

		ret += status;
		endbyte = pos + status - 1;
		/*
		 * Write back and drop the pages dirtied by the fallback so
		 * the file looks as if it had been written with O_DIRECT.
		 */
		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
						    endbyte);
		if (!ret2)
			invalidate_mapping_pages(inode->i_mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		if (ret > 0)
			generic_write_sync(iocb, ret);
	}

out_unlock:
	inode_unlock(inode);
	if (status)
		trace_ext2_dio_write_buff_end(iocb, from, status);
	trace_ext2_dio_write_end(iocb, from, ret);
	return ret;
}
285
ext2_file_read_iter(struct kiocb * iocb,struct iov_iter * to)286 static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
287 {
288 #ifdef CONFIG_FS_DAX
289 if (IS_DAX(iocb->ki_filp->f_mapping->host))
290 return ext2_dax_read_iter(iocb, to);
291 #endif
292 if (iocb->ki_flags & IOCB_DIRECT)
293 return ext2_dio_read_iter(iocb, to);
294
295 return generic_file_read_iter(iocb, to);
296 }
297
ext2_file_write_iter(struct kiocb * iocb,struct iov_iter * from)298 static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
299 {
300 #ifdef CONFIG_FS_DAX
301 if (IS_DAX(iocb->ki_filp->f_mapping->host))
302 return ext2_dax_write_iter(iocb, from);
303 #endif
304 if (iocb->ki_flags & IOCB_DIRECT)
305 return ext2_dio_write_iter(iocb, from);
306
307 return generic_file_write_iter(iocb, from);
308 }
309
ext2_file_open(struct inode * inode,struct file * filp)310 static int ext2_file_open(struct inode *inode, struct file *filp)
311 {
312 filp->f_mode |= FMODE_CAN_ODIRECT;
313 return dquot_file_open(inode, filp);
314 }
315
316 const struct file_operations ext2_file_operations = {
317 .llseek = generic_file_llseek,
318 .read_iter = ext2_file_read_iter,
319 .write_iter = ext2_file_write_iter,
320 .unlocked_ioctl = ext2_ioctl,
321 #ifdef CONFIG_COMPAT
322 .compat_ioctl = ext2_compat_ioctl,
323 #endif
324 .mmap_prepare = ext2_file_mmap_prepare,
325 .open = ext2_file_open,
326 .release = ext2_release_file,
327 .fsync = ext2_fsync,
328 .get_unmapped_area = thp_get_unmapped_area,
329 .splice_read = filemap_splice_read,
330 .splice_write = iter_file_splice_write,
331 .setlease = generic_setlease,
332 };
333
334 const struct inode_operations ext2_file_inode_operations = {
335 .listxattr = ext2_listxattr,
336 .getattr = ext2_getattr,
337 .setattr = ext2_setattr,
338 .get_inode_acl = ext2_get_acl,
339 .set_acl = ext2_set_acl,
340 .fiemap = ext2_fiemap,
341 .fileattr_get = ext2_fileattr_get,
342 .fileattr_set = ext2_fileattr_set,
343 };
344