// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>
#include <linux/security.h>

#include "internal.h"

/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_file: file that the user requested to open
 * @flags: open flags
 * @real_path: path of the backing file
 * @cred: credentials for open
 *
 * Open a backing file for a stackable filesystem (e.g., overlayfs).
 * @user_file may be on the stackable filesystem and @real_path on the
 * underlying filesystem.  In this case, we want to be able to return
 * the path of @user_file to the user.  This is done by embedding the
 * returned file into a container structure that also stores the stacked
 * file's path, which can be retrieved using backing_file_user_path().
 */
struct file *backing_file_open(const struct file *user_file, int flags,
			       const struct path *real_path,
			       const struct cred *cred)
{
	const struct path *user_path = &user_file->f_path;
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred, user_file);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	backing_file_set_user_path(f, user_path);
	error = vfs_open(real_path, f);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}

	return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
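
/*
 * Example (sketch): a stacking filesystem might open its backing file
 * along these lines; 'real_path' and 'layer_cred' are hypothetical
 * caller state, not part of this API:
 *
 *	struct file *realfile;
 *
 *	realfile = backing_file_open(user_file, O_RDWR, &real_path,
 *				     layer_cred);
 *	if (IS_ERR(realfile))
 *		return PTR_ERR(realfile);
 *
 * The returned file's f_path is @real_path, while the path presented
 * back to the user can be recovered with backing_file_user_path().
 */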

/**
 * backing_tmpfile_open - open an unnamed temporary backing file
 * @user_file: file that the user requested to open
 * @flags: open flags
 * @real_parentpath: path of the backing directory for the tmpfile
 * @mode: mode of the new tmpfile
 * @cred: credentials for open
 *
 * Like backing_file_open(), but the backing file is an anonymous
 * tmpfile created with vfs_tmpfile() in @real_parentpath on the
 * underlying filesystem.
 */
struct file *backing_tmpfile_open(const struct file *user_file, int flags,
				  const struct path *real_parentpath,
				  umode_t mode, const struct cred *cred)
{
	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
	const struct path *user_path = &user_file->f_path;
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred, user_file);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	backing_file_set_user_path(f, user_path);
	error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}
	return f;
}
EXPORT_SYMBOL(backing_tmpfile_open);

/*
 * Container for an async backing request.  The submitter and the
 * completion path each hold a reference; the last one to drop it (see
 * backing_aio_put()) frees the aio and the backing file reference.
 */
struct backing_aio {
	struct kiocb iocb;
	refcount_t ref;
	struct kiocb *orig_iocb;
	/* used for aio completion */
	void (*end_write)(struct kiocb *iocb, ssize_t res);
	struct work_struct work;
	long res;
};

static struct kmem_cache *backing_aio_cachep;

/*
 * These iocb flags share the values of their RWF_* counterparts, so a
 * synchronous request can pass them straight to vfs_iter_read/write().
 */
#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

static rwf_t iocb_to_rw_flags(int flags)
{
	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

static void backing_aio_put(struct backing_aio *aio)
{
	if (refcount_dec_and_test(&aio->ref)) {
		fput(aio->iocb.ki_filp);
		kmem_cache_free(backing_aio_cachep, aio);
	}
}

static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
	struct kiocb *iocb = &aio->iocb;
	struct kiocb *orig_iocb = aio->orig_iocb;

	orig_iocb->ki_pos = iocb->ki_pos;
	if (aio->end_write)
		aio->end_write(orig_iocb, res);

	backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(iocb);

	backing_aio_cleanup(aio, res);
	orig_iocb->ki_complete(orig_iocb, res);
}

static void backing_aio_complete_work(struct work_struct *work)
{
	struct backing_aio *aio = container_of(work, struct backing_aio, work);

	backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

	/*
	 * Punt to a work queue to serialize updates of mtime/size.
	 */
	aio->res = res;
	INIT_WORK(&aio->work, backing_aio_complete_work);
	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
		   &aio->work);
}

static int backing_aio_init_wq(struct kiocb *iocb)
{
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

	if (sb->s_dio_done_wq)
		return 0;

	return sb_init_dio_done_wq(sb);
}

static int do_backing_file_read_iter(struct file *file, struct iov_iter *iter,
				     struct kiocb *iocb, int flags)
{
	struct backing_aio *aio = NULL;
	int ret;

	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		return vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
	}

	aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
	if (!aio)
		return -ENOMEM;

	aio->orig_iocb = iocb;
	kiocb_clone(&aio->iocb, iocb, get_file(file));
	aio->iocb.ki_complete = backing_aio_rw_complete;
	/* One ref for the submitter, one for ki_complete */
	refcount_set(&aio->ref, 2);
	ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
	backing_aio_put(aio);
	if (ret != -EIOCBQUEUED)
		backing_aio_cleanup(aio, ret);
	return ret;
}

ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
			       struct kiocb *iocb, int flags,
			       struct backing_file_ctx *ctx)
{
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	scoped_with_creds(ctx->cred)
		ret = do_backing_file_read_iter(file, iter, iocb, flags);

	if (ctx->accessed)
		ctx->accessed(iocb->ki_filp);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
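
/*
 * Example (sketch): a stacking filesystem's ->read_iter() might wrap
 * this helper roughly as follows; 'example_real_file' and 'layer_cred'
 * are hypothetical, and file_accessed() is used as the @accessed hook:
 *
 *	static ssize_t example_read_iter(struct kiocb *iocb,
 *					 struct iov_iter *iter)
 *	{
 *		struct backing_file_ctx ctx = {
 *			.cred = layer_cred,
 *			.accessed = file_accessed,
 *		};
 *
 *		return backing_file_read_iter(example_real_file(iocb->ki_filp),
 *					      iter, iocb, iocb->ki_flags, &ctx);
 *	}
 */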

static int do_backing_file_write_iter(struct file *file, struct iov_iter *iter,
				      struct kiocb *iocb, int flags,
				      void (*end_write)(struct kiocb *, ssize_t))
{
	struct backing_aio *aio;
	int ret;

	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
		if (end_write)
			end_write(iocb, ret);
		return ret;
	}

	ret = backing_aio_init_wq(iocb);
	if (ret)
		return ret;

	aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
	if (!aio)
		return -ENOMEM;

	aio->orig_iocb = iocb;
	aio->end_write = end_write;
	kiocb_clone(&aio->iocb, iocb, get_file(file));
	aio->iocb.ki_flags = flags;
	aio->iocb.ki_complete = backing_aio_queue_completion;
	/* One ref for the submitter, one for ki_complete */
	refcount_set(&aio->ref, 2);
	ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
	backing_aio_put(aio);
	if (ret != -EIOCBQUEUED)
		backing_aio_cleanup(aio, ret);
	return ret;
}

ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
				struct kiocb *iocb, int flags,
				struct backing_file_ctx *ctx)
{
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	ret = file_remove_privs(iocb->ki_filp);
	if (ret)
		return ret;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	scoped_with_creds(ctx->cred)
		return do_backing_file_write_iter(file, iter, iocb, flags,
						  ctx->end_write);
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
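
/*
 * Sketch of the write-side counterpart: @ctx->end_write, when set, is
 * called with the result on both the sync and async completion paths.
 * 'example_real_file', 'layer_cred' and 'example_end_write' are
 * hypothetical:
 *
 *	struct backing_file_ctx ctx = {
 *		.cred = layer_cred,
 *		.end_write = example_end_write,
 *	};
 *
 *	ret = backing_file_write_iter(example_real_file(iocb->ki_filp),
 *				      iter, iocb, iocb->ki_flags, &ctx);
 */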

/*
 * The splice helpers take the user file's kiocb to carry the position
 * and so that @ctx->accessed / @ctx->end_write act on the user file
 * (iocb->ki_filp).
 */
ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	ssize_t ret;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	scoped_with_creds(ctx->cred)
		ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags);

	if (ctx->accessed)
		ctx->accessed(iocb->ki_filp);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);

ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, struct kiocb *iocb,
				  size_t len, unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	ssize_t ret;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!out->f_op->splice_write)
		return -EINVAL;

	ret = file_remove_privs(iocb->ki_filp);
	if (ret)
		return ret;

	scoped_with_creds(ctx->cred) {
		file_start_write(out);
		ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len,
					      flags);
		file_end_write(out);
	}

	if (ctx->end_write)
		ctx->end_write(iocb, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);

int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
		      struct backing_file_ctx *ctx)
{
	struct file *user_file = vma->vm_file;
	int ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!can_mmap_file(file))
		return -ENODEV;

	vma_set_file(vma, file);

	scoped_with_creds(ctx->cred) {
		ret = security_mmap_backing_file(vma, file, user_file);
		if (ret)
			return ret;

		ret = vfs_mmap(vma->vm_file, vma);
	}

	if (ctx->accessed)
		ctx->accessed(user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);
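
/*
 * Example (sketch): a stacking filesystem's ->mmap() might forward to
 * the backing file like this; 'example_real_file' and 'layer_cred' are
 * hypothetical:
 *
 *	static int example_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct backing_file_ctx ctx = { .cred = layer_cred };
 *
 *		return backing_file_mmap(example_real_file(file), vma, &ctx);
 *	}
 *
 * On success the vma is backed by the backing file: vma_set_file()
 * above swaps vma->vm_file from the user file to the backing file.
 */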

static int __init backing_aio_init(void)
{
	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
	if (!backing_aio_cachep)
		return -ENOMEM;

	return 0;
}
fs_initcall(backing_aio_init);