1 // SPDX-License-Identifier: MIT
2 /*
3  * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
4  *
5  * Copyright (C) 2006-2018 Oracle Corporation
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/page-flags.h>
10 #include <linux/pagemap.h>
11 #include <linux/highmem.h>
12 #include <linux/sizes.h>
13 #include "vfsmod.h"
14 
/*
 * Guest-side bookkeeping for one handle obtained from the host.
 * Instances are linked on the owning inode's handle_list and are
 * reference counted.
 */
struct vboxsf_handle {
	/* host handle value, passed to vboxsf_read/vboxsf_write/vboxsf_close */
	u64 handle;
	/* shared folder root, copied from the superblock info at creation */
	u32 root;
	/* SHFL_CF_ACCESS_* flags the handle was opened with */
	u32 access_flags;
	/* dropping the last reference runs vboxsf_handle_release() */
	struct kref refcount;
	/* entry in the inode's handle_list, protected by handle_list_mutex */
	struct list_head head;
};
22 
23 struct vboxsf_handle *vboxsf_create_sf_handle(struct inode *inode,
24 					      u64 handle, u32 access_flags)
25 {
26 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
27 	struct vboxsf_handle *sf_handle;
28 
29 	sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
30 	if (!sf_handle)
31 		return ERR_PTR(-ENOMEM);
32 
33 	/* the host may have given us different attr then requested */
34 	sf_i->force_restat = 1;
35 
36 	/* init our handle struct and add it to the inode's handles list */
37 	sf_handle->handle = handle;
38 	sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
39 	sf_handle->access_flags = access_flags;
40 	kref_init(&sf_handle->refcount);
41 
42 	mutex_lock(&sf_i->handle_list_mutex);
43 	list_add(&sf_handle->head, &sf_i->handle_list);
44 	mutex_unlock(&sf_i->handle_list_mutex);
45 
46 	return sf_handle;
47 }
48 
49 static int vboxsf_file_open(struct inode *inode, struct file *file)
50 {
51 	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
52 	struct shfl_createparms params = {};
53 	struct vboxsf_handle *sf_handle;
54 	u32 access_flags = 0;
55 	int err;
56 
57 	/*
58 	 * We check the value of params.handle afterwards to find out if
59 	 * the call succeeded or failed, as the API does not seem to cleanly
60 	 * distinguish error and informational messages.
61 	 *
62 	 * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
63 	 * make the shared folders host service use our mode parameter.
64 	 */
65 	params.handle = SHFL_HANDLE_NIL;
66 	if (file->f_flags & O_CREAT) {
67 		params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
68 		/*
69 		 * We ignore O_EXCL, as the Linux kernel seems to call create
70 		 * beforehand itself, so O_EXCL should always fail.
71 		 */
72 		if (file->f_flags & O_TRUNC)
73 			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
74 		else
75 			params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
76 	} else {
77 		params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
78 		if (file->f_flags & O_TRUNC)
79 			params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
80 	}
81 
82 	switch (file->f_flags & O_ACCMODE) {
83 	case O_RDONLY:
84 		access_flags |= SHFL_CF_ACCESS_READ;
85 		break;
86 
87 	case O_WRONLY:
88 		access_flags |= SHFL_CF_ACCESS_WRITE;
89 		break;
90 
91 	case O_RDWR:
92 		access_flags |= SHFL_CF_ACCESS_READWRITE;
93 		break;
94 
95 	default:
96 		WARN_ON(1);
97 	}
98 
99 	if (file->f_flags & O_APPEND)
100 		access_flags |= SHFL_CF_ACCESS_APPEND;
101 
102 	params.create_flags |= access_flags;
103 	params.info.attr.mode = inode->i_mode;
104 
105 	err = vboxsf_create_at_dentry(file_dentry(file), &params);
106 	if (err == 0 && params.handle == SHFL_HANDLE_NIL)
107 		err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
108 	if (err)
109 		return err;
110 
111 	sf_handle = vboxsf_create_sf_handle(inode, params.handle, access_flags);
112 	if (IS_ERR(sf_handle)) {
113 		vboxsf_close(sbi->root, params.handle);
114 		return PTR_ERR(sf_handle);
115 	}
116 
117 	file->private_data = sf_handle;
118 	return 0;
119 }
120 
121 static void vboxsf_handle_release(struct kref *refcount)
122 {
123 	struct vboxsf_handle *sf_handle =
124 		container_of(refcount, struct vboxsf_handle, refcount);
125 
126 	vboxsf_close(sf_handle->root, sf_handle->handle);
127 	kfree(sf_handle);
128 }
129 
130 void vboxsf_release_sf_handle(struct inode *inode, struct vboxsf_handle *sf_handle)
131 {
132 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
133 
134 	mutex_lock(&sf_i->handle_list_mutex);
135 	list_del(&sf_handle->head);
136 	mutex_unlock(&sf_i->handle_list_mutex);
137 
138 	kref_put(&sf_handle->refcount, vboxsf_handle_release);
139 }
140 
141 static int vboxsf_file_release(struct inode *inode, struct file *file)
142 {
143 	/*
144 	 * When a file is closed on our (the guest) side, we want any subsequent
145 	 * accesses done on the host side to see all changes done from our side.
146 	 */
147 	filemap_write_and_wait(inode->i_mapping);
148 
149 	vboxsf_release_sf_handle(inode, file->private_data);
150 	return 0;
151 }
152 
153 /*
154  * Write back dirty pages now, because there may not be any suitable
155  * open files later
156  */
157 static void vboxsf_vma_close(struct vm_area_struct *vma)
158 {
159 	filemap_write_and_wait(vma->vm_file->f_mapping);
160 }
161 
/*
 * VMA operations installed by vboxsf_file_mmap(). Faults are handled by the
 * generic filemap helpers; only ->close is vboxsf specific.
 */
static const struct vm_operations_struct vboxsf_file_vm_ops = {
	/* writes back dirty pages when the VMA is closed */
	.close		= vboxsf_vma_close,
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
};
167 
168 static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
169 {
170 	int err;
171 
172 	err = generic_file_mmap(file, vma);
173 	if (!err)
174 		vma->vm_ops = &vboxsf_file_vm_ops;
175 
176 	return err;
177 }
178 
179 /*
180  * Note that since we are accessing files on the host's filesystem, files
181  * may always be changed underneath us by the host!
182  *
183  * The vboxsf API between the guest and the host does not offer any functions
184  * to deal with this. There is no inode-generation to check for changes, no
185  * events / callback on changes and no way to lock files.
186  *
187  * To avoid returning stale data when a file gets *opened* on our (the guest)
188  * side, we do a "stat" on the host side, then compare the mtime with the
189  * last known mtime and invalidate the page-cache if they differ.
190  * This is done from vboxsf_inode_revalidate().
191  *
192  * When reads are done through the read_iter fop, it is possible to do
193  * further cache revalidation then, there are 3 options to deal with this:
194  *
195  * 1)  Rely solely on the revalidation done at open time
196  * 2)  Do another "stat" and compare mtime again. Unfortunately the vboxsf
197  *     host API does not allow stat on handles, so we would need to use
198  *     file->f_path.dentry and the stat will then fail if the file was unlinked
 *     or renamed (and there is nothing like NFS' silly-rename). So we get:
200  * 2a) "stat" and compare mtime, on stat failure invalidate the cache
201  * 2b) "stat" and compare mtime, on stat failure do nothing
202  * 3)  Simply always call invalidate_inode_pages2_range on the range of the read
203  *
 * Currently we are keeping things KISS and using option 1. This allows
205  * directly using generic_file_read_iter without wrapping it.
206  *
207  * This means that only data written on the host side before open() on
208  * the guest side is guaranteed to be seen by the guest. If necessary
209  * we may provide other read-cache strategies in the future and make this
210  * configurable through a mount option.
211  */
/* File operations for regular files. */
const struct file_operations vboxsf_reg_fops = {
	.llseek = generic_file_llseek,
	/* generic helper used directly, without extra cache revalidation */
	.read_iter = generic_file_read_iter,
	.write_iter = generic_file_write_iter,
	/* installs vboxsf_file_vm_ops on the new mapping */
	.mmap = vboxsf_file_mmap,
	/* obtains a host handle and stores it in file->private_data */
	.open = vboxsf_file_open,
	/* flushes the page cache and drops the handle */
	.release = vboxsf_file_release,
	.fsync = noop_fsync,
	.splice_read = filemap_splice_read,
	.setlease = simple_nosetlease,
};
223 
/* Inode operations for regular files: attribute get/set only. */
const struct inode_operations vboxsf_reg_iops = {
	.getattr = vboxsf_getattr,
	.setattr = vboxsf_setattr
};
228 
229 static int vboxsf_read_folio(struct file *file, struct folio *folio)
230 {
231 	struct vboxsf_handle *sf_handle = file->private_data;
232 	loff_t off = folio_pos(folio);
233 	u32 nread = PAGE_SIZE;
234 	u8 *buf;
235 	int err;
236 
237 	buf = kmap_local_folio(folio, 0);
238 
239 	err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
240 	buf = folio_zero_tail(folio, nread, buf + nread);
241 
242 	kunmap_local(buf);
243 	folio_end_read(folio, err == 0);
244 	return err;
245 }
246 
247 static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
248 {
249 	struct vboxsf_handle *h, *sf_handle = NULL;
250 
251 	mutex_lock(&sf_i->handle_list_mutex);
252 	list_for_each_entry(h, &sf_i->handle_list, head) {
253 		if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
254 		    h->access_flags == SHFL_CF_ACCESS_READWRITE) {
255 			kref_get(&h->refcount);
256 			sf_handle = h;
257 			break;
258 		}
259 	}
260 	mutex_unlock(&sf_i->handle_list_mutex);
261 
262 	return sf_handle;
263 }
264 
265 static int vboxsf_writepages(struct address_space *mapping,
266 		struct writeback_control *wbc)
267 {
268 	struct inode *inode = mapping->host;
269 	struct folio *folio = NULL;
270 	struct vboxsf_inode *sf_i = VBOXSF_I(inode);
271 	struct vboxsf_handle *sf_handle;
272 	loff_t size = i_size_read(inode);
273 	int error;
274 
275 	sf_handle = vboxsf_get_write_handle(sf_i);
276 	if (!sf_handle)
277 		return -EBADF;
278 
279 	while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
280 		loff_t off = folio_pos(folio);
281 		u32 nwrite = folio_size(folio);
282 		u8 *buf;
283 
284 		if (nwrite > size - off)
285 			nwrite = size - off;
286 
287 		buf = kmap_local_folio(folio, 0);
288 		error = vboxsf_write(sf_handle->root, sf_handle->handle,
289 				off, &nwrite, buf);
290 		kunmap_local(buf);
291 
292 		folio_unlock(folio);
293 	}
294 
295 	kref_put(&sf_handle->refcount, vboxsf_handle_release);
296 
297 	/* mtime changed */
298 	if (error == 0)
299 		sf_i->force_restat = 1;
300 	return error;
301 }
302 
303 static int vboxsf_write_end(struct file *file, struct address_space *mapping,
304 			    loff_t pos, unsigned int len, unsigned int copied,
305 			    struct folio *folio, void *fsdata)
306 {
307 	struct inode *inode = mapping->host;
308 	struct vboxsf_handle *sf_handle = file->private_data;
309 	size_t from = offset_in_folio(folio, pos);
310 	u32 nwritten = len;
311 	u8 *buf;
312 	int err;
313 
314 	/* zero the stale part of the folio if we did a short copy */
315 	if (!folio_test_uptodate(folio) && copied < len)
316 		folio_zero_range(folio, from + copied, len - copied);
317 
318 	buf = kmap(&folio->page);
319 	err = vboxsf_write(sf_handle->root, sf_handle->handle,
320 			   pos, &nwritten, buf + from);
321 	kunmap(&folio->page);
322 
323 	if (err) {
324 		nwritten = 0;
325 		goto out;
326 	}
327 
328 	/* mtime changed */
329 	VBOXSF_I(inode)->force_restat = 1;
330 
331 	if (!folio_test_uptodate(folio) && nwritten == folio_size(folio))
332 		folio_mark_uptodate(folio);
333 
334 	pos += nwritten;
335 	if (pos > inode->i_size)
336 		i_size_write(inode, pos);
337 
338 out:
339 	folio_unlock(folio);
340 	folio_put(folio);
341 
342 	return nwritten;
343 }
344 
/*
 * Address space operations for regular files.
 *
 * Note that simple_write_begin does not read the page from disk on partial
 * writes. This is ok since vboxsf_write_end only writes the written parts
 * of the page and it does not call folio_mark_uptodate for partial writes.
 */
const struct address_space_operations vboxsf_reg_aops = {
	/* reads up to PAGE_SIZE bytes from the host into the folio */
	.read_folio = vboxsf_read_folio,
	/* writes dirty folios through any handle that allows writing */
	.writepages = vboxsf_writepages,
	.dirty_folio = filemap_dirty_folio,
	.write_begin = simple_write_begin,
	/* writes the copied range to the host synchronously */
	.write_end = vboxsf_write_end,
	.migrate_folio = filemap_migrate_folio,
};
358 
359 static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
360 				   struct delayed_call *done)
361 {
362 	struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
363 	struct shfl_string *path;
364 	char *link;
365 	int err;
366 
367 	if (!dentry)
368 		return ERR_PTR(-ECHILD);
369 
370 	path = vboxsf_path_from_dentry(sbi, dentry);
371 	if (IS_ERR(path))
372 		return ERR_CAST(path);
373 
374 	link = kzalloc(PATH_MAX, GFP_KERNEL);
375 	if (!link) {
376 		__putname(path);
377 		return ERR_PTR(-ENOMEM);
378 	}
379 
380 	err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
381 	__putname(path);
382 	if (err) {
383 		kfree(link);
384 		return ERR_PTR(err);
385 	}
386 
387 	set_delayed_call(done, kfree_link, link);
388 	return link;
389 }
390 
/* Inode operations for symlinks: only link target resolution is provided. */
const struct inode_operations vboxsf_lnk_iops = {
	.get_link = vboxsf_get_link
};
394