1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /* Network filesystem support services.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 * See:
8 *
9 * Documentation/filesystems/netfs_library.rst
10 *
11 * for a description of the network filesystem interface declared here.
12 */
13
14 #ifndef _LINUX_NETFS_H
15 #define _LINUX_NETFS_H
16
17 #include <linux/workqueue.h>
18 #include <linux/fs.h>
19 #include <linux/pagemap.h>
20 #include <linux/uio.h>
21
22 enum netfs_sreq_ref_trace;
23
/*
 * PG_fscache is PG_private_2 under another name.  The flag indicates that a
 * page is currently backed by a local disk cache; every accessor below simply
 * forwards to its PG_private_2 counterpart.
 */
#define folio_test_fscache(folio)	folio_test_private_2(folio)
#define PageFsCache(page)		PagePrivate2((page))
#define SetPageFsCache(page)		SetPagePrivate2((page))
#define ClearPageFsCache(page)		ClearPagePrivate2((page))
#define TestSetPageFsCache(page)	TestSetPagePrivate2((page))
#define TestClearPageFsCache(page)	TestClearPagePrivate2((page))
34
/**
 * folio_start_fscache - Mark a folio as being written to the local cache.
 * @folio: The folio about to be written.
 *
 * Must be called before the folio is written to a local cache.  Only one such
 * write may be outstanding per folio; beginning a second one before the first
 * has ended is not allowed and is caught by the VM_BUG_ON_FOLIO() check.
 */
static inline void folio_start_fscache(struct folio *folio)
{
	/* PG_private_2 (PG_fscache) already set means a write is outstanding. */
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);

	/* Call folio_get() first, then flag the write as in progress. */
	folio_get(folio);
	folio_set_private_2(folio);
}
48
/**
 * folio_end_fscache - Finish an fscache write on a folio.
 * @folio: The folio that was being written.
 *
 * To be called once the folio has been written to the local cache.  Anything
 * sleeping on this folio is woken up.
 */
static inline void folio_end_fscache(struct folio *folio)
{
	/* PG_fscache is PG_private_2, so hand off to the private_2 helper. */
	folio_end_private_2(folio);
}
60
/**
 * folio_wait_fscache - Sleep until an fscache write on a folio has ended.
 * @folio: The folio to wait on.
 *
 * Waits for any write of this folio to a local cache that is currently in
 * progress.  Unless the caller holds the folio lock, a fresh write may begin
 * as soon as the current one completes.
 */
static inline void folio_wait_fscache(struct folio *folio)
{
	/* PG_fscache is PG_private_2, so wait on that flag. */
	folio_wait_private_2(folio);
}
73
/**
 * folio_wait_fscache_killable - Killable wait for an fscache write to end.
 * @folio: The folio to wait on.
 *
 * Waits for any write of this folio to a local cache that is currently in
 * progress, giving up early if a fatal signal arrives.  Unless the caller
 * holds the folio lock, a fresh write may begin as soon as the current one
 * completes.
 *
 * Return:
 * - 0 if the wait completed.
 * - -EINTR if a fatal signal was encountered.
 */
static inline int folio_wait_fscache_killable(struct folio *folio)
{
	/* The result of the PG_private_2 wait is passed straight back. */
	return folio_wait_private_2_killable(folio);
}
91
/*
 * Page-based form of folio_start_fscache(): acts on the folio that
 * page_folio() returns for @page.
 */
static inline void set_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_start_fscache(folio);
}
96
/*
 * Page-based way to end an fscache write: calls folio_end_private_2() on the
 * folio that page_folio() returns for @page.
 */
static inline void end_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_end_private_2(folio);
}
101
/*
 * Page-based wait for an fscache write: calls folio_wait_private_2() on the
 * folio that page_folio() returns for @page.
 */
static inline void wait_on_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_wait_private_2(folio);
}
106
/*
 * Page-based killable wait for an fscache write: calls
 * folio_wait_private_2_killable() on the folio that page_folio() returns for
 * @page and passes its result back unchanged.
 */
static inline int wait_on_page_fscache_killable(struct page *page)
{
	struct folio *folio = page_folio(page);

	return folio_wait_private_2_killable(folio);
}
111
/* Marks that may be applied to entries of xarray-based buffers */
#define NETFS_BUF_PUT_MARK		XA_MARK_0	/* - Page needs putting */
#define NETFS_BUF_PAGECACHE_MARK	XA_MARK_1	/* - Page needs wb/dirty flag wrangling */
115
/*
 * Where a subrequest reads from or writes to (stored in
 * netfs_io_subrequest::source).
 * NOTE(review): __mode(byte) presumably narrows the enum's storage to a
 * single byte - confirm against the macro's definition.
 */
enum netfs_io_source {
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_INVALID_READ,
	NETFS_UPLOAD_TO_SERVER,
	NETFS_WRITE_TO_CACHE,
	NETFS_INVALID_WRITE,
} __mode(byte);
125
/*
 * Termination callback type; the cache ->read() and ->write() operations in
 * struct netfs_cache_ops take one of these together with its private pointer.
 * NOTE(review): transferred_or_error presumably carries a byte count on
 * success or a negative error code - confirm against the callers.
 */
typedef void (*netfs_io_terminated_t)(void *priv,
				      ssize_t transferred_or_error,
				      bool was_async);
128
129 /*
130 * Per-inode context. This wraps the VFS inode.
131 */
132 struct netfs_inode {
133 struct inode inode; /* The VFS inode */
134 const struct netfs_request_ops *ops;
135 #if IS_ENABLED(CONFIG_FSCACHE)
136 struct fscache_cookie *cache;
137 #endif
138 loff_t remote_i_size; /* Size of the remote file */
139 loff_t zero_point; /* Size after which we assume there's no data
140 * on the server */
141 unsigned long flags;
142 #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
143 #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
144 #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
145 #define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */
146 };
147
148 /*
149 * A netfs group - for instance a ceph snap. This is marked on dirty pages and
150 * pages marked with a group must be flushed before they can be written under
151 * the domain of another group.
152 */
153 struct netfs_group {
154 refcount_t ref;
155 void (*free)(struct netfs_group *netfs_group);
156 };
157
/*
 * Extra record describing a dirty page.  It hangs off folio->private and is
 * only attached when it is actually needed.
 */
struct netfs_folio {
	struct netfs_group	*netfs_group;	/* Filesystem's grouping marker (or NULL). */
	unsigned int		dirty_offset;	/* Write-streaming dirty data offset */
	unsigned int		dirty_len;	/* Write-streaming dirty data length */
};
/* Tag bit OR'd into folio->private when it points at a struct netfs_folio. */
#define NETFS_FOLIO_INFO	0x1UL
168
netfs_folio_info(struct folio * folio)169 static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
170 {
171 void *priv = folio_get_private(folio);
172
173 if ((unsigned long)priv & NETFS_FOLIO_INFO)
174 return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
175 return NULL;
176 }
177
netfs_folio_group(struct folio * folio)178 static inline struct netfs_group *netfs_folio_group(struct folio *folio)
179 {
180 struct netfs_folio *finfo;
181 void *priv = folio_get_private(folio);
182
183 finfo = netfs_folio_info(folio);
184 if (finfo)
185 return finfo->netfs_group;
186 return priv;
187 }
188
/*
 * The resources needed to carry out operations on a cache: the cache's
 * operation table plus two private pointers and some bookkeeping values.
 */
struct netfs_cache_resources {
	const struct netfs_cache_ops	*ops;
	void				*cache_priv;
	void				*cache_priv2;
	unsigned int			debug_id;	/* Cookie debug ID */
	unsigned int			inval_counter;	/* object->inval_counter at begin_op */
};
199
200 /*
201 * Descriptor for a single component subrequest. Each operation represents an
202 * individual read/write from/to a server, a cache, a journal, etc..
203 *
204 * The buffer iterator is persistent for the life of the subrequest struct and
205 * the pages it points to can be relied on to exist for the duration.
206 */
207 struct netfs_io_subrequest {
208 struct netfs_io_request *rreq; /* Supervising I/O request */
209 struct work_struct work;
210 struct list_head rreq_link; /* Link in rreq->subrequests */
211 struct iov_iter io_iter; /* Iterator for this subrequest */
212 loff_t start; /* Where to start the I/O */
213 size_t len; /* Size of the I/O */
214 size_t transferred; /* Amount of data transferred */
215 refcount_t ref;
216 short error; /* 0 or error that occurred */
217 unsigned short debug_index; /* Index in list (for debugging output) */
218 unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */
219 enum netfs_io_source source; /* Where to read from/write to */
220 unsigned long flags;
221 #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
222 #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
223 #define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */
224 #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
225 #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
226 #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
227 };
228
/*
 * What caused an I/O request to be made (stored in netfs_io_request::origin).
 * nr__netfs_io_origin comes last and so equals the number of origins.
 * NOTE(review): __mode(byte) presumably narrows the enum's storage to a
 * single byte - confirm against the macro's definition.
 */
enum netfs_io_origin {
	NETFS_READAHEAD,		/* This read was triggered by readahead */
	NETFS_READPAGE,			/* This read is a synchronous read */
	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
	NETFS_WRITEBACK,		/* This write was triggered by writepages */
	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
	NETFS_LAUNDER_WRITE,		/* This is triggered by ->launder_folio() */
	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
	NETFS_DIO_READ,			/* This is a direct I/O read */
	NETFS_DIO_WRITE,		/* This is a direct I/O write */
	nr__netfs_io_origin
} __mode(byte);
241
242 /*
243 * Descriptor for an I/O helper request. This is used to make multiple I/O
244 * operations to a variety of data stores and then stitch the result together.
245 */
246 struct netfs_io_request {
247 union {
248 struct work_struct work;
249 struct rcu_head rcu;
250 };
251 struct inode *inode; /* The file being accessed */
252 struct address_space *mapping; /* The mapping being accessed */
253 struct kiocb *iocb; /* AIO completion vector */
254 struct netfs_cache_resources cache_resources;
255 struct list_head proc_link; /* Link in netfs_iorequests */
256 struct list_head subrequests; /* Contributory I/O operations */
257 struct iov_iter iter; /* Unencrypted-side iterator */
258 struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
259 void *netfs_priv; /* Private data for the netfs */
260 struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
261 unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
262 unsigned int debug_id;
263 unsigned int rsize; /* Maximum read size (0 for none) */
264 unsigned int wsize; /* Maximum write size (0 for none) */
265 unsigned int subreq_counter; /* Next subreq->debug_index */
266 atomic_t nr_outstanding; /* Number of ops in progress */
267 atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
268 size_t submitted; /* Amount submitted for I/O so far */
269 size_t len; /* Length of the request */
270 size_t upper_len; /* Length can be extended to here */
271 size_t transferred; /* Amount to be indicated as transferred */
272 short error; /* 0 or error that occurred */
273 enum netfs_io_origin origin; /* Origin of the request */
274 bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
275 loff_t i_size; /* Size of the file */
276 loff_t start; /* Start position */
277 pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
278 refcount_t ref;
279 unsigned long flags;
280 #define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */
281 #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
282 #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
283 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
284 #define NETFS_RREQ_FAILED 4 /* The request failed */
285 #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
286 #define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */
287 #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
288 #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
289 #define NETFS_RREQ_BLOCKED 10 /* We blocked */
290 const struct netfs_request_ops *netfs_ops;
291 void (*cleanup)(struct netfs_io_request *req);
292 };
293
294 /*
295 * Operations the network filesystem can/must provide to the helpers.
296 */
297 struct netfs_request_ops {
298 unsigned int io_request_size; /* Alloc size for netfs_io_request struct */
299 unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */
300 int (*init_request)(struct netfs_io_request *rreq, struct file *file);
301 void (*free_request)(struct netfs_io_request *rreq);
302 void (*free_subrequest)(struct netfs_io_subrequest *rreq);
303
304 /* Read request handling */
305 void (*expand_readahead)(struct netfs_io_request *rreq);
306 bool (*clamp_length)(struct netfs_io_subrequest *subreq);
307 void (*issue_read)(struct netfs_io_subrequest *subreq);
308 bool (*is_still_valid)(struct netfs_io_request *rreq);
309 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
310 struct folio **foliop, void **_fsdata);
311 void (*done)(struct netfs_io_request *rreq);
312
313 /* Modification handling */
314 void (*update_i_size)(struct inode *inode, loff_t i_size);
315
316 /* Write request handling */
317 void (*create_write_requests)(struct netfs_io_request *wreq,
318 loff_t start, size_t len);
319 void (*invalidate_cache)(struct netfs_io_request *wreq);
320 };
321
/*
 * Policy for reading from a hole; passed as the read_hole argument of the
 * cache ->read() operation in struct netfs_cache_ops.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};
330
331 /*
332 * Table of operations for access to a cache.
333 */
334 struct netfs_cache_ops {
335 /* End an operation */
336 void (*end_operation)(struct netfs_cache_resources *cres);
337
338 /* Read data from the cache */
339 int (*read)(struct netfs_cache_resources *cres,
340 loff_t start_pos,
341 struct iov_iter *iter,
342 enum netfs_read_from_hole read_hole,
343 netfs_io_terminated_t term_func,
344 void *term_func_priv);
345
346 /* Write data to the cache */
347 int (*write)(struct netfs_cache_resources *cres,
348 loff_t start_pos,
349 struct iov_iter *iter,
350 netfs_io_terminated_t term_func,
351 void *term_func_priv);
352
353 /* Expand readahead request */
354 void (*expand_readahead)(struct netfs_cache_resources *cres,
355 loff_t *_start, size_t *_len, loff_t i_size);
356
357 /* Prepare a read operation, shortening it to a cached/uncached
358 * boundary as appropriate.
359 */
360 enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
361 loff_t i_size);
362
363 /* Prepare a write operation, working out what part of the write we can
364 * actually do.
365 */
366 int (*prepare_write)(struct netfs_cache_resources *cres,
367 loff_t *_start, size_t *_len, size_t upper_len,
368 loff_t i_size, bool no_space_allocated_yet);
369
370 /* Prepare an on-demand read operation, shortening it to a cached/uncached
371 * boundary as appropriate.
372 */
373 enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
374 loff_t start, size_t *_len,
375 loff_t i_size,
376 unsigned long *_flags, ino_t ino);
377
378 /* Query the occupancy of the cache in a region, returning where the
379 * next chunk of data starts and how long it is.
380 */
381 int (*query_occupancy)(struct netfs_cache_resources *cres,
382 loff_t start, size_t len, size_t granularity,
383 loff_t *_data_start, size_t *_data_len);
384 };
385
/* High-level read API: unbuffered, buffered and general file read_iter. */
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb,
				   struct iov_iter *iter);
ssize_t netfs_buffered_read_iter(struct kiocb *iocb,
				 struct iov_iter *iter);
ssize_t netfs_file_read_iter(struct kiocb *iocb,
			     struct iov_iter *iter);
390
/*
 * High-level write API.  The first two take the netfs group that the write is
 * made under.
 */
ssize_t netfs_perform_write(struct kiocb *iocb,
			    struct iov_iter *iter,
			    struct netfs_group *netfs_group);
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb,
					 struct iov_iter *from,
					 struct netfs_group *netfs_group);
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb,
				    struct iov_iter *from);
ssize_t netfs_file_write_iter(struct kiocb *iocb,
			      struct iov_iter *from);
398
399 /* Address operations API */
400 struct readahead_control;
401 void netfs_readahead(struct readahead_control *);
402 int netfs_read_folio(struct file *, struct folio *);
403 int netfs_write_begin(struct netfs_inode *, struct file *,
404 struct address_space *, loff_t pos, unsigned int len,
405 struct folio **, void **fsdata);
406 int netfs_writepages(struct address_space *mapping,
407 struct writeback_control *wbc);
408 bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
409 int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
410 void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
411 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
412 bool netfs_release_folio(struct folio *folio, gfp_t gfp);
413 int netfs_launder_folio(struct folio *folio);
414
415 /* VMA operations API. */
416 vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
417
418 /* (Sub)request management API. */
419 void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
420 void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
421 enum netfs_sreq_ref_trace what);
422 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
423 bool was_async, enum netfs_sreq_ref_trace what);
424 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
425 struct iov_iter *new,
426 iov_iter_extraction_t extraction_flags);
427 size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
428 size_t max_size, size_t max_segs);
429 struct netfs_io_subrequest *netfs_create_write_request(
430 struct netfs_io_request *wreq, enum netfs_io_source dest,
431 loff_t start, size_t len, work_func_t worker);
432 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
433 bool was_async);
434 void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
435
/*
 * Paired start/end calls for read, write and direct I/O on an inode.
 * NOTE(review): the start variants return int, presumably 0 or a negative
 * error code - confirm against the definitions.
 */
int  netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);
int  netfs_start_io_write(struct inode *inode);
void netfs_end_io_write(struct inode *inode);
int  netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);
442
443 /**
444 * netfs_inode - Get the netfs inode context from the inode
445 * @inode: The inode to query
446 *
447 * Get the netfs lib inode context from the network filesystem's inode. The
448 * context struct is expected to directly follow on from the VFS inode struct.
449 */
netfs_inode(struct inode * inode)450 static inline struct netfs_inode *netfs_inode(struct inode *inode)
451 {
452 return container_of(inode, struct netfs_inode, inode);
453 }
454
455 /**
456 * netfs_inode_init - Initialise a netfslib inode context
457 * @ctx: The netfs inode to initialise
458 * @ops: The netfs's operations list
459 * @use_zero_point: True to use the zero_point read optimisation
460 *
461 * Initialise the netfs library context struct. This is expected to follow on
462 * directly from the VFS inode struct.
463 */
netfs_inode_init(struct netfs_inode * ctx,const struct netfs_request_ops * ops,bool use_zero_point)464 static inline void netfs_inode_init(struct netfs_inode *ctx,
465 const struct netfs_request_ops *ops,
466 bool use_zero_point)
467 {
468 ctx->ops = ops;
469 ctx->remote_i_size = i_size_read(&ctx->inode);
470 ctx->zero_point = LLONG_MAX;
471 ctx->flags = 0;
472 #if IS_ENABLED(CONFIG_FSCACHE)
473 ctx->cache = NULL;
474 #endif
475 /* ->releasepage() drives zero_point */
476 if (use_zero_point) {
477 ctx->zero_point = ctx->remote_i_size;
478 mapping_set_release_always(ctx->inode.i_mapping);
479 }
480 }
481
482 /**
483 * netfs_resize_file - Note that a file got resized
484 * @ctx: The netfs inode being resized
485 * @new_i_size: The new file size
486 * @changed_on_server: The change was applied to the server
487 *
488 * Inform the netfs lib that a file got resized so that it can adjust its state.
489 */
netfs_resize_file(struct netfs_inode * ctx,loff_t new_i_size,bool changed_on_server)490 static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
491 bool changed_on_server)
492 {
493 if (changed_on_server)
494 ctx->remote_i_size = new_i_size;
495 if (new_i_size < ctx->zero_point)
496 ctx->zero_point = new_i_size;
497 }
498
499 /**
500 * netfs_i_cookie - Get the cache cookie from the inode
501 * @ctx: The netfs inode to query
502 *
503 * Get the caching cookie (if enabled) from the network filesystem's inode.
504 */
netfs_i_cookie(struct netfs_inode * ctx)505 static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
506 {
507 #if IS_ENABLED(CONFIG_FSCACHE)
508 return ctx->cache;
509 #else
510 return NULL;
511 #endif
512 }
513
514 #endif /* _LINUX_NETFS_H */
515