// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 unsigned long long *_start,
					 unsigned long long *_len,
					 unsigned long long i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}
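
/*
 * Illustrative sketch only (not part of this file): a cache backend's
 * ->expand_readahead() typically just rounds the proposed window out to its
 * granule size so that whole granules can be stored.  Assuming a hypothetical
 * backend with a fixed CACHE_GRANULE_SIZE:
 *
 *	static void mycache_expand_readahead(struct netfs_cache_resources *cres,
 *					     unsigned long long *_start,
 *					     unsigned long long *_len,
 *					     unsigned long long i_size)
 *	{
 *		unsigned long long end = *_start + *_len;
 *
 *		*_start = round_down(*_start, CACHE_GRANULE_SIZE);
 *		end = round_up(end, CACHE_GRANULE_SIZE);
 *		*_len = end - *_start;
 *	}
 *
 * The expanded window still contains the original region, as required by
 * netfs_rreq_expand() below.
 */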

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/*
 * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
 * @subreq: The subrequest to be set up
 * @ractl: The readahead control, or NULL if not called from readahead
 *
 * Prepare the I/O iterator representing the read buffer on a subrequest for
 * the filesystem to use for I/O (it can be passed directly to a socket).  This
 * is intended to be called from the ->issue_read() method once the filesystem
 * has trimmed the request to the size it wants.
 *
 * Returns the limited size if successful and -ENOMEM if insufficient memory
 * is available.
 *
 * [!] NOTE: This must be run in the same thread as ->issue_read() was called
 * in, as we access the readahead_control struct.
 */
static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
					   struct readahead_control *ractl)
{
	struct netfs_io_request *rreq = subreq->rreq;
	size_t rsize = subreq->len;

	if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
		rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);

	if (ractl) {
		/* If we don't have sufficient folios in the rolling buffer,
		 * extract a folioq's worth from the readahead region at a time
		 * into the buffer.  Note that this acquires a ref on each page
		 * that we will need to release later - but we don't want to do
		 * that until after we've started the I/O.
		 */
		struct folio_batch put_batch;

		folio_batch_init(&put_batch);
		while (rreq->submitted < subreq->start + rsize) {
			ssize_t added;

			added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
							    &put_batch);
			if (added < 0)
				return added;
			rreq->submitted += added;
		}
		folio_batch_release(&put_batch);
	}

	subreq->len = rsize;
	if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
		size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
						rreq->io_streams[0].sreq_max_segs);

		if (limit < rsize) {
			subreq->len = limit;
			trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
		}
	}

	subreq->io_iter = rreq->buffer.iter;

	iov_iter_truncate(&subreq->io_iter, subreq->len);
	rolling_buffer_advance(&rreq->buffer, subreq->len);
	return subreq->len;
}
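
/*
 * Illustrative sketch only: once the iterator above has been prepared,
 * subreq->io_iter describes exactly subreq->len bytes of destination buffer,
 * so a filesystem's ->issue_read() can hand it straight to its transport.
 * Everything prefixed "myfs_" below is hypothetical:
 *
 *	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
 *	{
 *		struct msghdr msg = { .msg_iter = subreq->io_iter };
 *		int ret;
 *
 *		ret = myfs_rpc_read(subreq->rreq->netfs_priv, subreq->start,
 *				    subreq->len, &msg);
 *		subreq->error = ret < 0 ? ret : 0;
 *		if (ret > 0)
 *			subreq->transferred = ret;
 *		netfs_read_subreq_terminated(subreq);
 *	}
 */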

static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
						     struct netfs_io_subrequest *subreq,
						     loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	enum netfs_io_source source;

	if (!cres->ops)
		return NETFS_DOWNLOAD_FROM_SERVER;
	source = cres->ops->prepare_read(subreq, i_size);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}
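
/*
 * Illustrative sketch only: a cache backend's ->prepare_read() decides where
 * each slice of the request should come from.  A minimal hypothetical
 * implementation might look like this ("mycache_has_data()" is assumed, not a
 * real API):
 *
 *	static enum netfs_io_source
 *	mycache_prepare_read(struct netfs_io_subrequest *subreq, loff_t i_size)
 *	{
 *		if (subreq->start >= i_size)
 *			return NETFS_FILL_WITH_ZEROES;
 *		if (mycache_has_data(subreq->rreq->cache_resources.cache_priv,
 *				     subreq->start, subreq->len))
 *			return NETFS_READ_FROM_CACHE;
 *		return NETFS_DOWNLOAD_FROM_SERVER;
 *	}
 *
 * A real backend would also trim subreq->len so that a slice doesn't straddle
 * a boundary between cached and uncached data.
 */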

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
					  struct netfs_io_subrequest *subreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	netfs_stat(&netfs_n_rh_read);
	cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
			netfs_cache_read_terminated, subreq);
}

static void netfs_queue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq,
			     bool last_subreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list.  The collector only walks forwards and uses the lock to
	 * remove entries off of the front.
	 */
	spin_lock(&rreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Store list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	if (last_subreq) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
	}

	spin_unlock(&rreq->lock);
}

static void netfs_issue_read(struct netfs_io_request *rreq,
			     struct netfs_io_subrequest *subreq)
{
	switch (subreq->source) {
	case NETFS_DOWNLOAD_FROM_SERVER:
		rreq->netfs_ops->issue_read(subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_cache_to_pagecache(rreq, subreq);
		break;
	default:
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
		subreq->error = 0;
		iov_iter_zero(subreq->len, &subreq->io_iter);
		subreq->transferred = subreq->len;
		netfs_read_subreq_terminated(subreq);
		break;
	}
}

/*
 * Perform a read to the pagecache from a series of sources of different types,
 * slicing up the region to be read according to available cache blocks and
 * network rsize.
 */
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
				    struct readahead_control *ractl)
{
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned long long start = rreq->start;
	ssize_t size = rreq->len;
	int ret = 0;

	do {
		struct netfs_io_subrequest *subreq;
		enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start	= start;
		subreq->len	= size;

		source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
		subreq->source = source;
		if (source == NETFS_DOWNLOAD_FROM_SERVER) {
			unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
			size_t len = subreq->len;

			if (unlikely(rreq->origin == NETFS_READ_SINGLE))
				zp = rreq->i_size;
			if (subreq->start >= zp) {
				subreq->source = source = NETFS_FILL_WITH_ZEROES;
				goto fill_with_zeroes;
			}

			if (len > zp - subreq->start)
				len = zp - subreq->start;
			if (len == 0) {
				pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
				       rreq->debug_id, subreq->debug_index,
				       subreq->len, size,
				       subreq->start, ictx->zero_point, rreq->i_size);
				break;
			}
			subreq->len = len;

			netfs_stat(&netfs_n_rh_download);
			if (rreq->netfs_ops->prepare_read) {
				ret = rreq->netfs_ops->prepare_read(subreq);
				if (ret < 0) {
					subreq->error = ret;
					/* Not queued - release both refs. */
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					netfs_put_subrequest(subreq,
							     netfs_sreq_trace_put_cancel);
					break;
				}
				trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
			}
			goto issue;
		}

	fill_with_zeroes:
		if (source == NETFS_FILL_WITH_ZEROES) {
			subreq->source = NETFS_FILL_WITH_ZEROES;
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			netfs_stat(&netfs_n_rh_zero);
			goto issue;
		}

		if (source == NETFS_READ_FROM_CACHE) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
			goto issue;
		}

		pr_err("Unexpected read source %u\n", source);
		WARN_ON_ONCE(1);
		break;

	issue:
		slice = netfs_prepare_read_iterator(subreq, ractl);
		if (slice < 0) {
			ret = slice;
			subreq->error = ret;
			trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
			/* Not queued - release both refs. */
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
			break;
		}
		size -= slice;
		start += slice;

		netfs_queue_read(rreq, subreq, size <= 0);
		netfs_issue_read(rreq, subreq);
		cond_resched();
	} while (size > 0);

	if (unlikely(size > 0)) {
		smp_wmb(); /* Write lists before ALL_QUEUED. */
		set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
		netfs_wake_collector(rreq);
	}

	/* Defer error return as we may need to wait for outstanding I/O. */
	cmpxchg(&rreq->error, 0, ret);
}
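
/*
 * Illustrative sketch only: the ->prepare_read() hook used above typically
 * just clamps the subrequest to the filesystem's wire size before the read
 * iterator is built.  Everything prefixed "myfs_" is hypothetical:
 *
 *	static int myfs_prepare_read(struct netfs_io_subrequest *subreq)
 *	{
 *		struct netfs_io_request *rreq = subreq->rreq;
 *
 *		rreq->io_streams[0].sreq_max_len = myfs_max_rsize(rreq->inode);
 *		return 0;
 *	}
 *
 * netfs_prepare_read_iterator() then limits the slice to sreq_max_len (and,
 * if set, sreq_max_segs) before the subrequest is issued.
 */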

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ictx = netfs_inode(ractl->mapping->host);
	unsigned long long start = readahead_pos(ractl);
	size_t size = readahead_length(ractl);
	int ret;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file, start, size,
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ictx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto cleanup_free;

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	rreq->submitted = rreq->start;
	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		goto cleanup_free;
	netfs_read_to_pagecache(rreq, ractl);

	return netfs_put_request(rreq, netfs_rreq_trace_put_return);

cleanup_free:
	return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
}
EXPORT_SYMBOL(netfs_readahead);
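
/*
 * Illustrative sketch only: a network filesystem normally plumbs this helper
 * (and netfs_read_folio() below) straight into its address_space_operations;
 * "myfs_aops" is hypothetical:
 *
 *	const struct address_space_operations myfs_aops = {
 *		.read_folio	= netfs_read_folio,
 *		.readahead	= netfs_readahead,
 *	};
 */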

/*
 * Create a rolling buffer with a single occupying folio.
 */
static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
					unsigned int rollbuf_flags)
{
	ssize_t added;

	if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
		return -ENOMEM;

	added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
	if (added < 0)
		return added;
	rreq->submitted = rreq->start + added;
	return 0;
}

/*
 * Read into gaps in a folio partially filled by a streaming write.
 */
static int netfs_read_gaps(struct file *file, struct folio *folio)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	struct folio *sink = NULL;
	struct bio_vec *bvec;
	unsigned int from = finfo->dirty_offset;
	unsigned int to = from + finfo->dirty_len;
	unsigned int off = 0, i = 0;
	size_t flen = folio_size(folio);
	size_t nr_bvec = flen / PAGE_SIZE + 2;
	size_t part;
	int ret;

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file, folio_pos(folio), flen, NETFS_READ_GAPS);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_read_gaps);

	/* Fiddle the buffer so that a gap at the beginning and/or a gap at the
	 * end get copied to, but the middle is discarded.
	 */
	ret = -ENOMEM;
	bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		goto discard;

	sink = folio_alloc(GFP_KERNEL, 0);
	if (!sink) {
		kfree(bvec);
		goto discard;
	}

	trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

	rreq->direct_bv = bvec;
	rreq->direct_bv_count = nr_bvec;
	if (from > 0) {
		bvec_set_folio(&bvec[i++], folio, from, 0);
		off = from;
	}
	while (off < to) {
		part = min_t(size_t, to - off, PAGE_SIZE);
		bvec_set_folio(&bvec[i++], sink, part, 0);
		off += part;
	}
	if (to < flen)
		bvec_set_folio(&bvec[i++], folio, flen - to, to);
	iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
	rreq->submitted = rreq->start + flen;

	netfs_read_to_pagecache(rreq, NULL);

	if (sink)
		folio_put(sink);

	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
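
/*
 * Illustrative layout only: for a hypothetical 16KiB folio that took a
 * streaming write at bytes [4096, 8192), netfs_read_gaps() builds the
 * destination iterator as:
 *
 *	bvec[0]: folio, len 4096, offset 0	<- head gap, read for real
 *	bvec[1]: sink folio, len 4096		<- dirty region, server copy discarded
 *	bvec[2]: folio, len 8192, offset 8192	<- tail gap, read for real
 *
 * so the whole folio range is consumed by the read, but the already-dirty
 * middle never overwrites the streaming write.
 */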

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	if (folio_test_dirty(folio)) {
		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
		return netfs_read_gaps(file, folio);
	}

	_enter("%lx", folio->index);

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto discard;

	netfs_stat(&netfs_n_rh_read_folio);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto discard;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: True if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				 bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		folio_zero_segment(folio, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	folio_zero_segments(folio, 0, offset, offset + len, plen);
	return true;
}
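
/*
 * Worked example (illustrative numbers): for a 16KiB folio at file position 0
 * with i_size == 4096, a call such as
 *
 *	netfs_skip_folio_read(folio, 0, 5000, false)
 *
 * matches the "covers from the start of the folio to EOF or beyond" case: it
 * zeroes [5000, 16384) and returns true, so no read is issued.  By contrast, a
 * 4096-byte write at pos 8192 into the same folio returns false and forces a
 * read, since the part of the folio below EOF would still need to be fetched.
 */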

/**
 * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 *
 * Note that this should be considered deprecated and netfs_perform_write()
 * used instead.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

retry:
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, 0);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	if (ret < 0)
		goto error;
	netfs_put_request(rreq, netfs_rreq_trace_put_return);

have_folio:
	ret = folio_wait_private_2_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
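
/*
 * Illustrative sketch only: a filesystem still using this deprecated helper
 * would call it from its write path roughly as follows (error handling and
 * the surrounding aops glue are omitted; the variable names are hypothetical):
 *
 *	struct folio *folio;
 *	void *fsdata = NULL;
 *	int ret;
 *
 *	ret = netfs_write_begin(netfs_inode(mapping->host), file, mapping,
 *				pos, len, &folio, &fsdata);
 *	if (ret < 0)
 *		return ret;
 *
 * The folio comes back locked and, where needed, brought uptodate; the caller
 * copies in the new data and then unlocks and puts it in its write_end step.
 */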

/*
 * Preload the data into a folio we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
			     size_t offset, size_t len)
{
	struct netfs_io_request *rreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t flen = folio_size(folio);
	int ret;

	_enter("%zx @%llx", flen, start);

	ret = -ENOMEM;

	rreq = netfs_alloc_request(mapping, file, start, flen,
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}

	rreq->no_unlock_folio = folio->index;
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	ret = netfs_begin_cache_read(rreq, ctx);
	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
		goto error_put;

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

	/* Set up the output buffer */
	ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
	if (ret < 0)
		goto error_put;

	netfs_read_to_pagecache(rreq, NULL);
	ret = netfs_wait_for_read(rreq);
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
error:
	_leave(" = %d", ret);
	return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
		return -EINVAL;

	ret = netfs_start_io_read(inode);
	if (ret == 0) {
		ret = filemap_read(iocb, iter, 0);
		netfs_end_io_read(inode);
	}
	return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_read_iter(iocb, iter);

	return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);
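
/*
 * Illustrative sketch only: netfs_file_read_iter() is normally wired directly
 * into a network filesystem's file_operations; "myfs_file_operations" is
 * hypothetical:
 *
 *	const struct file_operations myfs_file_operations = {
 *		.read_iter	= netfs_file_read_iter,
 *		.write_iter	= netfs_file_write_iter,
 *		.llseek		= generic_file_llseek,
 *	};
 *
 * A filesystem that has already ruled out O_DIRECT and unbuffered-mode inodes
 * can call netfs_buffered_read_iter() directly instead.
 */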