// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING	0x01	/* A front op was still pending */
#define MADE_PROGRESS	0x04	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED	0x08	/* The pagecache needs cleaning up */
#define NEED_RETRY	0x10	/* A front op requests retrying */
#define COPY_TO_CACHE	0x40	/* Need to copy subrequest to cache */
#define ABANDON_SREQ	0x80	/* Need to abandon untransferred part of subrequest */

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
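	/* Note if the zeroed-out region reaches or passes the end of file. */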
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read.  If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
		trace_netfs_folio(folio, netfs_folio_trace_abandon);
		goto just_unlock;
	}

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
			netfs_pgpriv2_copy_to_cache(rreq, folio);
	}

just_unlock:
	if (folio->index == rreq->no_unlock_folio &&
	    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
		_debug("no unlock");
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
		folio_unlock(folio);
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
				     unsigned int *notes)
{
	struct folio_queue *folioq = rreq->buffer.tail;
	unsigned long long collected_to = rreq->collected_to;
	unsigned int slot = rreq->buffer.first_tail_slot;

	if (rreq->cleaned_to >= rreq->collected_to)
		return;

	// TODO: Begin decryption

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&rreq->buffer);
		if (!folioq) {
			rreq->front_folio_order = 0;
			return;
		}
		slot = 0;
	}

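	/* Walk the folios at the head of the buffer, unlocking each one that
	 * the collection point has passed in its entirety.
	 */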
	for (;;) {
		struct folio *folio;
		unsigned long long fpos, fend;
		unsigned int order;
		size_t fsize;

		if (*notes & COPY_TO_CACHE)
			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_locked(folio),
			      "R=%08x: folio %lx is not locked\n",
			      rreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_locked);

		order = folioq_folio_order(folioq, slot);
		rreq->front_folio_order = order;
		fsize = PAGE_SIZE << order;
		fpos = folio_pos(folio);
		fend = umin(fpos + fsize, rreq->i_size);

		trace_netfs_collect_folio(rreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		netfs_unlock_read_folio(rreq, folioq, slot);
		WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
		*notes |= MADE_PROGRESS;

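		/* The copy-to-cache flag is per-folio; it is set again at the
		 * top of the loop for as long as the COPY_TO_CACHE note holds.
		 */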
		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		/* Clean up the head folioq.  If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&rreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
			trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
		}

		if (fpos + fsize >= collected_to)
			break;
	}

	rreq->buffer.tail = folioq;
done:
	rreq->buffer.first_tail_slot = slot;
}

/*
 * Collect and assess the results of various read subrequests.  We may need to
 * retry some of the results.
 *
 * Note that we have a sequence of subrequests, which may draw on different
 * sources, may differ in size and starting position, and need not share
 * boundary alignment.
 */
static void netfs_collect_read_results(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned int notes;

	_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
	trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
	trace_netfs_collect(rreq);

reassess:
	if (rreq->origin == NETFS_READAHEAD ||
	    rreq->origin == NETFS_READPAGE ||
	    rreq->origin == NETFS_READ_FOR_WRITE)
		notes = BUFFERED;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the stream and
	 * advance the completion point.  We stop when we hit something that's
	 * in progress.  The issuer thread may be adding stuff to the tail
	 * whilst we're doing this.
	 */
	front = READ_ONCE(stream->front);
	while (front) {
		size_t transferred;

		trace_netfs_collect_sreq(rreq, front);
		_debug("sreq [%x] %llx %zx/%zx",
		       front->debug_index, front->start, front->transferred, front->len);

		if (stream->collected_to < front->start) {
			trace_netfs_collect_gap(rreq, stream, front->start, 'F');
			stream->collected_to = front->start;
		}

		if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
			notes |= HIT_PENDING;
		smp_rmb(); /* Read counters after IN_PROGRESS flag. */
		transferred = READ_ONCE(front->transferred);

		/* If we can now collect the next folio, do so.  We don't want
		 * to defer this as we have to decide whether we need to copy
		 * to the cache or not, and that may differ between adjacent
		 * subreqs.
		 */
		if (notes & BUFFERED) {
			size_t fsize = PAGE_SIZE << rreq->front_folio_order;

			/* Clear the tail of a short read. */
			if (!(notes & HIT_PENDING) &&
			    front->error == 0 &&
			    transferred < front->len &&
			    (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
			     test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
				netfs_clear_unread(front);
				transferred = front->transferred = front->len;
				trace_netfs_sreq(front, netfs_sreq_trace_clear);
			}

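			/* Advance the collection point over the data received so far. */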
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;

			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
				notes |= COPY_TO_CACHE;

			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				rreq->abandon_to = front->start + front->len;
				front->transferred = front->len;
				transferred = front->len;
				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
			}
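			/* Once collection has passed at least a front folio's
			 * worth of data beyond the cleaned-to point, or we hit
			 * the EOF, go and unlock folios.
			 */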
			if (front->start + transferred >= rreq->cleaned_to + fsize ||
			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
				netfs_read_unlock_folios(rreq, &notes);
		} else {
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;
		}

		/* Stall if the front is still undergoing I/O. */
		if (notes & HIT_PENDING)
			break;

		if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
			if (!stream->failed) {
				stream->error = front->error;
				rreq->error = front->error;
				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
				stream->failed = true;
			}
			notes |= MADE_PROGRESS | ABANDON_SREQ;
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
			stream->need_retry = true;
			notes |= NEED_RETRY | MADE_PROGRESS;
			break;
		} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
			notes |= MADE_PROGRESS;
		} else {
			if (!stream->failed)
				stream->transferred += transferred;
			if (front->transferred < front->len)
				set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
			notes |= MADE_PROGRESS;
		}

		/* Remove if completely consumed. */
		stream->source = front->source;
		spin_lock(&rreq->lock);

		remove = front;
		trace_netfs_sreq(front, netfs_sreq_trace_discard);
		list_del_init(&front->rreq_link);
		front = list_first_entry_or_null(&stream->subrequests,
						 struct netfs_io_subrequest, rreq_link);
		stream->front = front;
		spin_unlock(&rreq->lock);
		netfs_put_subrequest(remove,
				     notes & ABANDON_SREQ ?
				     netfs_sreq_trace_put_abandon :
				     netfs_sreq_trace_put_done);
	}

	trace_netfs_collect_stream(rreq, stream);
	trace_netfs_collect_state(rreq, rreq->collected_to, notes);

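	/* Unbuffered and direct I/O have no pagecache to clean up, so the
	 * cleaned-to point simply tracks the collection point.
	 */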
	if (!(notes & BUFFERED))
		rreq->cleaned_to = rreq->collected_to;

	if (notes & NEED_RETRY)
		goto need_retry;
	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess;
	}

out:
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay...  We're going to have to retry parts of the stream.  Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_reads(rreq);
	goto out;
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	unsigned int i;

	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read.  Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Do processing after reading a monolithic single object.
 */
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
	    fscache_resources_valid(&rreq->cache_resources)) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
		netfs_single_mark_inode_dirty(rreq->inode);
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Perform the collection of subrequests and folios.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
bool netfs_read_collection(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	netfs_collect_read_results(rreq);

	/* We're done when the app thread has finished posting subreqs and the
	 * queue is empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before subreq lists. */

	if (!list_empty(&stream->subrequests))
		return false;

	/* Okay, declare that all I/O is complete. */
	rreq->transferred = stream->transferred;
	trace_netfs_rreq(rreq, netfs_rreq_trace_complete);

	//netfs_rreq_is_still_valid(rreq);

	switch (rreq->origin) {
	case NETFS_UNBUFFERED_READ:
	case NETFS_DIO_READ:
	case NETFS_READ_GAPS:
		netfs_rreq_assess_dio(rreq);
		break;
	case NETFS_READ_SINGLE:
		netfs_rreq_assess_single(rreq);
		break;
	default:
		break;
	}
	task_io_account_read(rreq->transferred);

	netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(rreq->copy_to_cache))
		netfs_pgpriv2_end_copy_to_cache(rreq);
	return true;
}

void netfs_read_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
		if (netfs_read_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read subrequest that has made progress.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	size_t fsize = PAGE_SIZE << rreq->front_folio_order;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

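	/* Only bother the collector once at least a front folio's worth of
	 * data has accrued beyond the cleaned-to point.
	 */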
	/* If we are at the head of the queue, wake up the collector,
	 * getting a ref to it if we were the ones to do so.
	 */
	if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE) &&
	    list_is_first(&subreq->rreq_link, &stream->subrequests)
	    ) {
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		netfs_wake_collector(rreq);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @subreq->error,
 * supplying 0 to indicate a successful or retryable transfer (if
 * NETFS_SREQ_NEED_RETRY is set) or a negative error code.  The helper will
 * look after reissuing I/O operations as appropriate and writing downloaded
 * data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	/* Deal with retry requests, short reads and errors.  If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!subreq->error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
		} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
		} else {
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			subreq->error = -ENODATA;
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
		}
	}

	if (unlikely(subreq->error < 0)) {
		trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		}
		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);

/*
 * Handle termination of a read from the cache.
 */
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = priv;

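	/* A non-negative value is the number of bytes read; a negative value
	 * is an error code.
	 */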
	if (transferred_or_error >= 0) {
		subreq->error = 0;
		if (transferred_or_error > 0) {
			subreq->transferred += transferred_or_error;
			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		}
	} else {
		subreq->error = transferred_or_error;
	}
	netfs_read_subreq_terminated(subreq);
}