// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING	0x01	/* A front op was still pending */
#define MADE_PROGRESS	0x04	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED	0x08	/* The pagecache needs cleaning up */
#define NEED_RETRY	0x10	/* A front op requests retrying */
#define COPY_TO_CACHE	0x40	/* Need to copy subrequest to cache */
#define ABANDON_SREQ	0x80	/* Need to abandon untransferred part of subrequest */

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read. If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
		trace_netfs_folio(folio, netfs_folio_trace_abandon);
		goto just_unlock;
	}

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
			netfs_pgpriv2_copy_to_cache(rreq, folio);
	}

just_unlock:
	if (folio->index == rreq->no_unlock_folio &&
	    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
		_debug("no unlock");
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
		folio_unlock(folio);
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
				     unsigned int *notes)
{
	struct folio_queue *folioq = rreq->buffer.tail;
	unsigned long long collected_to = rreq->collected_to;
	unsigned int slot = rreq->buffer.first_tail_slot;

	if (rreq->cleaned_to >= rreq->collected_to)
		return;

	// TODO: Begin decryption

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&rreq->buffer);
		if (!folioq) {
			rreq->front_folio_order = 0;
			return;
		}
		slot = 0;
	}

	for (;;) {
		struct folio *folio;
		unsigned long long fpos, fend;
		unsigned int order;
		size_t fsize;

		if (*notes & COPY_TO_CACHE)
			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_locked(folio),
			      "R=%08x: folio %lx is not locked\n",
			      rreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_locked);

		order = folioq_folio_order(folioq, slot);
		rreq->front_folio_order = order;
		fsize = PAGE_SIZE << order;
		fpos = folio_pos(folio);
		fend = umin(fpos + fsize, rreq->i_size);

		trace_netfs_collect_folio(rreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		netfs_unlock_read_folio(rreq, folioq, slot);
		WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
		*notes |= MADE_PROGRESS;

		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		/* Clean up the head folioq. If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&rreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
			trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
		}

		if (fpos + fsize >= collected_to)
			break;
	}

	rreq->buffer.tail = folioq;
done:
	rreq->buffer.first_tail_slot = slot;
}

/*
 * Collect and assess the results of various read subrequests. We may need to
 * retry some of the results.
 *
 * Note that we have a sequence of subrequests, which may be drawing on
 * different sources and may or may not be the same size or starting position
 * and may not even correspond in boundary alignment.
 */
static void netfs_collect_read_results(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned int notes;

	_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
	trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
	trace_netfs_collect(rreq);

reassess:
	if (rreq->origin == NETFS_READAHEAD ||
	    rreq->origin == NETFS_READPAGE ||
	    rreq->origin == NETFS_READ_FOR_WRITE)
		notes = BUFFERED;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the stream and
	 * advance the completion point. We stop when we hit something that's
	 * in progress. The issuer thread may be adding stuff to the tail
	 * whilst we're doing this.
	 */
	front = READ_ONCE(stream->front);
	while (front) {
		size_t transferred;

		trace_netfs_collect_sreq(rreq, front);
		_debug("sreq [%x] %llx %zx/%zx",
		       front->debug_index, front->start, front->transferred, front->len);

		if (stream->collected_to < front->start) {
			trace_netfs_collect_gap(rreq, stream, front->start, 'F');
			stream->collected_to = front->start;
		}

		if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
			notes |= HIT_PENDING;
		smp_rmb(); /* Read counters after IN_PROGRESS flag. */
		transferred = READ_ONCE(front->transferred);

		/* If we can now collect the next folio, do so. We don't want
		 * to defer this as we have to decide whether we need to copy
		 * to the cache or not, and that may differ between adjacent
		 * subreqs.
		 */
		if (notes & BUFFERED) {
			size_t fsize = PAGE_SIZE << rreq->front_folio_order;

			/* Clear the tail of a short read. */
			if (!(notes & HIT_PENDING) &&
			    front->error == 0 &&
			    transferred < front->len &&
			    (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
			     test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
				netfs_clear_unread(front);
				transferred = front->transferred = front->len;
				trace_netfs_sreq(front, netfs_sreq_trace_clear);
			}

			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;

			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
				notes |= COPY_TO_CACHE;

			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				rreq->abandon_to = front->start + front->len;
				front->transferred = front->len;
				transferred = front->len;
				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
			}
			if (front->start + transferred >= rreq->cleaned_to + fsize ||
			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
				netfs_read_unlock_folios(rreq, &notes);
		} else {
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;
		}

		/* Stall if the front is still undergoing I/O. */
		if (notes & HIT_PENDING)
			break;

		if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
			if (!stream->failed) {
				stream->error = front->error;
				rreq->error = front->error;
				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
				stream->failed = true;
			}
			notes |= MADE_PROGRESS | ABANDON_SREQ;
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
			stream->need_retry = true;
			notes |= NEED_RETRY | MADE_PROGRESS;
			break;
		} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
			notes |= MADE_PROGRESS;
		} else {
			if (!stream->failed)
				stream->transferred += transferred;
			if (front->transferred < front->len)
				set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
			notes |= MADE_PROGRESS;
		}

		/* Remove if completely consumed. */
		stream->source = front->source;
		spin_lock(&rreq->lock);

		remove = front;
		trace_netfs_sreq(front, netfs_sreq_trace_discard);
		list_del_init(&front->rreq_link);
		front = list_first_entry_or_null(&stream->subrequests,
						 struct netfs_io_subrequest, rreq_link);
		stream->front = front;
		spin_unlock(&rreq->lock);
		netfs_put_subrequest(remove,
				     notes & ABANDON_SREQ ?
				     netfs_sreq_trace_put_abandon :
				     netfs_sreq_trace_put_done);
	}

	trace_netfs_collect_stream(rreq, stream);
	trace_netfs_collect_state(rreq, rreq->collected_to, notes);

	if (!(notes & BUFFERED))
		rreq->cleaned_to = rreq->collected_to;

	if (notes & NEED_RETRY)
		goto need_retry;
	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess;
	}

out:
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay... We're going to have to retry parts of the stream. Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_reads(rreq);
	goto out;
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	unsigned int i;

	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read. Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Do processing after reading a monolithic single object.
 */
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
	    fscache_resources_valid(&rreq->cache_resources)) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
		netfs_single_mark_inode_dirty(rreq->inode);
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Perform the collection of subrequests and folios.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
bool netfs_read_collection(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	netfs_collect_read_results(rreq);

	/* We're done when the app thread has finished posting subreqs and the
	 * queue is empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before subreq lists. */

	if (!list_empty(&stream->subrequests))
		return false;

	/* Okay, declare that all I/O is complete. */
	rreq->transferred = stream->transferred;
	trace_netfs_rreq(rreq, netfs_rreq_trace_complete);

	//netfs_rreq_is_still_valid(rreq);

	switch (rreq->origin) {
	case NETFS_UNBUFFERED_READ:
	case NETFS_DIO_READ:
	case NETFS_READ_GAPS:
		netfs_rreq_assess_dio(rreq);
		break;
	case NETFS_READ_SINGLE:
		netfs_rreq_assess_single(rreq);
		break;
	default:
		break;
	}
	task_io_account_read(rreq->transferred);

	netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(rreq->copy_to_cache))
		netfs_pgpriv2_end_copy_to_cache(rreq);
	return true;
}

void netfs_read_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
		if (netfs_read_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read subrequest that has made progress.
 *
 * This tells the read side of netfs lib that a contributory I/O operation has
 * made some progress and that it may be possible to unlock some folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	size_t fsize = PAGE_SIZE << rreq->front_folio_order;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	/* If we are at the head of the queue, wake up the collector,
	 * getting a ref to it if we were the ones to do so.
	 */
	if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE) &&
	    list_is_first(&subreq->rreq_link, &stream->subrequests)
	    ) {
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		netfs_wake_collector(rreq);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
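
/*
 * Illustrative sketch, not part of the build: a filesystem's receive path
 * might report incremental progress roughly as below.  The "rx_bytes" count
 * of newly copied data is hypothetical; the pattern just follows the
 * kerneldoc above, which asks for subreq->transferred to be updated before
 * the call.
 *
 *	// More data has just been copied into subreq->io_iter:
 *	subreq->transferred += rx_bytes;
 *	netfs_read_subreq_progress(subreq);
 *
 * The collector may then be woken to unlock any folios that are now fully
 * populated.
 */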

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @subreq->error,
 * supplying 0 to indicate a successful or retryable transfer (if
 * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
 * look after reissuing I/O operations as appropriate and writing downloaded
 * data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	/* Deal with retry requests, short reads and errors. If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!subreq->error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
		} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
		} else {
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			subreq->error = -ENODATA;
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
		}
	}

	if (unlikely(subreq->error < 0)) {
		trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		}
		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);

/*
 * Handle termination of a read from the cache.
 */
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = priv;

	if (transferred_or_error > 0) {
		subreq->error = 0;
		subreq->transferred += transferred_or_error;
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
	} else {
		subreq->error = transferred_or_error;
	}
	netfs_read_subreq_terminated(subreq);
}
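
/*
 * Illustrative sketch, not part of the build: a network filesystem's
 * server-read completion handler might feed its result back roughly as
 * below.  The myfs_read_done() function, its op structure and the
 * err/got_bytes variables are hypothetical; the pattern mirrors
 * netfs_cache_read_terminated() above and the netfs_read_subreq_terminated()
 * kerneldoc.
 *
 *	static void myfs_read_done(struct myfs_read_op *op, int err, size_t got_bytes)
 *	{
 *		struct netfs_io_subrequest *subreq = op->subreq;
 *
 *		if (err == 0 && got_bytes > 0) {
 *			subreq->transferred += got_bytes;
 *			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
 *		}
 *		subreq->error = err;
 *		netfs_read_subreq_terminated(subreq);
 *	}
 *
 * netfs_cache_read_terminated() plays the same role for reads issued to the
 * local cache, being passed as the termination callback for the cache I/O.
 */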