1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #include <linux/string_choices.h> 23 #endif 24 25 /* 26 * Maintain information about the progress of the recovery job, so that 27 * the different passes can carry information between them. 28 */ 29 struct recovery_info 30 { 31 tid_t start_transaction; 32 tid_t end_transaction; 33 unsigned long head_block; 34 35 int nr_replays; 36 int nr_revokes; 37 int nr_revoke_hits; 38 }; 39 40 static int do_one_pass(journal_t *journal, 41 struct recovery_info *info, enum passtype pass); 42 static int scan_revoke_records(journal_t *, enum passtype, struct buffer_head *, 43 tid_t, struct recovery_info *); 44 45 #ifdef __KERNEL__ 46 47 /* Release readahead buffers after use */ 48 static void journal_brelse_array(struct buffer_head *b[], int n) 49 { 50 while (--n >= 0) 51 brelse (b[n]); 52 } 53 54 55 /* 56 * When reading from the journal, we are going through the block device 57 * layer directly and so there is no readahead being done for us. We 58 * need to implement any readahead ourselves if we want it to happen at 59 * all. Recovery is basically one long sequential read, so make sure we 60 * do the IO in reasonably large chunks. 61 * 62 * This is not so critical that we need to be enormously clever about 63 * the readahead size, though. 128K is a purely arbitrary, good-enough 64 * fixed value. 65 */ 66 67 #define MAXBUF 8 68 static void do_readahead(journal_t *journal, unsigned int start) 69 { 70 unsigned int max, nbufs, next; 71 unsigned long long blocknr; 72 struct buffer_head *bh; 73 74 struct buffer_head * bufs[MAXBUF]; 75 76 /* Do up to 128K of readahead */ 77 max = start + (128 * 1024 / journal->j_blocksize); 78 if (max > journal->j_total_len) 79 max = journal->j_total_len; 80 81 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 82 * a time to the block device IO layer. */ 83 84 nbufs = 0; 85 86 for (next = start; next < max; next++) { 87 int err = jbd2_journal_bmap(journal, next, &blocknr); 88 89 if (err) { 90 printk(KERN_ERR "JBD2: bad block at offset %u\n", 91 next); 92 goto failed; 93 } 94 95 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 96 if (!bh) 97 goto failed; 98 99 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 100 bufs[nbufs++] = bh; 101 if (nbufs == MAXBUF) { 102 bh_readahead_batch(nbufs, bufs, 0); 103 journal_brelse_array(bufs, nbufs); 104 nbufs = 0; 105 } 106 } else 107 brelse(bh); 108 } 109 110 if (nbufs) 111 bh_readahead_batch(nbufs, bufs, 0); 112 113 failed: 114 if (nbufs) 115 journal_brelse_array(bufs, nbufs); 116 } 117 118 #endif /* __KERNEL__ */ 119 120 121 /* 122 * Read a block from the journal 123 */ 124 125 static int jread(struct buffer_head **bhp, journal_t *journal, 126 unsigned int offset) 127 { 128 int err; 129 unsigned long long blocknr; 130 struct buffer_head *bh; 131 132 *bhp = NULL; 133 134 if (offset >= journal->j_total_len) { 135 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 136 return -EFSCORRUPTED; 137 } 138 139 err = jbd2_journal_bmap(journal, offset, &blocknr); 140 141 if (err) { 142 printk(KERN_ERR "JBD2: bad block at offset %u\n", 143 offset); 144 return err; 145 } 146 147 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 148 if (!bh) 149 return -ENOMEM; 150 151 if (!buffer_uptodate(bh)) { 152 /* 153 * If this is a brand new buffer, start readahead. 154 * Otherwise, we assume we are already reading it. 155 */ 156 bool need_readahead = !buffer_req(bh); 157 158 bh_read_nowait(bh, 0); 159 if (need_readahead) 160 do_readahead(journal, offset); 161 wait_on_buffer(bh); 162 } 163 164 if (!buffer_uptodate(bh)) { 165 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 166 offset); 167 brelse(bh); 168 return -EIO; 169 } 170 171 *bhp = bh; 172 return 0; 173 } 174 175 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 176 { 177 struct jbd2_journal_block_tail *tail; 178 __be32 provided; 179 __u32 calculated; 180 181 if (!jbd2_journal_has_csum_v2or3(j)) 182 return 1; 183 184 tail = (struct jbd2_journal_block_tail *)((char *)buf + 185 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 186 provided = tail->t_checksum; 187 tail->t_checksum = 0; 188 calculated = jbd2_chksum(j->j_csum_seed, buf, j->j_blocksize); 189 tail->t_checksum = provided; 190 191 return provided == cpu_to_be32(calculated); 192 } 193 194 /* 195 * Count the number of in-use tags in a journal descriptor block. 196 */ 197 198 static int count_tags(journal_t *journal, struct buffer_head *bh) 199 { 200 char * tagp; 201 journal_block_tag_t tag; 202 int nr = 0, size = journal->j_blocksize; 203 int tag_bytes = journal_tag_bytes(journal); 204 205 if (jbd2_journal_has_csum_v2or3(journal)) 206 size -= sizeof(struct jbd2_journal_block_tail); 207 208 tagp = &bh->b_data[sizeof(journal_header_t)]; 209 210 while ((tagp - bh->b_data + tag_bytes) <= size) { 211 memcpy(&tag, tagp, sizeof(tag)); 212 213 nr++; 214 tagp += tag_bytes; 215 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 216 tagp += 16; 217 218 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 219 break; 220 } 221 222 return nr; 223 } 224 225 226 /* Make sure we wrap around the log correctly! */ 227 #define wrap(journal, var) \ 228 do { \ 229 if (var >= (journal)->j_last) \ 230 var -= ((journal)->j_last - (journal)->j_first); \ 231 } while (0) 232 233 static int fc_do_one_pass(journal_t *journal, 234 struct recovery_info *info, enum passtype pass) 235 { 236 unsigned int expected_commit_id = info->end_transaction; 237 unsigned long next_fc_block; 238 struct buffer_head *bh; 239 int err = 0; 240 241 next_fc_block = journal->j_fc_first; 242 if (!journal->j_fc_replay_callback) 243 return 0; 244 245 while (next_fc_block <= journal->j_fc_last) { 246 jbd2_debug(3, "Fast commit replay: next block %ld\n", 247 next_fc_block); 248 err = jread(&bh, journal, next_fc_block); 249 if (err) { 250 jbd2_debug(3, "Fast commit replay: read error\n"); 251 break; 252 } 253 254 err = journal->j_fc_replay_callback(journal, bh, pass, 255 next_fc_block - journal->j_fc_first, 256 expected_commit_id); 257 brelse(bh); 258 next_fc_block++; 259 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 260 break; 261 err = 0; 262 } 263 264 if (err) 265 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 266 267 return err; 268 } 269 270 /** 271 * jbd2_journal_recover - recovers a on-disk journal 272 * @journal: the journal to recover 273 * 274 * The primary function for recovering the log contents when mounting a 275 * journaled device. 276 * 277 * Recovery is done in three passes. In the first pass, we look for the 278 * end of the log. In the second, we assemble the list of revoke 279 * blocks. In the third and final pass, we replay any un-revoked blocks 280 * in the log. 281 */ 282 int jbd2_journal_recover(journal_t *journal) 283 { 284 int err, err2; 285 struct recovery_info info; 286 287 memset(&info, 0, sizeof(info)); 288 289 /* 290 * The journal superblock's s_start field (the current log head) 291 * is always zero if, and only if, the journal was cleanly 292 * unmounted. We use its in-memory version j_tail here because 293 * jbd2_journal_wipe() could have updated it without updating journal 294 * superblock. 295 */ 296 if (!journal->j_tail) { 297 journal_superblock_t *sb = journal->j_superblock; 298 299 jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n", 300 be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); 301 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 302 journal->j_head = be32_to_cpu(sb->s_head); 303 return 0; 304 } 305 306 err = do_one_pass(journal, &info, PASS_SCAN); 307 if (!err) 308 err = do_one_pass(journal, &info, PASS_REVOKE); 309 if (!err) 310 err = do_one_pass(journal, &info, PASS_REPLAY); 311 312 jbd2_debug(1, "JBD2: recovery, exit status %d, " 313 "recovered transactions %u to %u\n", 314 err, info.start_transaction, info.end_transaction); 315 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 316 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 317 318 /* Restart the log at the next transaction ID, thus invalidating 319 * any existing commit records in the log. */ 320 journal->j_transaction_sequence = ++info.end_transaction; 321 journal->j_head = info.head_block; 322 jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n", 323 journal->j_transaction_sequence, journal->j_head); 324 325 jbd2_journal_clear_revoke(journal); 326 /* Free revoke table allocated for replay */ 327 if (journal->j_revoke != journal->j_revoke_table[0] && 328 journal->j_revoke != journal->j_revoke_table[1]) { 329 jbd2_journal_destroy_revoke_table(journal->j_revoke); 330 journal->j_revoke = journal->j_revoke_table[1]; 331 } 332 err2 = sync_blockdev(journal->j_fs_dev); 333 if (!err) 334 err = err2; 335 err2 = jbd2_check_fs_dev_write_error(journal); 336 if (!err) 337 err = err2; 338 /* Make sure all replayed data is on permanent storage */ 339 if (journal->j_flags & JBD2_BARRIER) { 340 err2 = blkdev_issue_flush(journal->j_fs_dev); 341 if (!err) 342 err = err2; 343 } 344 return err; 345 } 346 347 /** 348 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 349 * @journal: journal to startup 350 * 351 * Locate any valid recovery information from the journal and set up the 352 * journal structures in memory to ignore it (presumably because the 353 * caller has evidence that it is out of date). 354 * This function doesn't appear to be exported.. 355 * 356 * We perform one pass over the journal to allow us to tell the user how 357 * much recovery information is being erased, and to let us initialise 358 * the journal transaction sequence numbers to the next unused ID. 359 */ 360 int jbd2_journal_skip_recovery(journal_t *journal) 361 { 362 int err; 363 364 struct recovery_info info; 365 366 memset (&info, 0, sizeof(info)); 367 368 err = do_one_pass(journal, &info, PASS_SCAN); 369 370 if (err) { 371 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 372 ++journal->j_transaction_sequence; 373 journal->j_head = journal->j_first; 374 } else { 375 #ifdef CONFIG_JBD2_DEBUG 376 int dropped = info.end_transaction - 377 be32_to_cpu(journal->j_superblock->s_sequence); 378 jbd2_debug(1, 379 "JBD2: ignoring %d transaction%s from the journal.\n", 380 dropped, str_plural(dropped)); 381 #endif 382 journal->j_transaction_sequence = ++info.end_transaction; 383 journal->j_head = info.head_block; 384 } 385 386 journal->j_tail = 0; 387 return err; 388 } 389 390 static inline unsigned long long read_tag_block(journal_t *journal, 391 journal_block_tag_t *tag) 392 { 393 unsigned long long block = be32_to_cpu(tag->t_blocknr); 394 if (jbd2_has_feature_64bit(journal)) 395 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 396 return block; 397 } 398 399 /* 400 * calc_chksums calculates the checksums for the blocks described in the 401 * descriptor block. 402 */ 403 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 404 unsigned long *next_log_block, __u32 *crc32_sum) 405 { 406 int i, num_blks, err; 407 unsigned long io_block; 408 struct buffer_head *obh; 409 410 num_blks = count_tags(journal, bh); 411 /* Calculate checksum of the descriptor block. */ 412 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 413 414 for (i = 0; i < num_blks; i++) { 415 io_block = (*next_log_block)++; 416 wrap(journal, *next_log_block); 417 err = jread(&obh, journal, io_block); 418 if (err) { 419 printk(KERN_ERR "JBD2: IO error %d recovering block " 420 "%lu in log\n", err, io_block); 421 return 1; 422 } else { 423 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 424 obh->b_size); 425 } 426 put_bh(obh); 427 } 428 return 0; 429 } 430 431 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 432 { 433 struct commit_header *h; 434 __be32 provided; 435 __u32 calculated; 436 437 if (!jbd2_journal_has_csum_v2or3(j)) 438 return 1; 439 440 h = buf; 441 provided = h->h_chksum[0]; 442 h->h_chksum[0] = 0; 443 calculated = jbd2_chksum(j->j_csum_seed, buf, j->j_blocksize); 444 h->h_chksum[0] = provided; 445 446 return provided == cpu_to_be32(calculated); 447 } 448 449 static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf) 450 { 451 struct commit_header *h; 452 __be32 provided; 453 __u32 calculated; 454 void *tmpbuf; 455 456 tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL); 457 if (!tmpbuf) 458 return false; 459 460 memcpy(tmpbuf, buf, sizeof(struct commit_header)); 461 h = tmpbuf; 462 provided = h->h_chksum[0]; 463 h->h_chksum[0] = 0; 464 calculated = jbd2_chksum(j->j_csum_seed, tmpbuf, j->j_blocksize); 465 kfree(tmpbuf); 466 467 return provided == cpu_to_be32(calculated); 468 } 469 470 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 471 journal_block_tag3_t *tag3, 472 void *buf, __u32 sequence) 473 { 474 __u32 csum32; 475 __be32 seq; 476 477 if (!jbd2_journal_has_csum_v2or3(j)) 478 return 1; 479 480 seq = cpu_to_be32(sequence); 481 csum32 = jbd2_chksum(j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 482 csum32 = jbd2_chksum(csum32, buf, j->j_blocksize); 483 484 if (jbd2_has_feature_csum3(j)) 485 return tag3->t_checksum == cpu_to_be32(csum32); 486 else 487 return tag->t_checksum == cpu_to_be16(csum32); 488 } 489 490 static __always_inline int jbd2_do_replay(journal_t *journal, 491 struct recovery_info *info, 492 struct buffer_head *bh, 493 unsigned long *next_log_block, 494 unsigned int next_commit_ID) 495 { 496 char *tagp; 497 int flags; 498 int ret = 0; 499 int tag_bytes = journal_tag_bytes(journal); 500 int descr_csum_size = 0; 501 unsigned long io_block; 502 journal_block_tag_t tag; 503 struct buffer_head *obh; 504 struct buffer_head *nbh; 505 506 if (jbd2_journal_has_csum_v2or3(journal)) 507 descr_csum_size = sizeof(struct jbd2_journal_block_tail); 508 509 tagp = &bh->b_data[sizeof(journal_header_t)]; 510 while (tagp - bh->b_data + tag_bytes <= 511 journal->j_blocksize - descr_csum_size) { 512 int err; 513 514 memcpy(&tag, tagp, sizeof(tag)); 515 flags = be16_to_cpu(tag.t_flags); 516 517 io_block = (*next_log_block)++; 518 wrap(journal, *next_log_block); 519 err = jread(&obh, journal, io_block); 520 if (err) { 521 /* Recover what we can, but report failure at the end. */ 522 ret = err; 523 pr_err("JBD2: IO error %d recovering block %lu in log\n", 524 err, io_block); 525 } else { 526 unsigned long long blocknr; 527 528 J_ASSERT(obh != NULL); 529 blocknr = read_tag_block(journal, &tag); 530 531 /* If the block has been revoked, then we're all done here. */ 532 if (jbd2_journal_test_revoke(journal, blocknr, 533 next_commit_ID)) { 534 brelse(obh); 535 ++info->nr_revoke_hits; 536 goto skip_write; 537 } 538 539 /* Look for block corruption */ 540 if (!jbd2_block_tag_csum_verify(journal, &tag, 541 (journal_block_tag3_t *)tagp, 542 obh->b_data, next_commit_ID)) { 543 brelse(obh); 544 ret = -EFSBADCRC; 545 pr_err("JBD2: Invalid checksum recovering data block %llu in journal block %lu\n", 546 blocknr, io_block); 547 goto skip_write; 548 } 549 550 /* Find a buffer for the new data being restored */ 551 nbh = __getblk(journal->j_fs_dev, blocknr, 552 journal->j_blocksize); 553 if (nbh == NULL) { 554 pr_err("JBD2: Out of memory during recovery.\n"); 555 brelse(obh); 556 return -ENOMEM; 557 } 558 559 lock_buffer(nbh); 560 memcpy(nbh->b_data, obh->b_data, journal->j_blocksize); 561 if (flags & JBD2_FLAG_ESCAPE) { 562 *((__be32 *)nbh->b_data) = 563 cpu_to_be32(JBD2_MAGIC_NUMBER); 564 } 565 566 BUFFER_TRACE(nbh, "marking dirty"); 567 set_buffer_uptodate(nbh); 568 mark_buffer_dirty(nbh); 569 BUFFER_TRACE(nbh, "marking uptodate"); 570 ++info->nr_replays; 571 unlock_buffer(nbh); 572 brelse(obh); 573 brelse(nbh); 574 } 575 576 skip_write: 577 tagp += tag_bytes; 578 if (!(flags & JBD2_FLAG_SAME_UUID)) 579 tagp += 16; 580 581 if (flags & JBD2_FLAG_LAST_TAG) 582 break; 583 } 584 585 return ret; 586 } 587 588 static int do_one_pass(journal_t *journal, 589 struct recovery_info *info, enum passtype pass) 590 { 591 unsigned int first_commit_ID, next_commit_ID; 592 unsigned long next_log_block, head_block; 593 int err, success = 0; 594 journal_superblock_t * sb; 595 journal_header_t * tmp; 596 struct buffer_head *bh = NULL; 597 unsigned int sequence; 598 int blocktype; 599 __u32 crc32_sum = ~0; /* Transactional Checksums */ 600 bool need_check_commit_time = false; 601 __u64 last_trans_commit_time = 0, commit_time; 602 603 /* 604 * First thing is to establish what we expect to find in the log 605 * (in terms of transaction IDs), and where (in terms of log 606 * block offsets): query the superblock. 607 */ 608 609 sb = journal->j_superblock; 610 next_commit_ID = be32_to_cpu(sb->s_sequence); 611 next_log_block = be32_to_cpu(sb->s_start); 612 head_block = next_log_block; 613 614 first_commit_ID = next_commit_ID; 615 if (pass == PASS_SCAN) 616 info->start_transaction = first_commit_ID; 617 else if (pass == PASS_REVOKE) { 618 /* 619 * Would the default revoke table have too long hash chains 620 * during replay? 621 */ 622 if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) { 623 unsigned int hash_size; 624 625 /* 626 * Aim for average chain length of 8, limit at 1M 627 * entries to avoid problems with malicious 628 * filesystems. 629 */ 630 hash_size = min(roundup_pow_of_two(info->nr_revokes / 8), 631 1U << 20); 632 journal->j_revoke = 633 jbd2_journal_init_revoke_table(hash_size); 634 if (!journal->j_revoke) { 635 printk(KERN_ERR 636 "JBD2: failed to allocate revoke table for replay with %u entries. " 637 "Journal replay may be slow.\n", hash_size); 638 journal->j_revoke = journal->j_revoke_table[1]; 639 } 640 } 641 } 642 643 jbd2_debug(1, "Starting recovery pass %d\n", pass); 644 645 /* 646 * Now we walk through the log, transaction by transaction, 647 * making sure that each transaction has a commit block in the 648 * expected place. Each complete transaction gets replayed back 649 * into the main filesystem. 650 */ 651 652 while (1) { 653 cond_resched(); 654 655 /* If we already know where to stop the log traversal, 656 * check right now that we haven't gone past the end of 657 * the log. */ 658 659 if (pass != PASS_SCAN) 660 if (tid_geq(next_commit_ID, info->end_transaction)) 661 break; 662 663 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 664 next_commit_ID, next_log_block, journal->j_last); 665 666 /* Skip over each chunk of the transaction looking 667 * either the next descriptor block or the final commit 668 * record. */ 669 670 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 671 brelse(bh); 672 bh = NULL; 673 err = jread(&bh, journal, next_log_block); 674 if (err) 675 goto failed; 676 677 next_log_block++; 678 wrap(journal, next_log_block); 679 680 /* What kind of buffer is it? 681 * 682 * If it is a descriptor block, check that it has the 683 * expected sequence number. Otherwise, we're all done 684 * here. */ 685 686 tmp = (journal_header_t *)bh->b_data; 687 688 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) 689 break; 690 691 blocktype = be32_to_cpu(tmp->h_blocktype); 692 sequence = be32_to_cpu(tmp->h_sequence); 693 jbd2_debug(3, "Found magic %d, sequence %d\n", 694 blocktype, sequence); 695 696 if (sequence != next_commit_ID) 697 break; 698 699 /* OK, we have a valid descriptor block which matches 700 * all of the sequence number checks. What are we going 701 * to do with it? That depends on the pass... */ 702 703 switch(blocktype) { 704 case JBD2_DESCRIPTOR_BLOCK: 705 /* Verify checksum first */ 706 if (!jbd2_descriptor_block_csum_verify(journal, 707 bh->b_data)) { 708 /* 709 * PASS_SCAN can see stale blocks due to lazy 710 * journal init. Don't error out on those yet. 711 */ 712 if (pass != PASS_SCAN) { 713 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 714 next_log_block); 715 err = -EFSBADCRC; 716 goto failed; 717 } 718 need_check_commit_time = true; 719 jbd2_debug(1, 720 "invalid descriptor block found in %lu\n", 721 next_log_block); 722 } 723 724 /* If it is a valid descriptor block, replay it 725 * in pass REPLAY; if journal_checksums enabled, then 726 * calculate checksums in PASS_SCAN, otherwise, 727 * just skip over the blocks it describes. */ 728 if (pass != PASS_REPLAY) { 729 if (pass == PASS_SCAN && 730 jbd2_has_feature_checksum(journal) && 731 !info->end_transaction) { 732 if (calc_chksums(journal, bh, 733 &next_log_block, 734 &crc32_sum)) 735 break; 736 continue; 737 } 738 next_log_block += count_tags(journal, bh); 739 wrap(journal, next_log_block); 740 continue; 741 } 742 743 /* 744 * A descriptor block: we can now write all of the 745 * data blocks. Yay, useful work is finally getting 746 * done here! 747 */ 748 err = jbd2_do_replay(journal, info, bh, &next_log_block, 749 next_commit_ID); 750 if (err) { 751 if (err == -ENOMEM) 752 goto failed; 753 success = err; 754 } 755 756 continue; 757 758 case JBD2_COMMIT_BLOCK: 759 if (pass != PASS_SCAN) { 760 next_commit_ID++; 761 continue; 762 } 763 764 /* How to differentiate between interrupted commit 765 * and journal corruption ? 766 * 767 * {nth transaction} 768 * Checksum Verification Failed 769 * | 770 * ____________________ 771 * | | 772 * async_commit sync_commit 773 * | | 774 * | GO TO NEXT "Journal Corruption" 775 * | TRANSACTION 776 * | 777 * {(n+1)th transanction} 778 * | 779 * _______|______________ 780 * | | 781 * Commit block found Commit block not found 782 * | | 783 * "Journal Corruption" | 784 * _____________|_________ 785 * | | 786 * nth trans corrupt OR nth trans 787 * and (n+1)th interrupted interrupted 788 * before commit block 789 * could reach the disk. 790 * (Cannot find the difference in above 791 * mentioned conditions. Hence assume 792 * "Interrupted Commit".) 793 */ 794 commit_time = be64_to_cpu( 795 ((struct commit_header *)bh->b_data)->h_commit_sec); 796 /* 797 * If need_check_commit_time is set, it means we are in 798 * PASS_SCAN and csum verify failed before. If 799 * commit_time is increasing, it's the same journal, 800 * otherwise it is stale journal block, just end this 801 * recovery. 802 */ 803 if (need_check_commit_time) { 804 if (commit_time >= last_trans_commit_time) { 805 pr_err("JBD2: Invalid checksum found in transaction %u\n", 806 next_commit_ID); 807 err = -EFSBADCRC; 808 goto failed; 809 } 810 ignore_crc_mismatch: 811 /* 812 * It likely does not belong to same journal, 813 * just end this recovery with success. 814 */ 815 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 816 next_commit_ID); 817 goto done; 818 } 819 820 /* 821 * Found an expected commit block: if checksums 822 * are present, verify them in PASS_SCAN; else not 823 * much to do other than move on to the next sequence 824 * number. 825 */ 826 if (jbd2_has_feature_checksum(journal)) { 827 struct commit_header *cbh = 828 (struct commit_header *)bh->b_data; 829 unsigned found_chksum = 830 be32_to_cpu(cbh->h_chksum[0]); 831 832 if (info->end_transaction) { 833 journal->j_failed_commit = 834 info->end_transaction; 835 break; 836 } 837 838 /* Neither checksum match nor unused? */ 839 if (!((crc32_sum == found_chksum && 840 cbh->h_chksum_type == 841 JBD2_CRC32_CHKSUM && 842 cbh->h_chksum_size == 843 JBD2_CRC32_CHKSUM_SIZE) || 844 (cbh->h_chksum_type == 0 && 845 cbh->h_chksum_size == 0 && 846 found_chksum == 0))) 847 goto chksum_error; 848 849 crc32_sum = ~0; 850 goto chksum_ok; 851 } 852 853 if (jbd2_commit_block_csum_verify(journal, bh->b_data)) 854 goto chksum_ok; 855 856 if (jbd2_commit_block_csum_verify_partial(journal, 857 bh->b_data)) { 858 pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n", 859 next_commit_ID, next_log_block); 860 goto chksum_ok; 861 } 862 863 chksum_error: 864 if (commit_time < last_trans_commit_time) 865 goto ignore_crc_mismatch; 866 info->end_transaction = next_commit_ID; 867 info->head_block = head_block; 868 869 if (!jbd2_has_feature_async_commit(journal)) { 870 journal->j_failed_commit = next_commit_ID; 871 break; 872 } 873 874 chksum_ok: 875 last_trans_commit_time = commit_time; 876 head_block = next_log_block; 877 next_commit_ID++; 878 continue; 879 880 case JBD2_REVOKE_BLOCK: 881 /* 882 * If we aren't in the SCAN or REVOKE pass, then we can 883 * just skip over this block. 884 */ 885 if (pass != PASS_REVOKE && pass != PASS_SCAN) 886 continue; 887 888 /* 889 * Check revoke block crc in pass_scan, if csum verify 890 * failed, check commit block time later. 891 */ 892 if (pass == PASS_SCAN && 893 !jbd2_descriptor_block_csum_verify(journal, 894 bh->b_data)) { 895 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 896 next_log_block); 897 need_check_commit_time = true; 898 } 899 900 err = scan_revoke_records(journal, pass, bh, 901 next_commit_ID, info); 902 if (err) 903 goto failed; 904 continue; 905 906 default: 907 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 908 blocktype); 909 goto done; 910 } 911 } 912 913 done: 914 brelse(bh); 915 /* 916 * We broke out of the log scan loop: either we came to the 917 * known end of the log or we found an unexpected block in the 918 * log. If the latter happened, then we know that the "current" 919 * transaction marks the end of the valid log. 920 */ 921 922 if (pass == PASS_SCAN) { 923 if (!info->end_transaction) 924 info->end_transaction = next_commit_ID; 925 if (!info->head_block) 926 info->head_block = head_block; 927 } else { 928 /* It's really bad news if different passes end up at 929 * different places (but possible due to IO errors). */ 930 if (info->end_transaction != next_commit_ID) { 931 printk(KERN_ERR "JBD2: recovery pass %d ended at " 932 "transaction %u, expected %u\n", 933 pass, next_commit_ID, info->end_transaction); 934 if (!success) 935 success = -EIO; 936 } 937 } 938 939 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 940 err = fc_do_one_pass(journal, info, pass); 941 if (err) 942 success = err; 943 } 944 945 return success; 946 947 failed: 948 brelse(bh); 949 return err; 950 } 951 952 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 953 954 static int scan_revoke_records(journal_t *journal, enum passtype pass, 955 struct buffer_head *bh, tid_t sequence, 956 struct recovery_info *info) 957 { 958 jbd2_journal_revoke_header_t *header; 959 int offset, max; 960 unsigned csum_size = 0; 961 __u32 rcount; 962 int record_len = 4; 963 964 header = (jbd2_journal_revoke_header_t *) bh->b_data; 965 offset = sizeof(jbd2_journal_revoke_header_t); 966 rcount = be32_to_cpu(header->r_count); 967 968 if (jbd2_journal_has_csum_v2or3(journal)) 969 csum_size = sizeof(struct jbd2_journal_block_tail); 970 if (rcount > journal->j_blocksize - csum_size) 971 return -EINVAL; 972 max = rcount; 973 974 if (jbd2_has_feature_64bit(journal)) 975 record_len = 8; 976 977 if (pass == PASS_SCAN) { 978 info->nr_revokes += (max - offset) / record_len; 979 return 0; 980 } 981 982 while (offset + record_len <= max) { 983 unsigned long long blocknr; 984 int err; 985 986 if (record_len == 4) 987 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 988 else 989 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 990 offset += record_len; 991 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 992 if (err) 993 return err; 994 } 995 return 0; 996 } 997