Lines from fs/jbd2/journal.c matching "v", "pos" and "supply"

1 // SPDX-License-Identifier: GPL-2.0+
7 * Copyright 1998 Red Hat corp --- All Rights Reserved
9 * Generic filesystem journal-writing code; part of the ext2fs
17 * file: that is left to a per-journal policy function, which allows us
18 * to store the journal within a filesystem-specified area for ext2
40 #include <linux/backing-dev.h>
123 old_csum = sb->s_checksum;
124 sb->s_checksum = 0;
126 sb->s_checksum = old_csum;
139 wake_up_process(journal->j_task);
169 timer_setup(&journal->j_commit_timer, commit_timeout, 0);
174 journal->j_task = current;
175 wake_up(&journal->j_wait_done_commit);
188 write_lock(&journal->j_state_lock);
191 if (journal->j_flags & JBD2_UNMOUNT)
195 journal->j_commit_sequence, journal->j_commit_request);
197 if (journal->j_commit_sequence != journal->j_commit_request) {
199 write_unlock(&journal->j_state_lock);
200 timer_delete_sync(&journal->j_commit_timer);
202 write_lock(&journal->j_state_lock);
206 wake_up(&journal->j_wait_done_commit);
214 write_unlock(&journal->j_state_lock);
216 write_lock(&journal->j_state_lock);
224 prepare_to_wait(&journal->j_wait_commit, &wait,
226 transaction = journal->j_running_transaction;
228 time_before(jiffies, transaction->t_expires)) {
229 write_unlock(&journal->j_state_lock);
231 write_lock(&journal->j_state_lock);
233 finish_wait(&journal->j_wait_commit, &wait);
241 transaction = journal->j_running_transaction;
242 if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
243 journal->j_commit_request = transaction->t_tid;
249 timer_delete_sync(&journal->j_commit_timer);
250 journal->j_task = NULL;
251 wake_up(&journal->j_wait_done_commit);
253 write_unlock(&journal->j_state_lock);
262 journal->j_devname);
266 wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
272 write_lock(&journal->j_state_lock);
273 journal->j_flags |= JBD2_UNMOUNT;
275 while (journal->j_task) {
276 write_unlock(&journal->j_state_lock);
277 wake_up(&journal->j_wait_commit);
278 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
279 write_lock(&journal->j_state_lock);
281 write_unlock(&journal->j_state_lock);
301 * Any magic-number escaping which needs to be done will cause a
302 * copy-out here. If the buffer happens to start with the
335 journal_t *journal = transaction->t_journal;
351 atomic_set(&new_bh->b_count, 1);
353 spin_lock(&jh_in->b_state_lock);
355 * If a new transaction has already done a buffer copy-out, then
358 if (jh_in->b_frozen_data) {
359 new_folio = virt_to_folio(jh_in->b_frozen_data);
360 new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
361 do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data);
363 jbd2_data_do_escape(jh_in->b_frozen_data);
368 new_folio = bh_in->b_folio;
369 new_offset = offset_in_folio(new_folio, bh_in->b_data);
374 * the magic offset. If a copy-out happens afterwards, it will
378 jh_in->b_triggers);
387 spin_unlock(&jh_in->b_state_lock);
388 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS | __GFP_NOFAIL);
389 spin_lock(&jh_in->b_state_lock);
390 if (jh_in->b_frozen_data) {
391 jbd2_free(tmp, bh_in->b_size);
395 jh_in->b_frozen_data = tmp;
396 memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
402 jh_in->b_frozen_triggers = jh_in->b_triggers;
405 new_folio = virt_to_folio(jh_in->b_frozen_data);
406 new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
407 jbd2_data_do_escape(jh_in->b_frozen_data);
412 new_bh->b_size = bh_in->b_size;
413 new_bh->b_bdev = journal->j_dev;
414 new_bh->b_blocknr = blocknr;
415 new_bh->b_private = bh_in;
422 * The to-be-written buffer needs to get moved to the io queue,
427 spin_lock(&journal->j_list_lock);
429 spin_unlock(&journal->j_list_lock);
431 spin_unlock(&jh_in->b_state_lock);
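A block whose first word equals the journal magic cannot be written to the log verbatim, or replay would mistake it for a descriptor block; that is what the copy-out and escape handling above protects against. A minimal illustrative sketch of the check and fix-up follows (the helper names here are made up; the real ones are the jbd2_data_needs_escaping()/jbd2_data_do_escape() calls seen above):

#include <linux/jbd2.h>

/* True if the buffer's first 32 bits collide with JBD2_MAGIC_NUMBER. */
static bool block_collides_with_magic(const void *data)
{
	return *((const __be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER);
}

/* Escape it: zero the magic word in the copy destined for the journal.
 * Recovery restores the word because the descriptor tag for this block
 * carries JBD2_FLAG_ESCAPE. */
static void block_do_escape(void *data)
{
	*((__be32 *)data) = 0;
}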
448 if (journal->j_commit_request == target)
456 if (journal->j_running_transaction &&
457 journal->j_running_transaction->t_tid == target) {
463 journal->j_commit_request = target;
465 journal->j_commit_request,
466 journal->j_commit_sequence);
467 journal->j_running_transaction->t_requested = jiffies;
468 wake_up(&journal->j_wait_commit);
470 } else if (!tid_geq(journal->j_commit_request, target))
475 journal->j_commit_request,
476 journal->j_commit_sequence,
477 target, journal->j_running_transaction ?
478 journal->j_running_transaction->t_tid : 0);
486 write_lock(&journal->j_state_lock);
488 write_unlock(&journal->j_state_lock);
505 read_lock(&journal->j_state_lock);
506 if (journal->j_running_transaction && !current->journal_info) {
507 transaction = journal->j_running_transaction;
508 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
510 } else if (journal->j_committing_transaction)
511 transaction = journal->j_committing_transaction;
515 read_unlock(&journal->j_state_lock);
518 tid = transaction->t_tid;
519 read_unlock(&journal->j_state_lock);
530 * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
536 * This is used for forcing out undo-protected data which contains
548 * jbd2_journal_force_commit() - force any uncommitted transactions
558 J_ASSERT(!current->journal_info);
574 write_lock(&journal->j_state_lock);
575 if (journal->j_running_transaction) {
576 tid_t tid = journal->j_running_transaction->t_tid;
584 } else if (journal->j_committing_transaction) {
590 *ptid = journal->j_committing_transaction->t_tid;
593 write_unlock(&journal->j_state_lock);
608 if (!(journal->j_flags & JBD2_BARRIER))
610 read_lock(&journal->j_state_lock);
612 if (tid_geq(journal->j_commit_sequence, tid))
614 commit_trans = journal->j_committing_transaction;
615 if (!commit_trans || commit_trans->t_tid != tid) {
616 running_trans = journal->j_running_transaction;
622 running_trans->t_tid != tid))
625 running_trans->t_need_data_flush = 1;
633 if (journal->j_fs_dev != journal->j_dev) {
634 if (!commit_trans->t_need_data_flush ||
635 commit_trans->t_state >= T_COMMIT_DFLUSH)
638 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
643 read_unlock(&journal->j_state_lock);
656 read_lock(&journal->j_state_lock);
663 if (tid_gt(tid, journal->j_commit_sequence) &&
664 (!journal->j_committing_transaction ||
665 journal->j_committing_transaction->t_tid != tid)) {
666 read_unlock(&journal->j_state_lock);
668 read_lock(&journal->j_state_lock);
672 if (!tid_geq(journal->j_commit_request, tid)) {
675 __func__, journal->j_commit_request, tid);
678 while (tid_gt(tid, journal->j_commit_sequence)) {
680 tid, journal->j_commit_sequence);
681 read_unlock(&journal->j_state_lock);
682 wake_up(&journal->j_wait_commit);
683 wait_event(journal->j_wait_done_commit,
684 !tid_gt(tid, journal->j_commit_sequence));
685 read_lock(&journal->j_state_lock);
687 read_unlock(&journal->j_state_lock);
690 err = -EIO;
696 * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
698 * going on or this tid has already been committed. Returns -EINVAL if no jbd2
704 return -EIO;
709 if (!journal->j_stats.ts_tid)
710 return -EINVAL;
712 write_lock(&journal->j_state_lock);
713 if (tid_geq(journal->j_commit_sequence, tid)) {
714 write_unlock(&journal->j_state_lock);
715 return -EALREADY;
718 if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
719 (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
722 prepare_to_wait(&journal->j_fc_wait, &wait,
724 write_unlock(&journal->j_state_lock);
726 finish_wait(&journal->j_fc_wait, &wait);
727 return -EALREADY;
729 journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
730 write_unlock(&journal->j_state_lock);
742 if (journal->j_fc_cleanup_callback)
743 journal->j_fc_cleanup_callback(journal, 0, tid);
744 write_lock(&journal->j_state_lock);
745 journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
747 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
748 write_unlock(&journal->j_state_lock);
749 wake_up(&journal->j_fc_wait);
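Filesystems typically wrap the fast-commit begin/end pair above roughly as follows. This is a hypothetical caller sketch (fs_commit_tid is invented); jbd2_fc_begin_commit(), jbd2_fc_end_commit(), jbd2_fc_get_buf() and jbd2_complete_transaction() are the real jbd2 helpers, the last being the ordinary wait-for-full-commit path:

#include <linux/jbd2.h>

/* Try a fast commit of 'tid'; fall back to (or wait for) a full commit
 * whenever a fast commit is not possible or not needed. */
static int fs_commit_tid(journal_t *journal, tid_t tid)
{
	int ret = jbd2_fc_begin_commit(journal, tid);

	if (ret)	/* -EALREADY, -EINVAL, -EIO, ...: take the slow path */
		return jbd2_complete_transaction(journal, tid);

	/* ... write the filesystem's fast-commit blocks, obtaining each
	 * buffer with jbd2_fc_get_buf() ... */

	return jbd2_fc_end_commit(journal);
}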
765 read_lock(&journal->j_state_lock);
766 tid = journal->j_running_transaction ?
767 journal->j_running_transaction->t_tid : 0;
768 read_unlock(&journal->j_state_lock);
776 return tid_geq(READ_ONCE(journal->j_commit_sequence), tid);
791 read_lock(&journal->j_state_lock);
792 if (journal->j_running_transaction &&
793 journal->j_running_transaction->t_tid == tid) {
794 if (journal->j_commit_request != tid) {
796 read_unlock(&journal->j_state_lock);
800 } else if (!(journal->j_committing_transaction &&
801 journal->j_committing_transaction->t_tid == tid))
803 read_unlock(&journal->j_state_lock);
819 write_lock(&journal->j_state_lock);
820 J_ASSERT(journal->j_free > 1);
822 blocknr = journal->j_head;
823 journal->j_head++;
824 journal->j_free--;
825 if (journal->j_head == journal->j_last)
826 journal->j_head = journal->j_first;
827 write_unlock(&journal->j_state_lock);
842 if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last)
843 return -EINVAL;
845 fc_off = journal->j_fc_off;
846 blocknr = journal->j_fc_first + fc_off;
847 journal->j_fc_off++;
852 bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
854 return -ENOMEM;
856 journal->j_fc_wbuf[fc_off] = bh;
873 j_fc_off = journal->j_fc_off;
879 for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
880 bh = journal->j_fc_wbuf[i];
887 journal->j_fc_off = i + 1;
888 return -EIO;
891 journal->j_fc_wbuf[i] = NULL;
903 j_fc_off = journal->j_fc_off;
905 for (i = j_fc_off - 1; i >= 0; i--) {
906 bh = journal->j_fc_wbuf[i];
910 journal->j_fc_wbuf[i] = NULL;
918 * On external journals the journal blocks are identity-mapped, so
919 * this is a no-op. If needed, we can use j_blk_offset - everything is
929 if (journal->j_bmap) {
930 err = journal->j_bmap(journal, &block);
933 } else if (journal->j_inode) {
934 ret = bmap(journal->j_inode, &block);
939 __func__, blocknr, journal->j_devname);
940 err = -EIO;
947 *retp = blocknr; /* +journal->j_blk_offset */
958 * the buffer's contents they really should run flush_dcache_folio(bh->b_folio).
960 * mmaps of blockdevs which hold live JBD-controlled filesystems.
965 journal_t *journal = transaction->t_journal;
976 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
979 atomic_dec(&transaction->t_outstanding_credits);
981 memset(bh->b_data, 0, journal->j_blocksize);
982 header = (journal_header_t *)bh->b_data;
983 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
984 header->h_blocktype = cpu_to_be32(type);
985 header->h_sequence = cpu_to_be32(transaction->t_tid);
1000 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
1002 tail->t_checksum = 0;
1003 csum = jbd2_chksum(j->j_csum_seed, bh->b_data, j->j_blocksize);
1004 tail->t_checksum = cpu_to_be32(csum);
1023 read_lock(&journal->j_state_lock);
1024 spin_lock(&journal->j_list_lock);
1025 transaction = journal->j_checkpoint_transactions;
1027 *tid = transaction->t_tid;
1028 *block = transaction->t_log_start;
1029 } else if ((transaction = journal->j_committing_transaction) != NULL) {
1030 *tid = transaction->t_tid;
1031 *block = transaction->t_log_start;
1032 } else if ((transaction = journal->j_running_transaction) != NULL) {
1033 *tid = transaction->t_tid;
1034 *block = journal->j_head;
1036 *tid = journal->j_transaction_sequence;
1037 *block = journal->j_head;
1039 ret = tid_gt(*tid, journal->j_tail_sequence);
1040 spin_unlock(&journal->j_list_lock);
1041 read_unlock(&journal->j_state_lock);
1061 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1073 write_lock(&journal->j_state_lock);
1074 freed = block - journal->j_tail;
1075 if (block < journal->j_tail)
1076 freed += journal->j_last - journal->j_first;
1082 journal->j_tail_sequence, tid, block, freed);
1084 journal->j_free += freed;
1085 journal->j_tail_sequence = tid;
1086 journal->j_tail = block;
1087 write_unlock(&journal->j_state_lock);
1100 mutex_lock_io(&journal->j_checkpoint_mutex);
1101 if (tid_gt(tid, journal->j_tail_sequence))
1103 mutex_unlock(&journal->j_checkpoint_mutex);
1113 static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
1115 return *pos ? NULL : SEQ_START_TOKEN;
1118 static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
1120 (*pos)++;
1124 static int jbd2_seq_info_show(struct seq_file *seq, void *v)
1126 struct jbd2_stats_proc_session *s = seq->private;
1128 if (v != SEQ_START_TOKEN)
1132 s->stats->ts_tid, s->stats->ts_requested,
1133 s->journal->j_max_transaction_buffers);
1134 if (s->stats->ts_tid == 0)
1137 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
1139 (s->stats->ts_requested == 0) ? 0 :
1140 jiffies_to_msecs(s->stats->run.rs_request_delay /
1141 s->stats->ts_requested));
1143 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
1145 jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid));
1147 jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid));
1149 jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid));
1151 div_u64(s->journal->j_average_commit_time, 1000));
1153 s->stats->run.rs_handle_count / s->stats->ts_tid);
1155 s->stats->run.rs_blocks / s->stats->ts_tid);
1157 s->stats->run.rs_blocks_logged / s->stats->ts_tid);
1161 static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
1180 return -ENOMEM;
1182 s->stats = kmalloc(size, GFP_KERNEL);
1183 if (s->stats == NULL) {
1185 return -ENOMEM;
1187 spin_lock(&journal->j_history_lock);
1188 memcpy(s->stats, &journal->j_stats, size);
1189 s->journal = journal;
1190 spin_unlock(&journal->j_history_lock);
1194 struct seq_file *m = file->private_data;
1195 m->private = s;
1197 kfree(s->stats);
1206 struct seq_file *seq = file->private_data;
1207 struct jbd2_stats_proc_session *s = seq->private;
1208 kfree(s->stats);
1224 journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
1225 if (journal->j_proc_entry) {
1226 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
1233 remove_proc_entry("info", journal->j_proc_entry);
1234 remove_proc_entry(journal->j_devname, proc_jbd2_stats);
1241 * Tag with 32-bit block numbers does not use last four bytes of the
1244 return sizeof(journal_block_tag_t) - 4;
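For reference, the arithmetic behind that minimum tag size: the on-disk journal_block_tag_t is 12 bytes (a 32-bit block number, a 16-bit checksum, 16-bit flags, and the high 32 bits of the block number), so when the 64-bit feature is off the unused high word is dropped and each tag needs only 12 - 4 = 8 bytes.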
1258 journal_t *journal = shrink->private_data;
1259 unsigned long nr_to_scan = sc->nr_to_scan;
1263 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1264 trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count);
1268 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1284 journal_t *journal = shrink->private_data;
1287 count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
1288 trace_jbd2_shrink_count(journal, sc->nr_to_scan, count);
1300 struct buffer_head *bh = journal->j_sb_buffer;
1302 journal->j_sb_buffer = NULL;
1311 journal_superblock_t *sb = journal->j_superblock;
1313 int err = -EINVAL;
1315 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
1316 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
1321 if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
1322 be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
1327 if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
1332 if (be32_to_cpu(sb->s_first) == 0 ||
1333 be32_to_cpu(sb->s_first) >= journal->j_total_len) {
1336 be32_to_cpu(sb->s_first));
1347 if ((sb->s_feature_ro_compat &
1349 (sb->s_feature_incompat &
1357 if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS ||
1358 be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) {
1360 be32_to_cpu(sb->s_maxlen), num_fc_blks);
1381 if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
1387 if (sb->s_checksum != jbd2_superblock_csum(sb)) {
1389 err = -EFSBADCRC;
1400 int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
1408 space -= sizeof(struct jbd2_journal_block_tail);
1414 return (journal->j_total_len - journal->j_fc_wbufsize) / 3;
1422 int tag_space = journal->j_blocksize - sizeof(journal_header_t);
1426 tag_space -= 16;
1428 tag_space -= sizeof(struct jbd2_journal_block_tail);
1430 tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
1446 journal->j_revoke_records_per_block =
1448 journal->j_transaction_overhead_buffers =
1450 journal->j_max_transaction_buffers =
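A worked example for the sizing above, assuming 4096-byte journal blocks, checksums enabled and 32-bit block numbers: the revoke header is 16 bytes and the block tail 4 bytes, so a revoke block carries (4096 - 16 - 4) / 4 = 1019 revoke records (records grow to 8 bytes, halving that, with the 64-bit feature). The maximum transaction size is further capped, per the j_total_len expression above, at one third of the journal after the fast-commit area is reserved.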
1455 * Load the on-disk journal superblock and read the key fields into the
1464 bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset,
1465 journal->j_blocksize);
1471 return -EIO;
1474 journal->j_sb_buffer = bh;
1475 sb = (journal_superblock_t *)bh->b_data;
1476 journal->j_superblock = sb;
1483 journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
1484 journal->j_tail = be32_to_cpu(sb->s_start);
1485 journal->j_first = be32_to_cpu(sb->s_first);
1486 journal->j_errno = be32_to_cpu(sb->s_errno);
1487 journal->j_last = be32_to_cpu(sb->s_maxlen);
1489 if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
1490 journal->j_total_len = be32_to_cpu(sb->s_maxlen);
1493 journal->j_csum_seed = jbd2_chksum(~0, sb->s_uuid,
1494 sizeof(sb->s_uuid));
1499 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
1500 journal->j_last = journal->j_fc_last -
1502 journal->j_fc_first = journal->j_last + 1;
1503 journal->j_fc_off = 0;
1516 * in memory. It calls journal_load_superblock() to load the on-disk journal
1531 return ERR_PTR(-ENOMEM);
1533 journal->j_blocksize = blocksize;
1534 journal->j_dev = bdev;
1535 journal->j_fs_dev = fs_dev;
1536 journal->j_blk_offset = start;
1537 journal->j_total_len = len;
1544 init_waitqueue_head(&journal->j_wait_transaction_locked);
1545 init_waitqueue_head(&journal->j_wait_done_commit);
1546 init_waitqueue_head(&journal->j_wait_commit);
1547 init_waitqueue_head(&journal->j_wait_updates);
1548 init_waitqueue_head(&journal->j_wait_reserved);
1549 init_waitqueue_head(&journal->j_fc_wait);
1550 mutex_init(&journal->j_abort_mutex);
1551 mutex_init(&journal->j_barrier);
1552 mutex_init(&journal->j_checkpoint_mutex);
1553 spin_lock_init(&journal->j_revoke_lock);
1554 spin_lock_init(&journal->j_list_lock);
1555 spin_lock_init(&journal->j_history_lock);
1556 rwlock_init(&journal->j_state_lock);
1558 journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
1559 journal->j_min_batch_time = 0;
1560 journal->j_max_batch_time = 15000; /* 15ms */
1561 atomic_set(&journal->j_reserved_credits, 0);
1562 lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
1566 journal->j_flags = JBD2_ABORT;
1568 /* Set up a default-sized revoke table for the new mount. */
1577 err = -ENOMEM;
1578 n = journal->j_blocksize / jbd2_min_tag_size();
1579 journal->j_wbufsize = n;
1580 journal->j_fc_wbuf = NULL;
1581 journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
1583 if (!journal->j_wbuf)
1586 err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0,
1591 journal->j_shrink_transaction = NULL;
1593 journal->j_shrinker = shrinker_alloc(0, "jbd2-journal:(%u:%u)",
1594 MAJOR(bdev->bd_dev),
1595 MINOR(bdev->bd_dev));
1596 if (!journal->j_shrinker) {
1597 err = -ENOMEM;
1601 journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
1602 journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
1603 journal->j_shrinker->private_data = journal;
1605 shrinker_register(journal->j_shrinker);
1610 percpu_counter_destroy(&journal->j_checkpoint_jh_count);
1611 kfree(journal->j_wbuf);
1628 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1651 snprintf(journal->j_devname, sizeof(journal->j_devname),
1652 "%pg", journal->j_dev);
1653 strreplace(journal->j_devname, '/', '!');
1660 * journal_t * jbd2_journal_init_inode () - creates a journal which maps to an inode.
1663 * jbd2_journal_init_inode creates a journal which maps an on-disk inode as
1677 return err ? ERR_PTR(err) : ERR_PTR(-EINVAL);
1681 inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
1682 inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
1684 journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
1685 blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
1686 inode->i_sb->s_blocksize);
1690 journal->j_inode = inode;
1691 snprintf(journal->j_devname, sizeof(journal->j_devname),
1692 "%pg-%lu", journal->j_dev, journal->j_inode->i_ino);
1693 strreplace(journal->j_devname, '/', '!');
1708 journal_superblock_t *sb = journal->j_superblock;
1711 first = be32_to_cpu(sb->s_first);
1712 last = be32_to_cpu(sb->s_maxlen);
1714 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1717 return -EINVAL;
1720 journal->j_first = first;
1721 journal->j_last = last;
1723 if (journal->j_head != 0 && journal->j_flags & JBD2_CYCLE_RECORD) {
1728 if (journal->j_head < first || journal->j_head >= last) {
1731 journal->j_head);
1732 journal->j_head = journal->j_first;
1735 journal->j_head = journal->j_first;
1737 journal->j_tail = journal->j_head;
1738 journal->j_free = journal->j_last - journal->j_first;
1740 journal->j_tail_sequence = journal->j_transaction_sequence;
1741 journal->j_commit_sequence = journal->j_transaction_sequence - 1;
1742 journal->j_commit_request = journal->j_commit_sequence;
1752 * As a special case, if the on-disk copy is already marked as needing
1755 * attempting a write to a potentially read-only device.
1757 if (sb->s_start == 0) {
1760 journal->j_tail, journal->j_tail_sequence,
1761 journal->j_errno);
1762 journal->j_flags |= JBD2_FLUSHED;
1765 mutex_lock_io(&journal->j_checkpoint_mutex);
1773 journal->j_tail_sequence,
1774 journal->j_tail, REQ_FUA);
1775 mutex_unlock(&journal->j_checkpoint_mutex);
1786 struct buffer_head *bh = journal->j_sb_buffer;
1787 journal_superblock_t *sb = journal->j_superblock;
1793 return -EIO;
1801 if (!(journal->j_flags & JBD2_BARRIER))
1817 journal->j_devname);
1822 sb->s_checksum = jbd2_superblock_csum(sb);
1824 bh->b_end_io = end_buffer_write_sync;
1830 ret = -EIO;
1834 journal->j_devname);
1843 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1856 journal_superblock_t *sb = journal->j_superblock;
1860 return -EIO;
1862 jbd2_journal_abort(journal, -EIO);
1863 return -EIO;
1866 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1870 lock_buffer(journal->j_sb_buffer);
1871 sb->s_sequence = cpu_to_be32(tail_tid);
1872 sb->s_start = cpu_to_be32(tail_block);
1879 write_lock(&journal->j_state_lock);
1880 journal->j_flags &= ~JBD2_FLUSHED;
1881 write_unlock(&journal->j_state_lock);
1888 * jbd2_mark_journal_empty() - Mark on disk journal as empty.
1897 journal_superblock_t *sb = journal->j_superblock;
1900 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1901 lock_buffer(journal->j_sb_buffer);
1902 if (sb->s_start == 0) { /* Is it already empty? */
1903 unlock_buffer(journal->j_sb_buffer);
1908 journal->j_tail_sequence);
1910 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1911 sb->s_start = cpu_to_be32(0);
1912 sb->s_head = cpu_to_be32(journal->j_head);
1928 write_lock(&journal->j_state_lock);
1929 journal->j_flags |= JBD2_FLUSHED;
1930 write_unlock(&journal->j_state_lock);
1934 * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
1956 return -EINVAL;
1959 !bdev_max_discard_sectors(journal->j_dev))
1960 return -EOPNOTSUPP;
1966 log_offset = be32_to_cpu(journal->j_superblock->s_first);
1968 for (block = log_offset; block < journal->j_total_len; block++) {
1984 block--;
1992 if (block != journal->j_total_len - 1)
2000 byte_start = block_start * journal->j_blocksize;
2001 byte_stop = block_stop * journal->j_blocksize;
2002 byte_count = (block_stop - block_start) * journal->j_blocksize;
2004 truncate_inode_pages_range(journal->j_dev->bd_mapping,
2005 byte_start, byte_stop - 1);
2008 err = blkdev_issue_discard(journal->j_dev,
2013 err = blkdev_issue_zeroout(journal->j_dev,
2029 return blkdev_issue_flush(journal->j_dev);
2033 * jbd2_journal_update_sb_errno() - Update error in the journal.
2041 journal_superblock_t *sb = journal->j_superblock;
2044 lock_buffer(journal->j_sb_buffer);
2045 errcode = journal->j_errno;
2046 if (errcode == -ESHUTDOWN)
2049 sb->s_errno = cpu_to_be32(errcode);
2056 * jbd2_journal_load() - Read journal from disk.
2060 * a journal, read the journal from disk to initialise the in-memory
2066 journal_superblock_t *sb = journal->j_superblock;
2071 err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
2083 if (journal->j_failed_commit) {
2085 "is corrupt.\n", journal->j_failed_commit,
2086 journal->j_devname);
2087 return -EFSCORRUPTED;
2093 journal->j_flags &= ~JBD2_ABORT;
2104 journal->j_flags |= JBD2_LOADED;
2109 * jbd2_journal_destroy() - Release a journal_t structure.
2124 if (journal->j_running_transaction)
2130 spin_lock(&journal->j_list_lock);
2131 while (journal->j_checkpoint_transactions != NULL) {
2132 spin_unlock(&journal->j_list_lock);
2133 mutex_lock_io(&journal->j_checkpoint_mutex);
2135 mutex_unlock(&journal->j_checkpoint_mutex);
2142 spin_lock(&journal->j_list_lock);
2145 spin_lock(&journal->j_list_lock);
2148 J_ASSERT(journal->j_running_transaction == NULL);
2149 J_ASSERT(journal->j_committing_transaction == NULL);
2150 J_ASSERT(journal->j_checkpoint_transactions == NULL);
2151 spin_unlock(&journal->j_list_lock);
2161 jbd2_journal_abort(journal, -EIO);
2163 if (journal->j_sb_buffer) {
2165 mutex_lock_io(&journal->j_checkpoint_mutex);
2167 write_lock(&journal->j_state_lock);
2168 journal->j_tail_sequence =
2169 ++journal->j_transaction_sequence;
2170 write_unlock(&journal->j_state_lock);
2173 mutex_unlock(&journal->j_checkpoint_mutex);
2175 err = -EIO;
2176 brelse(journal->j_sb_buffer);
2179 if (journal->j_shrinker) {
2180 percpu_counter_destroy(&journal->j_checkpoint_jh_count);
2181 shrinker_free(journal->j_shrinker);
2183 if (journal->j_proc_entry)
2185 iput(journal->j_inode);
2186 if (journal->j_revoke)
2188 kfree(journal->j_fc_wbuf);
2189 kfree(journal->j_wbuf);
2197 * jbd2_journal_check_used_features() - Check if features specified are used.
2200 * @ro: bitmask of features that force read-only mount
2204 * features. Return true (non-zero) if it does.
2217 sb = journal->j_superblock;
2219 if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
2220 ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
2221 ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
2228 * jbd2_journal_check_available_features() - Check feature set in journalling layer
2231 * @ro: bitmask of features that force read-only mount
2236 * (non-zero) if it can. */
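A hypothetical caller sketch for the two checks documented above (fs_can_mount_rw and the choice of feature bit are purely illustrative; the two jbd2_journal_check_*_features() calls are the real API):

#include <linux/jbd2.h>

/* Refuse a read-write mount if the journal already uses a feature that
 * this kernel's jbd2 could not have set itself. */
static bool fs_can_mount_rw(journal_t *journal)
{
	unsigned long incompat = JBD2_FEATURE_INCOMPAT_64BIT;

	if (jbd2_journal_check_used_features(journal, 0, 0, incompat) &&
	    !jbd2_journal_check_available_features(journal, 0, 0, incompat))
		return false;

	return true;
}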
2258 journal_superblock_t *sb = journal->j_superblock;
2262 if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
2263 return -ENOSPC;
2266 WARN_ON(journal->j_fc_wbuf != NULL);
2267 journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
2269 if (!journal->j_fc_wbuf)
2270 return -ENOMEM;
2272 journal->j_fc_wbufsize = num_fc_blks;
2273 journal->j_fc_last = journal->j_last;
2274 journal->j_last = journal->j_fc_last - num_fc_blks;
2275 journal->j_fc_first = journal->j_last + 1;
2276 journal->j_fc_off = 0;
2277 journal->j_free = journal->j_last - journal->j_first;
2283 * jbd2_journal_set_features() - Mark a given journal feature in the superblock
2286 * @ro: bitmask of features that force read-only mount
2298 ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
2300 ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
2323 sb = journal->j_superblock;
2332 lock_buffer(journal->j_sb_buffer);
2336 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
2337 sb->s_feature_compat &=
2339 journal->j_csum_seed = jbd2_chksum(~0, sb->s_uuid,
2340 sizeof(sb->s_uuid));
2345 sb->s_feature_incompat &=
2349 sb->s_feature_compat |= cpu_to_be32(compat);
2350 sb->s_feature_ro_compat |= cpu_to_be32(ro);
2351 sb->s_feature_incompat |= cpu_to_be32(incompat);
2352 unlock_buffer(journal->j_sb_buffer);
2361 * jbd2_journal_clear_features() - Clear a given journal feature in the
2365 * @ro: bitmask of features that force read-only mount
2379 sb = journal->j_superblock;
2381 sb->s_feature_compat &= ~cpu_to_be32(compat);
2382 sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
2383 sb->s_feature_incompat &= ~cpu_to_be32(incompat);
2389 * jbd2_journal_flush() - Flush journal
2407 write_lock(&journal->j_state_lock);
2410 if (journal->j_running_transaction) {
2411 transaction = journal->j_running_transaction;
2412 __jbd2_log_start_commit(journal, transaction->t_tid);
2413 } else if (journal->j_committing_transaction)
2414 transaction = journal->j_committing_transaction;
2418 tid_t tid = transaction->t_tid;
2420 write_unlock(&journal->j_state_lock);
2423 write_unlock(&journal->j_state_lock);
2427 spin_lock(&journal->j_list_lock);
2428 while (!err && journal->j_checkpoint_transactions != NULL) {
2429 spin_unlock(&journal->j_list_lock);
2430 mutex_lock_io(&journal->j_checkpoint_mutex);
2432 mutex_unlock(&journal->j_checkpoint_mutex);
2433 spin_lock(&journal->j_list_lock);
2435 spin_unlock(&journal->j_list_lock);
2438 return -EIO;
2440 mutex_lock_io(&journal->j_checkpoint_mutex);
2444 mutex_unlock(&journal->j_checkpoint_mutex);
2452 * the magic code for a fully-recovered superblock. Any future
2460 mutex_unlock(&journal->j_checkpoint_mutex);
2461 write_lock(&journal->j_state_lock);
2462 J_ASSERT(!journal->j_running_transaction);
2463 J_ASSERT(!journal->j_committing_transaction);
2464 J_ASSERT(!journal->j_checkpoint_transactions);
2465 J_ASSERT(journal->j_head == journal->j_tail);
2466 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
2467 write_unlock(&journal->j_state_lock);
2473 * jbd2_journal_wipe() - Wipe journal contents
2481 * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
2489 J_ASSERT (!(journal->j_flags & JBD2_LOADED));
2491 if (!journal->j_tail)
2500 mutex_lock_io(&journal->j_checkpoint_mutex);
2502 mutex_unlock(&journal->j_checkpoint_mutex);
2509 * jbd2_journal_abort () - Shutdown the journal immediately.
2519 * recovery mechanisms such as the ext2/ext3 remount-readonly error
2533 * ABORT state will just result in an -EROFS error return. A
2534 * jbd2_journal_stop on an existing handle will return -EIO if we have
2538 * final jbd2_journal_stop, which will receive the -EIO error.
2540 * Finally, the jbd2_journal_abort call allows the caller to supply an errno
2559 mutex_lock(&journal->j_abort_mutex);
2565 write_lock(&journal->j_state_lock);
2566 if (journal->j_flags & JBD2_ABORT) {
2567 int old_errno = journal->j_errno;
2569 write_unlock(&journal->j_state_lock);
2570 if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
2571 journal->j_errno = errno;
2574 mutex_unlock(&journal->j_abort_mutex);
2582 pr_err("Aborting journal on device %s.\n", journal->j_devname);
2584 journal->j_flags |= JBD2_ABORT;
2585 journal->j_errno = errno;
2586 transaction = journal->j_running_transaction;
2588 __jbd2_log_start_commit(journal, transaction->t_tid);
2589 write_unlock(&journal->j_state_lock);
2596 mutex_unlock(&journal->j_abort_mutex);
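A hypothetical filesystem-side sketch of the behaviour described above (fs_modify_metadata is invented; jbd2_journal_start()/jbd2_journal_stop() are the real handle API): once the journal has been aborted, starting a new handle fails with -EROFS and a pending stop reports -EIO, which the caller can then propagate, for example by remounting read-only.

#include <linux/jbd2.h>

static int fs_modify_metadata(journal_t *journal)
{
	handle_t *handle = jbd2_journal_start(journal, 1);

	if (IS_ERR(handle))		/* -EROFS once the journal is aborted */
		return PTR_ERR(handle);

	/* ... jbd2_journal_get_write_access() and friends ... */

	return jbd2_journal_stop(handle); /* -EIO if the abort raced with us */
}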
2600 * jbd2_journal_errno() - returns the journal's error state.
2604 * time the journal was mounted - if the journal was stopped
2607 * If the journal has been aborted on this mount time -EROFS will
2614 read_lock(&journal->j_state_lock);
2615 if (journal->j_flags & JBD2_ABORT)
2616 err = -EROFS;
2618 err = journal->j_errno;
2619 read_unlock(&journal->j_state_lock);
2624 * jbd2_journal_clear_err() - clears the journal's error state
2634 write_lock(&journal->j_state_lock);
2635 if (journal->j_flags & JBD2_ABORT)
2636 err = -EROFS;
2638 journal->j_errno = 0;
2639 write_unlock(&journal->j_state_lock);
2644 * jbd2_journal_ack_err() - Ack journal err.
2652 write_lock(&journal->j_state_lock);
2653 if (journal->j_errno)
2654 journal->j_flags |= JBD2_ACK_ERR;
2655 write_unlock(&journal->j_state_lock);
2660 return 1 << (PAGE_SHIFT + mapping_max_folio_order(inode->i_mapping) -
2661 inode->i_sb->s_blocksize_bits);
2682 return sz - sizeof(__u32);
2688 * These functions are used to allocate block-sized chunks of memory
2690 * page-sized chunks of data, but sometimes it will be in
2691 * sub-page-size chunks. (For example, 16k pages on Power systems
2722 int i = order_base_2(size) - 10;
2729 return -EINVAL;
2745 return -ENOMEM;
2752 int i = order_base_2(size) - 10;
2765 BUG_ON(size & (size-1)); /* Must be a power of 2 */
2774 BUG_ON(((unsigned long) ptr) & (size-1));
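To make the index computation above concrete: for a 4096-byte block, order_base_2(4096) = 12, so the slab index is 12 - 10 = 2, with index 0 corresponding to 1 KiB blocks; the power-of-two BUG_ON above is what keeps that mapping exact.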
2805 return -ENOMEM;
2833 spin_lock_init(&ret->b_state_lock);
2850 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
2852 * JBD-specific actions. Testing the zeroness of ->b_private is not reliable
2855 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
2858 * core kernel code, mainly via ->b_count.
2877 * jh->b_transaction = xxx;
2901 (atomic_read(&bh->b_count) > 0) ||
2902 (bh->b_folio && bh->b_folio->mapping));
2912 bh->b_private = jh;
2913 jh->b_bh = bh;
2917 jh->b_jcount++;
2921 return bh->b_private;
2935 jh->b_jcount++;
2946 J_ASSERT_JH(jh, jh->b_transaction == NULL);
2947 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
2948 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
2949 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
2955 bh->b_private = NULL;
2956 jh->b_bh = NULL; /* debug, really */
2962 if (jh->b_frozen_data) {
2964 jbd2_free(jh->b_frozen_data, b_size);
2966 if (jh->b_committed_data) {
2968 jbd2_free(jh->b_committed_data, b_size);
2982 J_ASSERT_JH(jh, jh->b_jcount > 0);
2983 --jh->b_jcount;
2984 if (!jh->b_jcount) {
2987 journal_release_journal_head(jh, bh->b_size);
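A minimal pairing sketch for the reference counting described above (fs_inspect_buffer is a hypothetical caller; the add and put calls are the real jbd2 entry points whose bodies appear in the fragments above):

#include <linux/jbd2.h>

static void fs_inspect_buffer(struct buffer_head *bh)
{
	/* Attaching a journal_head sets BH_JBD and takes an extra b_count
	 * reference on the buffer_head. */
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);

	spin_lock(&jh->b_state_lock);
	/* ... examine jh->b_transaction, jh->b_jlist, ... */
	spin_unlock(&jh->b_state_lock);

	/* The final put drops b_jcount, detaches the journal_head and
	 * releases the extra buffer reference. */
	jbd2_journal_put_journal_head(jh);
}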
3000 jinode->i_transaction = NULL;
3001 jinode->i_next_transaction = NULL;
3002 jinode->i_vfs_inode = inode;
3003 jinode->i_flags = 0;
3004 jinode->i_dirty_start = 0;
3005 jinode->i_dirty_end = 0;
3006 INIT_LIST_HEAD(&jinode->i_list);
3020 spin_lock(&journal->j_list_lock);
3021 /* Is commit writing out inode - we have to wait */
3022 if (jinode->i_flags & JI_COMMIT_RUNNING) {
3024 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
3025 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
3027 spin_unlock(&journal->j_list_lock);
3033 if (jinode->i_transaction) {
3034 list_del(&jinode->i_list);
3035 jinode->i_transaction = NULL;
3037 spin_unlock(&journal->j_list_lock);
3071 return -ENOMEM;
3082 return -ENOMEM;
3158 MODULE_DESCRIPTION("Generic filesystem journal-writing module");