Lines Matching +full:add +full:- +full:disk

1 // SPDX-License-Identifier: GPL-2.0-only
15 #include "raid5-log.h"
34 * stripe. The modified raid data chunks form an m-by-n matrix, where m is the
38 * stripe of a 4 disk array, 16k chunk size):
40 *   sh->sector   dd0   dd1   dd2    ppl
41 *            +-----+-----+-----+
42 *          0 | --- | --- | --- | +----+
43 *          8 | -W- | -W- | --- | | pp |    data_sector = 8
44 *         16 | -W- | -W- | --- | | pp |    data_size = 3 * 2 * 4k
45 *         24 | -W- | -W- | --- | | pp |    pp_size = 3 * 4k
46 *            +-----+-----+-----+ +----+
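
A minimal user-space sketch of what the logged partial parity above contains (illustrative only, not the kernel code; CHUNK_BYTES, DATA_DISKS, chunk[], modified[] and pp[] are invented for the example): partial parity is the xor of the data chunks the write leaves untouched, which is what later allows the parity to be rebuilt from it plus the newly written data.

#include <stddef.h>
#include <stdint.h>

#define CHUNK_BYTES 4096        /* one 4k page per data disk, as in the diagram */
#define DATA_DISKS  3           /* dd0, dd1, dd2 of the 4 disk array above */

/*
 * Partial parity for one page-sized row: xor of the chunks NOT modified by
 * the write.  For the diagram above (dd0 and dd1 written, dd2 untouched)
 * this is simply the contents of the dd2 page.  A full stripe write logs no
 * partial parity at all (see the STRIPE_FULL_WRITE checks further down).
 */
static void compute_partial_parity(const uint8_t chunk[DATA_DISKS][CHUNK_BYTES],
                                   const int modified[DATA_DISKS],
                                   uint8_t pp[CHUNK_BYTES])
{
        size_t b;
        int d;

        for (b = 0; b < CHUNK_BYTES; b++) {
                uint8_t x = 0;

                for (d = 0; d < DATA_DISKS; d++)
                        if (!modified[d])
                                x ^= chunk[d][b];
                pp[b] = x;
        }
}

With that reading, the sizes in the diagram follow directly: three 4k rows are logged, and each row contributes one pp page (pp_size = 3 * 4k) while covering two newly written chunks (data_size = 3 * 2 * 4k), starting at data_sector = 8.
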
56 * disk of the corresponding stripe. For each member disk there is one ppl_log
57 * used to handle logging for this disk, independently of the others. They are
59 * r5conf->log_private.
67 * a bio containing the header page and partial parity pages (sh->ppl_page) for
76 * data+parity is written). The log->io_list tracks all io_units of a log
77 * (for a single member disk). New io_units are added to the end of the list
81 * If write-back cache is enabled for any of the disks in the array, its data
90 /* array of child logs, one for each raid disk */
118 struct md_rdev *rdev; /* array member disk associated with
144 struct list_head log_sibling; /* log->io_list */
148 atomic_t pending_flushes; /* how many disk flushes are in progress */
161 int disks = sh->disks; in ops_run_partial_parity()
162 struct page **srcs = percpu->scribble; in ops_run_partial_parity()
163 int count = 0, pd_idx = sh->pd_idx, i; in ops_run_partial_parity()
166 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); in ops_run_partial_parity()
171 * (read-modify-write vs. reconstruct-write case) we calculate it in ops_run_partial_parity()
174 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_partial_parity()
180 srcs[count++] = sh->dev[pd_idx].page; in ops_run_partial_parity()
181 } else if (sh->reconstruct_state == reconstruct_state_drain_run) { in ops_run_partial_parity()
183 for (i = disks; i--;) { in ops_run_partial_parity()
184 struct r5dev *dev = &sh->dev[i]; in ops_run_partial_parity()
185 if (test_bit(R5_UPTODATE, &dev->flags)) in ops_run_partial_parity()
186 srcs[count++] = dev->page; in ops_run_partial_parity()
193 NULL, sh, (void *) (srcs + sh->disks + 2)); in ops_run_partial_parity()
196 tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE, in ops_run_partial_parity()
199 tx = async_xor(sh->ppl_page, srcs, 0, count, PAGE_SIZE, in ops_run_partial_parity()
214 io->header_page = alloc_page(gfp_mask); in ppl_io_pool_alloc()
215 if (!io->header_page) { in ppl_io_pool_alloc()
228 __free_page(io->header_page); in ppl_io_pool_free()
235 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_new_iounit()
240 io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT); in ppl_new_iounit()
244 header_page = io->header_page; in ppl_new_iounit()
246 io->header_page = header_page; in ppl_new_iounit()
248 io->log = log; in ppl_new_iounit()
249 INIT_LIST_HEAD(&io->log_sibling); in ppl_new_iounit()
250 INIT_LIST_HEAD(&io->stripe_list); in ppl_new_iounit()
251 atomic_set(&io->pending_stripes, 0); in ppl_new_iounit()
252 atomic_set(&io->pending_flushes, 0); in ppl_new_iounit()
253 bio_init(&io->bio, log->rdev->bdev, io->biovec, PPL_IO_INLINE_BVECS, in ppl_new_iounit()
256 pplhdr = page_address(io->header_page); in ppl_new_iounit()
258 memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); in ppl_new_iounit()
259 pplhdr->signature = cpu_to_le32(ppl_conf->signature); in ppl_new_iounit()
261 io->seq = atomic64_inc_return(&ppl_conf->seq); in ppl_new_iounit()
262 pplhdr->generation = cpu_to_le64(io->seq); in ppl_new_iounit()
269 struct ppl_io_unit *io = log->current_io; in ppl_log_stripe()
275 struct r5conf *conf = sh->raid_conf; in ppl_log_stripe()
277 pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector); in ppl_log_stripe()
280 if (io && (io->pp_size == log->entry_space || in ppl_log_stripe()
281 io->entries_count == PPL_HDR_MAX_ENTRIES)) { in ppl_log_stripe()
282 pr_debug("%s: add io_unit blocked by seq: %llu\n", in ppl_log_stripe()
283 __func__, io->seq); in ppl_log_stripe()
287 /* add a new unit if there is none or the current is full */ in ppl_log_stripe()
291 return -ENOMEM; in ppl_log_stripe()
292 spin_lock_irq(&log->io_list_lock); in ppl_log_stripe()
293 list_add_tail(&io->log_sibling, &log->io_list); in ppl_log_stripe()
294 spin_unlock_irq(&log->io_list_lock); in ppl_log_stripe()
296 log->current_io = io; in ppl_log_stripe()
299 for (i = 0; i < sh->disks; i++) { in ppl_log_stripe()
300 struct r5dev *dev = &sh->dev[i]; in ppl_log_stripe()
302 if (i != sh->pd_idx && test_bit(R5_Wantwrite, &dev->flags)) { in ppl_log_stripe()
303 if (!data_disks || dev->sector < data_sector) in ppl_log_stripe()
304 data_sector = dev->sector; in ppl_log_stripe()
311 io->seq, (unsigned long long)data_sector, data_disks); in ppl_log_stripe()
313 pplhdr = page_address(io->header_page); in ppl_log_stripe()
315 if (io->entries_count > 0) { in ppl_log_stripe()
317 &pplhdr->entries[io->entries_count - 1]; in ppl_log_stripe()
319 &io->stripe_list, struct stripe_head, log_list); in ppl_log_stripe()
320 u64 data_sector_last = le64_to_cpu(last->data_sector); in ppl_log_stripe()
321 u32 data_size_last = le32_to_cpu(last->data_size); in ppl_log_stripe()
326 * disks. Use bit shift and logarithm to avoid 64-bit division. in ppl_log_stripe()
328 if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) && in ppl_log_stripe()
329 (data_sector >> ilog2(conf->chunk_sectors) == in ppl_log_stripe()
330 data_sector_last >> ilog2(conf->chunk_sectors)) && in ppl_log_stripe()
331 ((data_sector - data_sector_last) * data_disks == in ppl_log_stripe()
337 e = &pplhdr->entries[io->entries_count++]; in ppl_log_stripe()
338 e->data_sector = cpu_to_le64(data_sector); in ppl_log_stripe()
339 e->parity_disk = cpu_to_le32(sh->pd_idx); in ppl_log_stripe()
340 e->checksum = cpu_to_le32(~0); in ppl_log_stripe()
343 le32_add_cpu(&e->data_size, data_disks << PAGE_SHIFT); in ppl_log_stripe()
346 if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) { in ppl_log_stripe()
347 le32_add_cpu(&e->pp_size, PAGE_SIZE); in ppl_log_stripe()
348 io->pp_size += PAGE_SIZE; in ppl_log_stripe()
349 e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum), in ppl_log_stripe()
350 page_address(sh->ppl_page), in ppl_log_stripe()
354 list_add_tail(&sh->log_list, &io->stripe_list); in ppl_log_stripe()
355 atomic_inc(&io->pending_stripes); in ppl_log_stripe()
356 sh->ppl_io = io; in ppl_log_stripe()
363 struct ppl_conf *ppl_conf = conf->log_private; in ppl_write_stripe()
364 struct ppl_io_unit *io = sh->ppl_io; in ppl_write_stripe()
367 if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page || in ppl_write_stripe()
368 !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) || in ppl_write_stripe()
369 !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) { in ppl_write_stripe()
370 clear_bit(STRIPE_LOG_TRAPPED, &sh->state); in ppl_write_stripe()
371 return -EAGAIN; in ppl_write_stripe()
374 log = &ppl_conf->child_logs[sh->pd_idx]; in ppl_write_stripe()
376 mutex_lock(&log->io_mutex); in ppl_write_stripe()
378 if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) { in ppl_write_stripe()
379 mutex_unlock(&log->io_mutex); in ppl_write_stripe()
380 return -EAGAIN; in ppl_write_stripe()
383 set_bit(STRIPE_LOG_TRAPPED, &sh->state); in ppl_write_stripe()
384 clear_bit(STRIPE_DELAYED, &sh->state); in ppl_write_stripe()
385 atomic_inc(&sh->count); in ppl_write_stripe()
388 spin_lock_irq(&ppl_conf->no_mem_stripes_lock); in ppl_write_stripe()
389 list_add_tail(&sh->log_list, &ppl_conf->no_mem_stripes); in ppl_write_stripe()
390 spin_unlock_irq(&ppl_conf->no_mem_stripes_lock); in ppl_write_stripe()
393 mutex_unlock(&log->io_mutex); in ppl_write_stripe()
400 struct ppl_io_unit *io = bio->bi_private; in ppl_log_endio()
401 struct ppl_log *log = io->log; in ppl_log_endio()
402 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_log_endio()
405 pr_debug("%s: seq: %llu\n", __func__, io->seq); in ppl_log_endio()
407 if (bio->bi_status) in ppl_log_endio()
408 md_error(ppl_conf->mddev, log->rdev); in ppl_log_endio()
410 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { in ppl_log_endio()
411 list_del_init(&sh->log_list); in ppl_log_endio()
413 set_bit(STRIPE_HANDLE, &sh->state); in ppl_log_endio()
421 __func__, io->seq, bio->bi_iter.bi_size, in ppl_submit_iounit_bio()
422 (unsigned long long)bio->bi_iter.bi_sector, in ppl_submit_iounit_bio()
423 bio->bi_bdev); in ppl_submit_iounit_bio()
430 struct ppl_log *log = io->log; in ppl_submit_iounit()
431 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_submit_iounit()
432 struct ppl_header *pplhdr = page_address(io->header_page); in ppl_submit_iounit()
433 struct bio *bio = &io->bio; in ppl_submit_iounit()
437 bio->bi_private = io; in ppl_submit_iounit()
439 if (!log->rdev || test_bit(Faulty, &log->rdev->flags)) { in ppl_submit_iounit()
444 for (i = 0; i < io->entries_count; i++) { in ppl_submit_iounit()
445 struct ppl_header_entry *e = &pplhdr->entries[i]; in ppl_submit_iounit()
448 __func__, io->seq, i, le64_to_cpu(e->data_sector), in ppl_submit_iounit()
449 le32_to_cpu(e->pp_size), le32_to_cpu(e->data_size)); in ppl_submit_iounit()
451 e->data_sector = cpu_to_le64(le64_to_cpu(e->data_sector) >> in ppl_submit_iounit()
452 ilog2(ppl_conf->block_size >> 9)); in ppl_submit_iounit()
453 e->checksum = cpu_to_le32(~le32_to_cpu(e->checksum)); in ppl_submit_iounit()
456 pplhdr->entries_count = cpu_to_le32(io->entries_count); in ppl_submit_iounit()
457 pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE)); in ppl_submit_iounit()
460 if (log->use_multippl && in ppl_submit_iounit()
461 log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector < in ppl_submit_iounit()
462 (PPL_HEADER_SIZE + io->pp_size) >> 9) in ppl_submit_iounit()
463 log->next_io_sector = log->rdev->ppl.sector; in ppl_submit_iounit()
466 bio->bi_end_io = ppl_log_endio; in ppl_submit_iounit()
467 bio->bi_iter.bi_sector = log->next_io_sector; in ppl_submit_iounit()
468 __bio_add_page(bio, io->header_page, PAGE_SIZE, 0); in ppl_submit_iounit()
470 pr_debug("%s: log->current_io_sector: %llu\n", __func__, in ppl_submit_iounit()
471 (unsigned long long)log->next_io_sector); in ppl_submit_iounit()
473 if (log->use_multippl) in ppl_submit_iounit()
474 log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9; in ppl_submit_iounit()
476 WARN_ON(log->disk_flush_bitmap != 0); in ppl_submit_iounit()
478 list_for_each_entry(sh, &io->stripe_list, log_list) { in ppl_submit_iounit()
479 for (i = 0; i < sh->disks; i++) { in ppl_submit_iounit()
480 struct r5dev *dev = &sh->dev[i]; in ppl_submit_iounit()
482 if ((ppl_conf->child_logs[i].wb_cache_on) && in ppl_submit_iounit()
483 (test_bit(R5_Wantwrite, &dev->flags))) { in ppl_submit_iounit()
484 set_bit(i, &log->disk_flush_bitmap); in ppl_submit_iounit()
489 if (test_bit(STRIPE_FULL_WRITE, &sh->state)) in ppl_submit_iounit()
492 if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) { in ppl_submit_iounit()
495 bio = bio_alloc_bioset(prev->bi_bdev, BIO_MAX_VECS, in ppl_submit_iounit()
496 prev->bi_opf, GFP_NOIO, in ppl_submit_iounit()
497 &ppl_conf->bs); in ppl_submit_iounit()
498 bio->bi_iter.bi_sector = bio_end_sector(prev); in ppl_submit_iounit()
499 __bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); in ppl_submit_iounit()
513 spin_lock_irq(&log->io_list_lock); in ppl_submit_current_io()
515 io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit, in ppl_submit_current_io()
517 if (io && io->submitted) in ppl_submit_current_io()
520 spin_unlock_irq(&log->io_list_lock); in ppl_submit_current_io()
523 io->submitted = true; in ppl_submit_current_io()
525 if (io == log->current_io) in ppl_submit_current_io()
526 log->current_io = NULL; in ppl_submit_current_io()
534 struct ppl_conf *ppl_conf = conf->log_private; in ppl_write_stripe_run()
538 for (i = 0; i < ppl_conf->count; i++) { in ppl_write_stripe_run()
539 log = &ppl_conf->child_logs[i]; in ppl_write_stripe_run()
541 mutex_lock(&log->io_mutex); in ppl_write_stripe_run()
543 mutex_unlock(&log->io_mutex); in ppl_write_stripe_run()
549 struct ppl_log *log = io->log; in ppl_io_unit_finished()
550 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_io_unit_finished()
551 struct r5conf *conf = ppl_conf->mddev->private; in ppl_io_unit_finished()
554 pr_debug("%s: seq: %llu\n", __func__, io->seq); in ppl_io_unit_finished()
558 spin_lock(&log->io_list_lock); in ppl_io_unit_finished()
559 list_del(&io->log_sibling); in ppl_io_unit_finished()
560 spin_unlock(&log->io_list_lock); in ppl_io_unit_finished()
562 mempool_free(io, &ppl_conf->io_pool); in ppl_io_unit_finished()
564 spin_lock(&ppl_conf->no_mem_stripes_lock); in ppl_io_unit_finished()
565 if (!list_empty(&ppl_conf->no_mem_stripes)) { in ppl_io_unit_finished()
568 sh = list_first_entry(&ppl_conf->no_mem_stripes, in ppl_io_unit_finished()
570 list_del_init(&sh->log_list); in ppl_io_unit_finished()
571 set_bit(STRIPE_HANDLE, &sh->state); in ppl_io_unit_finished()
574 spin_unlock(&ppl_conf->no_mem_stripes_lock); in ppl_io_unit_finished()
578 wake_up(&conf->wait_for_quiescent); in ppl_io_unit_finished()
583 struct ppl_io_unit *io = bio->bi_private; in ppl_flush_endio()
584 struct ppl_log *log = io->log; in ppl_flush_endio()
585 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_flush_endio()
586 struct r5conf *conf = ppl_conf->mddev->private; in ppl_flush_endio()
588 pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev); in ppl_flush_endio()
590 if (bio->bi_status) { in ppl_flush_endio()
594 rdev = md_find_rdev_rcu(conf->mddev, bio_dev(bio)); in ppl_flush_endio()
596 md_error(rdev->mddev, rdev); in ppl_flush_endio()
602 if (atomic_dec_and_test(&io->pending_flushes)) { in ppl_flush_endio()
604 md_wakeup_thread(conf->mddev->thread); in ppl_flush_endio()
610 struct ppl_log *log = io->log; in ppl_do_flush()
611 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_do_flush()
612 struct r5conf *conf = ppl_conf->mddev->private; in ppl_do_flush()
613 int raid_disks = conf->raid_disks; in ppl_do_flush()
617 atomic_set(&io->pending_flushes, raid_disks); in ppl_do_flush()
619 for_each_set_bit(i, &log->disk_flush_bitmap, raid_disks) { in ppl_do_flush()
623 rdev = conf->disks[i].rdev; in ppl_do_flush()
624 if (rdev && !test_bit(Faulty, &rdev->flags)) in ppl_do_flush()
625 bdev = rdev->bdev; in ppl_do_flush()
632 GFP_NOIO, &ppl_conf->flush_bs); in ppl_do_flush()
633 bio->bi_private = io; in ppl_do_flush()
634 bio->bi_end_io = ppl_flush_endio; in ppl_do_flush()
636 pr_debug("%s: dev: %pg\n", __func__, bio->bi_bdev); in ppl_do_flush()
643 log->disk_flush_bitmap = 0; in ppl_do_flush()
646 if (atomic_dec_and_test(&io->pending_flushes)) in ppl_do_flush()
656 io = list_first_entry_or_null(&log->io_list, struct ppl_io_unit, in ppl_no_io_unit_submitted()
659 return !io || !io->submitted; in ppl_no_io_unit_submitted()
664 struct ppl_conf *ppl_conf = conf->log_private; in ppl_quiesce()
668 for (i = 0; i < ppl_conf->count; i++) { in ppl_quiesce()
669 struct ppl_log *log = &ppl_conf->child_logs[i]; in ppl_quiesce()
671 spin_lock_irq(&log->io_list_lock); in ppl_quiesce()
672 wait_event_lock_irq(conf->wait_for_quiescent, in ppl_quiesce()
674 log->io_list_lock); in ppl_quiesce()
675 spin_unlock_irq(&log->io_list_lock); in ppl_quiesce()
682 if (bio->bi_iter.bi_size == 0) { in ppl_handle_flush_request()
686 bio->bi_opf &= ~REQ_PREFLUSH; in ppl_handle_flush_request()
687 return -EAGAIN; in ppl_handle_flush_request()
694 io = sh->ppl_io; in ppl_stripe_write_finished()
695 sh->ppl_io = NULL; in ppl_stripe_write_finished()
697 if (io && atomic_dec_and_test(&io->pending_stripes)) { in ppl_stripe_write_finished()
698 if (io->log->disk_flush_bitmap) in ppl_stripe_write_finished()
730 * case 0: single data disk write:
732 * +--------+--------+--------+           +--------------------+
733 * | ------ | ------ | ------ | +----+    | (no change)        |
734 * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
735 * | ------ | -data- | ------ | | pp | -> | data1 ^ pp         |
736 * | ------ | ------ | ------ | +----+    | (no change)        |
737 * +--------+--------+--------+           +--------------------+
740 * case 1: more than one data disk write:
742 * +--------+--------+--------+           +--------------------+
743 * | ------ | ------ | ------ | +----+    | (no change)        |
744 * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
745 * | -data- | -data- | ------ | | pp | -> | data0 ^ data1 ^ pp |
746 * | ------ | ------ | ------ | +----+    | (no change)        |
747 * +--------+--------+--------+           +--------------------+
752 * +--------+--------+--------+           +--------------------+
753 * | ------ | ------ | ------ |           | (no change)        |
754 * | -data- | -data- | -data- | --------> | xor all data       |
755 * | ------ | ------ | ------ | --------> | (no change)        |
756 * | ------ | ------ | ------ |           | (no change)        |
757 * +--------+--------+--------+           +--------------------+
766 * +--------+--------+--------+ +----+    +--------------------+
767 * | ------ | -data- | -data- | | pp |    | data1 ^ data2 ^ pp |
768 * | ------ | -data- | -data- | | pp | -> | data1 ^ data2 ^ pp |
769 * | -data- | -data- | -data- | | -- | -> | xor all data       |
770 * | -data- | -data- | ------ | | pp |    | data0 ^ data1 ^ pp |
771 * +--------+--------+--------+ +----+    +--------------------+
776 * +--------+--------+--------+ +----+    +--------------------+
777 * | ------ | -data- | ------ | | pp |    | data1 ^ pp         |
778 * | ------ | ------ | ------ | | -- | -> | (no change)        |
779 * | ------ | ------ | ------ | | -- | -> | (no change)        |
780 * | -data- | ------ | ------ | | pp |    | data0 ^ pp         |
781 * +--------+--------+--------+ +----+    +--------------------+
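
A matching user-space sketch of the recovery rule these cases describe (again illustrative only; BLOCK_BYTES, dd[], modified[] and pp are invented names): for a block row covered by a PPL entry, the new parity is the logged partial parity xored with the data read back from every modified disk; for a full stripe write row, where no partial parity was logged, it is simply the xor of all data disks.

#include <stddef.h>
#include <stdint.h>

#define BLOCK_BYTES 4096        /* recovery works in 4k blocks */

/*
 * Rebuild parity for one block row covered by a PPL entry.  Rows with a
 * logged pp block: parity = pp ^ (data of every modified disk).  Full
 * stripe write rows (pp == NULL): parity = xor of all data disks.  Rows
 * not covered by any entry are not touched at all.
 */
static void recover_parity(const uint8_t dd[][BLOCK_BYTES], const int modified[],
                           int data_disks, const uint8_t *pp,
                           uint8_t parity[BLOCK_BYTES])
{
        size_t b;
        int d;

        for (b = 0; b < BLOCK_BYTES; b++) {
                uint8_t x = pp ? pp[b] : 0;

                for (d = 0; d < data_disks; d++)
                        if (!pp || modified[d])
                                x ^= dd[d][b];
                parity[b] = x;
        }
}

This is essentially what ppl_recover_entry() below does block by block: it xors the blocks read back from the modified member disks with the corresponding pp block read from the PPL area and writes the result to the parity disk of the stripe.
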
787 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_recover_entry()
788 struct mddev *mddev = ppl_conf->mddev; in ppl_recover_entry()
789 struct r5conf *conf = mddev->private; in ppl_recover_entry()
790 int block_size = ppl_conf->block_size; in ppl_recover_entry()
799 unsigned int pp_size = le32_to_cpu(e->pp_size); in ppl_recover_entry()
800 unsigned int data_size = le32_to_cpu(e->data_size); in ppl_recover_entry()
806 ret = -ENOMEM; in ppl_recover_entry()
810 r_sector_first = le64_to_cpu(e->data_sector) * (block_size >> 9); in ppl_recover_entry()
812 if ((pp_size >> 9) < conf->chunk_sectors) { in ppl_recover_entry()
817 data_disks = conf->raid_disks - conf->max_degraded; in ppl_recover_entry()
821 (data_disks - 1) * conf->chunk_sectors + in ppl_recover_entry()
824 data_disks = conf->raid_disks - conf->max_degraded; in ppl_recover_entry()
825 strip_sectors = conf->chunk_sectors; in ppl_recover_entry()
835 (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 && in ppl_recover_entry()
836 (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0) in ppl_recover_entry()
845 int disk; in ppl_recover_entry() local
854 for (disk = 0; disk < data_disks; disk++) { in ppl_recover_entry()
859 (disk * conf->chunk_sectors); in ppl_recover_entry()
861 pr_debug("%s:%*s data member disk %d start\n", in ppl_recover_entry()
862 __func__, indent, "", disk); in ppl_recover_entry()
869 indent -= 2; in ppl_recover_entry()
875 /* map raid sector to member disk */ in ppl_recover_entry()
878 pr_debug("%s:%*s processing array sector %llu => data member disk %d, sector %llu\n", in ppl_recover_entry()
883 rdev = conf->disks[dd_idx].rdev; in ppl_recover_entry()
884 if (!rdev || (!test_bit(In_sync, &rdev->flags) && in ppl_recover_entry()
885 sector >= rdev->recovery_offset)) { in ppl_recover_entry()
886 pr_debug("%s:%*s data member disk %d missing\n", in ppl_recover_entry()
892 pr_debug("%s:%*s reading data member disk %pg sector %llu\n", in ppl_recover_entry()
893 __func__, indent, "", rdev->bdev, in ppl_recover_entry()
900 ret = -EIO; in ppl_recover_entry()
906 indent -= 2; in ppl_recover_entry()
913 pr_debug("%s:%*s reading pp disk sector %llu\n", in ppl_recover_entry()
916 if (!sync_page_io(log->rdev, in ppl_recover_entry()
917 ppl_sector - log->rdev->data_offset + i, in ppl_recover_entry()
922 md_error(mddev, log->rdev); in ppl_recover_entry()
923 ret = -EIO; in ppl_recover_entry()
930 /* map raid sector to parity disk */ in ppl_recover_entry()
932 0, &disk, &sh); in ppl_recover_entry()
933 BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk)); in ppl_recover_entry()
935 parity_rdev = conf->disks[sh.pd_idx].rdev; in ppl_recover_entry()
937 BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev); in ppl_recover_entry()
938 pr_debug("%s:%*s write parity at sector %llu, disk %pg\n", in ppl_recover_entry()
941 parity_rdev->bdev); in ppl_recover_entry()
947 ret = -EIO; in ppl_recover_entry()
962 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_recover()
963 struct md_rdev *rdev = log->rdev; in ppl_recover()
964 struct mddev *mddev = rdev->mddev; in ppl_recover()
965 sector_t ppl_sector = rdev->ppl.sector + offset + in ppl_recover()
973 return -ENOMEM; in ppl_recover()
976 for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) { in ppl_recover()
977 struct ppl_header_entry *e = &pplhdr->entries[i]; in ppl_recover()
978 u32 pp_size = le32_to_cpu(e->pp_size); in ppl_recover()
983 pr_debug("%s: disk: %d entry: %d ppl_sector: %llu pp_size: %u\n", in ppl_recover()
984 __func__, rdev->raid_disk, i, in ppl_recover()
988 crc_stored = le32_to_cpu(e->checksum); in ppl_recover()
994 if (!sync_page_io(rdev, sector - rdev->data_offset, in ppl_recover()
997 ret = -EIO; in ppl_recover()
1003 pp_size -= s; in ppl_recover()
1017 ppl_conf->mismatch_count++; in ppl_recover()
1022 ppl_conf->recovered_entries++; in ppl_recover()
1028 /* flush the disk cache after recovery if necessary */ in ppl_recover()
1029 ret = blkdev_issue_flush(rdev->bdev); in ppl_recover()
1039 struct md_rdev *rdev = log->rdev; in ppl_write_empty_header()
1042 pr_debug("%s: disk: %d ppl_sector: %llu\n", __func__, in ppl_write_empty_header()
1043 rdev->raid_disk, (unsigned long long)rdev->ppl.sector); in ppl_write_empty_header()
1047 return -ENOMEM; in ppl_write_empty_header()
1051 blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector, in ppl_write_empty_header()
1052 log->rdev->ppl.size, GFP_NOIO, 0); in ppl_write_empty_header()
1053 memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); in ppl_write_empty_header()
1054 pplhdr->signature = cpu_to_le32(log->ppl_conf->signature); in ppl_write_empty_header()
1055 pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE)); in ppl_write_empty_header()
1057 if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, in ppl_write_empty_header()
1060 md_error(rdev->mddev, rdev); in ppl_write_empty_header()
1061 ret = -EIO; in ppl_write_empty_header()
1070 struct ppl_conf *ppl_conf = log->ppl_conf; in ppl_load_distributed()
1071 struct md_rdev *rdev = log->rdev; in ppl_load_distributed()
1072 struct mddev *mddev = rdev->mddev; in ppl_load_distributed()
1080 pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk); in ppl_load_distributed()
1084 return -ENOMEM; in ppl_load_distributed()
1089 return -ENOMEM; in ppl_load_distributed()
1093 while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) { in ppl_load_distributed()
1095 rdev->ppl.sector - rdev->data_offset + in ppl_load_distributed()
1099 ret = -EIO; in ppl_load_distributed()
1100 /* if not able to read - don't recover any PPL */ in ppl_load_distributed()
1107 crc_stored = le32_to_cpu(pplhdr->checksum); in ppl_load_distributed()
1108 pplhdr->checksum = 0; in ppl_load_distributed()
1120 signature = le32_to_cpu(pplhdr->signature); in ppl_load_distributed()
1122 if (mddev->external) { in ppl_load_distributed()
1127 ppl_conf->signature = signature; in ppl_load_distributed()
1128 } else if (ppl_conf->signature != signature) { in ppl_load_distributed()
1130 __func__, signature, ppl_conf->signature, in ppl_load_distributed()
1137 if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) > in ppl_load_distributed()
1138 le64_to_cpu(pplhdr->generation)) { in ppl_load_distributed()
1151 for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++) in ppl_load_distributed()
1153 le32_to_cpu(pplhdr->entries[i].pp_size) >> 9; in ppl_load_distributed()
1159 ppl_conf->mismatch_count++; in ppl_load_distributed()
1163 le64_to_cpu(pplhdr->generation)); in ppl_load_distributed()
1166 if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector) in ppl_load_distributed()
1170 if (!ret && !mddev->pers) in ppl_load_distributed()
1177 __func__, ret, ppl_conf->mismatch_count, in ppl_load_distributed()
1178 ppl_conf->recovered_entries); in ppl_load_distributed()
1189 for (i = 0; i < ppl_conf->count; i++) { in ppl_load()
1190 struct ppl_log *log = &ppl_conf->child_logs[i]; in ppl_load()
1193 if (!log->rdev) in ppl_load()
1205 if (ppl_conf->mddev->external) { in ppl_load()
1207 signature = ppl_conf->signature; in ppl_load()
1209 } else if (signature != ppl_conf->signature) { in ppl_load()
1211 mdname(ppl_conf->mddev)); in ppl_load()
1212 ret = -EINVAL; in ppl_load()
1219 __func__, ret, ppl_conf->mismatch_count, in ppl_load()
1220 ppl_conf->recovered_entries); in ppl_load()
1226 clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); in __ppl_exit_log()
1227 clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags); in __ppl_exit_log()
1229 kfree(ppl_conf->child_logs); in __ppl_exit_log()
1231 bioset_exit(&ppl_conf->bs); in __ppl_exit_log()
1232 bioset_exit(&ppl_conf->flush_bs); in __ppl_exit_log()
1233 mempool_exit(&ppl_conf->io_pool); in __ppl_exit_log()
1234 kmem_cache_destroy(ppl_conf->io_kc); in __ppl_exit_log()
1241 struct ppl_conf *ppl_conf = conf->log_private; in ppl_exit_log()
1245 conf->log_private = NULL; in ppl_exit_log()
1260 ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); in ppl_validate_rdev()
1264 RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private)); in ppl_validate_rdev()
1268 mdname(rdev->mddev), rdev->bdev); in ppl_validate_rdev()
1269 return -ENOSPC; in ppl_validate_rdev()
1274 if ((rdev->ppl.sector < rdev->data_offset && in ppl_validate_rdev()
1275 rdev->ppl.sector + ppl_size_new > rdev->data_offset) || in ppl_validate_rdev()
1276 (rdev->ppl.sector >= rdev->data_offset && in ppl_validate_rdev()
1277 rdev->data_offset + rdev->sectors > rdev->ppl.sector)) { in ppl_validate_rdev()
1279 mdname(rdev->mddev), rdev->bdev); in ppl_validate_rdev()
1280 return -EINVAL; in ppl_validate_rdev()
1283 if (!rdev->mddev->external && in ppl_validate_rdev()
1284 ((rdev->ppl.offset > 0 && rdev->ppl.offset < (rdev->sb_size >> 9)) || in ppl_validate_rdev()
1285 (rdev->ppl.offset <= 0 && rdev->ppl.offset + ppl_size_new > 0))) { in ppl_validate_rdev()
1287 mdname(rdev->mddev), rdev->bdev); in ppl_validate_rdev()
1288 return -EINVAL; in ppl_validate_rdev()
1291 rdev->ppl.size = ppl_size_new; in ppl_validate_rdev()
1298 if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE + in ppl_init_child_log()
1300 log->use_multippl = true; in ppl_init_child_log()
1302 &log->ppl_conf->mddev->flags); in ppl_init_child_log()
1303 log->entry_space = PPL_SPACE_SIZE; in ppl_init_child_log()
1305 log->use_multippl = false; in ppl_init_child_log()
1306 log->entry_space = (log->rdev->ppl.size << 9) - in ppl_init_child_log()
1309 log->next_io_sector = rdev->ppl.sector; in ppl_init_child_log()
1311 if (bdev_write_cache(rdev->bdev)) in ppl_init_child_log()
1312 log->wb_cache_on = true; in ppl_init_child_log()
1318 struct mddev *mddev = conf->mddev; in ppl_init_log()
1324 mdname(conf->mddev)); in ppl_init_log()
1327 return -EINVAL; in ppl_init_log()
1329 if (mddev->level != 5) { in ppl_init_log()
1331 mdname(mddev), mddev->level); in ppl_init_log()
1332 return -EINVAL; in ppl_init_log()
1335 if (mddev->bitmap_info.file || mddev->bitmap_info.offset) { in ppl_init_log()
1338 return -EINVAL; in ppl_init_log()
1341 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { in ppl_init_log()
1344 return -EINVAL; in ppl_init_log()
1349 if (conf->raid_disks > max_disks) { in ppl_init_log()
1352 return -EINVAL; in ppl_init_log()
1357 return -ENOMEM; in ppl_init_log()
1359 ppl_conf->mddev = mddev; in ppl_init_log()
1361 ppl_conf->io_kc = KMEM_CACHE(ppl_io_unit, 0); in ppl_init_log()
1362 if (!ppl_conf->io_kc) { in ppl_init_log()
1363 ret = -ENOMEM; in ppl_init_log()
1367 ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc, in ppl_init_log()
1368 ppl_io_pool_free, ppl_conf->io_kc); in ppl_init_log()
1372 ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS); in ppl_init_log()
1376 ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0); in ppl_init_log()
1380 ppl_conf->count = conf->raid_disks; in ppl_init_log()
1381 ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log), in ppl_init_log()
1383 if (!ppl_conf->child_logs) { in ppl_init_log()
1384 ret = -ENOMEM; in ppl_init_log()
1388 atomic64_set(&ppl_conf->seq, 0); in ppl_init_log()
1389 INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); in ppl_init_log()
1390 spin_lock_init(&ppl_conf->no_mem_stripes_lock); in ppl_init_log()
1392 if (!mddev->external) { in ppl_init_log()
1393 ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid)); in ppl_init_log()
1394 ppl_conf->block_size = 512; in ppl_init_log()
1396 ppl_conf->block_size = in ppl_init_log()
1397 queue_logical_block_size(mddev->gendisk->queue); in ppl_init_log()
1400 for (i = 0; i < ppl_conf->count; i++) { in ppl_init_log()
1401 struct ppl_log *log = &ppl_conf->child_logs[i]; in ppl_init_log()
1402 struct md_rdev *rdev = conf->disks[i].rdev; in ppl_init_log()
1404 mutex_init(&log->io_mutex); in ppl_init_log()
1405 spin_lock_init(&log->io_list_lock); in ppl_init_log()
1406 INIT_LIST_HEAD(&log->io_list); in ppl_init_log()
1408 log->ppl_conf = ppl_conf; in ppl_init_log()
1409 log->rdev = rdev; in ppl_init_log()
1425 } else if (!mddev->pers && mddev->recovery_cp == 0 && in ppl_init_log()
1426 ppl_conf->recovered_entries > 0 && in ppl_init_log()
1427 ppl_conf->mismatch_count == 0) { in ppl_init_log()
1432 mddev->recovery_cp = MaxSector; in ppl_init_log()
1433 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); in ppl_init_log()
1434 } else if (mddev->pers && ppl_conf->mismatch_count > 0) { in ppl_init_log()
1436 ret = -EINVAL; in ppl_init_log()
1440 conf->log_private = ppl_conf; in ppl_init_log()
1441 set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags); in ppl_init_log()
1449 int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add) in ppl_modify_log() argument
1451 struct ppl_conf *ppl_conf = conf->log_private; in ppl_modify_log()
1456 return -EINVAL; in ppl_modify_log()
1458 pr_debug("%s: disk: %d operation: %s dev: %pg\n", in ppl_modify_log()
1459 __func__, rdev->raid_disk, add ? "add" : "remove", in ppl_modify_log()
1460 rdev->bdev); in ppl_modify_log()
1462 if (rdev->raid_disk < 0) in ppl_modify_log()
1465 if (rdev->raid_disk >= ppl_conf->count) in ppl_modify_log()
1466 return -ENODEV; in ppl_modify_log()
1468 log = &ppl_conf->child_logs[rdev->raid_disk]; in ppl_modify_log()
1470 mutex_lock(&log->io_mutex); in ppl_modify_log()
1471 if (add) { in ppl_modify_log()
1474 log->rdev = rdev; in ppl_modify_log()
1479 log->rdev = NULL; in ppl_modify_log()
1481 mutex_unlock(&log->io_mutex); in ppl_modify_log()
1500 return -EINVAL; in ppl_write_hint_store()
1502 return -EINVAL; in ppl_write_hint_store()
1508 conf = mddev->private; in ppl_write_hint_store()
1510 err = -ENODEV; in ppl_write_hint_store()
1511 else if (!raid5_has_ppl(conf) || !conf->log_private) in ppl_write_hint_store()
1512 err = -EINVAL; in ppl_write_hint_store()