1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 4 * 5 * bitmap_create - sets up the bitmap structure 6 * bitmap_destroy - destroys the bitmap structure 7 * 8 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 9 * - added disk storage for bitmap 10 * - changes to allow various bitmap chunk sizes 11 */ 12 13 /* 14 * Still to do: 15 * 16 * flush after percent set rather than just time based. (maybe both). 17 */ 18 19 #include <linux/blkdev.h> 20 #include <linux/module.h> 21 #include <linux/errno.h> 22 #include <linux/slab.h> 23 #include <linux/init.h> 24 #include <linux/timer.h> 25 #include <linux/sched.h> 26 #include <linux/list.h> 27 #include <linux/file.h> 28 #include <linux/mount.h> 29 #include <linux/buffer_head.h> 30 #include <linux/seq_file.h> 31 #include <trace/events/block.h> 32 33 #include "md.h" 34 #include "md-bitmap.h" 35 #include "md-cluster.h" 36 37 #define BITMAP_MAJOR_LO 3 38 /* version 4 insists the bitmap is in little-endian order 39 * with version 3, it is host-endian which is non-portable 40 * Version 5 is currently set only for clustered devices 41 */ 42 #define BITMAP_MAJOR_HI 4 43 #define BITMAP_MAJOR_CLUSTERED 5 44 #define BITMAP_MAJOR_HOSTENDIAN 3 45 46 /* 47 * in-memory bitmap: 48 * 49 * Use 16 bit block counters to track pending writes to each "chunk". 50 * The 2 high order bits are special-purpose, the first is a flag indicating 51 * whether a resync is needed. The second is a flag indicating whether a 52 * resync is active. 53 * This means that the counter is actually 14 bits: 54 * 55 * +--------+--------+------------------------------------------------+ 56 * | resync | resync | counter | 57 * | needed | active | | 58 * | (0-1) | (0-1) | (0-16383) | 59 * +--------+--------+------------------------------------------------+ 60 * 61 * The "resync needed" bit is set when: 62 * a '1' bit is read from storage at startup. 63 * a write request fails on some drives 64 * a resync is aborted on a chunk with 'resync active' set 65 * It is cleared (and resync-active set) when a resync starts across all drives 66 * of the chunk. 67 * 68 * 69 * The "resync active" bit is set when: 70 * a resync is started on all drives, and resync_needed is set. 71 * resync_needed will be cleared (as long as resync_active wasn't already set). 72 * It is cleared when a resync completes. 73 * 74 * The counter counts pending write requests, plus the on-disk bit. 75 * When the counter is '1' and the resync bits are clear, the on-disk 76 * bit can be cleared as well, thus setting the counter to 0. 77 * When we set a bit, or in the counter (to start a write), if the fields is 78 * 0, we first set the disk bit and set the counter to 1. 79 * 80 * If the counter is 0, the on-disk bit is clear and the stripe is clean 81 * Anything that dirties the stripe pushes the counter to 2 (at least) 82 * and sets the on-disk bit (lazily). 83 * If a periodic sweep find the counter at 2, it is decremented to 1. 84 * If the sweep find the counter at 1, the on-disk bit is cleared and the 85 * counter goes to zero. 86 * 87 * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block 88 * counters as a fallback when "page" memory cannot be allocated: 89 * 90 * Normal case (page memory allocated): 91 * 92 * page pointer (32-bit) 93 * 94 * [ ] ------+ 95 * | 96 * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) 97 * c1 c2 c2048 98 * 99 * Hijacked case (page memory allocation failed): 100 * 101 * hijacked page pointer (32-bit) 102 * 103 * [ ][ ] (no page memory allocated) 104 * counter #1 (16-bit) counter #2 (16-bit) 105 * 106 */ 107 108 typedef __u16 bitmap_counter_t; 109 110 #define PAGE_BITS (PAGE_SIZE << 3) 111 #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) 112 113 #define COUNTER_BITS 16 114 #define COUNTER_BIT_SHIFT 4 115 #define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) 116 117 #define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) 118 #define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) 119 #define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) 120 121 #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) 122 #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) 123 #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) 124 125 /* how many counters per page? */ 126 #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) 127 /* same, except a shift value for more efficient bitops */ 128 #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) 129 /* same, except a mask value for more efficient bitops */ 130 #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) 131 132 #define BITMAP_BLOCK_SHIFT 9 133 134 /* 135 * bitmap structures: 136 */ 137 138 /* the in-memory bitmap is represented by bitmap_pages */ 139 struct bitmap_page { 140 /* 141 * map points to the actual memory page 142 */ 143 char *map; 144 /* 145 * in emergencies (when map cannot be alloced), hijack the map 146 * pointer and use it as two counters itself 147 */ 148 unsigned int hijacked:1; 149 /* 150 * If any counter in this page is '1' or '2' - and so could be 151 * cleared then that page is marked as 'pending' 152 */ 153 unsigned int pending:1; 154 /* 155 * count of dirty bits on the page 156 */ 157 unsigned int count:30; 158 }; 159 160 /* the main bitmap structure - one per mddev */ 161 struct bitmap { 162 163 struct bitmap_counts { 164 spinlock_t lock; 165 struct bitmap_page *bp; 166 /* total number of pages in the bitmap */ 167 unsigned long pages; 168 /* number of pages not yet allocated */ 169 unsigned long missing_pages; 170 /* chunksize = 2^chunkshift (for bitops) */ 171 unsigned long chunkshift; 172 /* total number of data chunks for the array */ 173 unsigned long chunks; 174 } counts; 175 176 struct mddev *mddev; /* the md device that the bitmap is for */ 177 178 __u64 events_cleared; 179 int need_sync; 180 181 struct bitmap_storage { 182 /* backing disk file */ 183 struct file *file; 184 /* cached copy of the bitmap file superblock */ 185 struct page *sb_page; 186 unsigned long sb_index; 187 /* list of cache pages for the file */ 188 struct page **filemap; 189 /* attributes associated filemap pages */ 190 unsigned long *filemap_attr; 191 /* number of pages in the file */ 192 unsigned long file_pages; 193 /* total bytes in the bitmap */ 194 unsigned long bytes; 195 } storage; 196 197 unsigned long flags; 198 199 int allclean; 200 201 atomic_t behind_writes; 202 /* highest actual value at runtime */ 203 unsigned long behind_writes_used; 204 205 /* 206 * the bitmap daemon - periodically wakes up and sweeps the bitmap 207 * file, cleaning up bits and flushing out pages to disk as necessary 208 */ 209 unsigned long daemon_lastrun; /* jiffies of last run */ 210 /* 211 * when we lasted called end_sync to update bitmap with resync 212 * progress. 213 */ 214 unsigned long last_end_sync; 215 216 /* pending writes to the bitmap file */ 217 atomic_t pending_writes; 218 wait_queue_head_t write_wait; 219 wait_queue_head_t overflow_wait; 220 wait_queue_head_t behind_wait; 221 222 struct kernfs_node *sysfs_can_clear; 223 /* slot offset for clustered env */ 224 int cluster_slot; 225 }; 226 227 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 228 int chunksize, bool init); 229 230 static inline char *bmname(struct bitmap *bitmap) 231 { 232 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 233 } 234 235 static bool __bitmap_enabled(struct bitmap *bitmap) 236 { 237 return bitmap->storage.filemap && 238 !test_bit(BITMAP_STALE, &bitmap->flags); 239 } 240 241 static bool bitmap_enabled(struct mddev *mddev) 242 { 243 struct bitmap *bitmap = mddev->bitmap; 244 245 if (!bitmap) 246 return false; 247 248 return __bitmap_enabled(bitmap); 249 } 250 251 /* 252 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 253 * 254 * 1) check to see if this page is allocated, if it's not then try to alloc 255 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 256 * page pointer directly as a counter 257 * 258 * if we find our page, we increment the page's refcount so that it stays 259 * allocated while we're using it 260 */ 261 static int md_bitmap_checkpage(struct bitmap_counts *bitmap, 262 unsigned long page, int create, int no_hijack) 263 __releases(bitmap->lock) 264 __acquires(bitmap->lock) 265 { 266 unsigned char *mappage; 267 268 WARN_ON_ONCE(page >= bitmap->pages); 269 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 270 return 0; 271 272 if (bitmap->bp[page].map) /* page is already allocated, just return */ 273 return 0; 274 275 if (!create) 276 return -ENOENT; 277 278 /* this page has not been allocated yet */ 279 280 spin_unlock_irq(&bitmap->lock); 281 /* It is possible that this is being called inside a 282 * prepare_to_wait/finish_wait loop from raid5c:make_request(). 283 * In general it is not permitted to sleep in that context as it 284 * can cause the loop to spin freely. 285 * That doesn't apply here as we can only reach this point 286 * once with any loop. 287 * When this function completes, either bp[page].map or 288 * bp[page].hijacked. In either case, this function will 289 * abort before getting to this point again. So there is 290 * no risk of a free-spin, and so it is safe to assert 291 * that sleeping here is allowed. 292 */ 293 sched_annotate_sleep(); 294 mappage = kzalloc(PAGE_SIZE, GFP_NOIO); 295 spin_lock_irq(&bitmap->lock); 296 297 if (mappage == NULL) { 298 pr_debug("md/bitmap: map page allocation failed, hijacking\n"); 299 /* We don't support hijack for cluster raid */ 300 if (no_hijack) 301 return -ENOMEM; 302 /* failed - set the hijacked flag so that we can use the 303 * pointer as a counter */ 304 if (!bitmap->bp[page].map) 305 bitmap->bp[page].hijacked = 1; 306 } else if (bitmap->bp[page].map || 307 bitmap->bp[page].hijacked) { 308 /* somebody beat us to getting the page */ 309 kfree(mappage); 310 } else { 311 312 /* no page was in place and we have one, so install it */ 313 314 bitmap->bp[page].map = mappage; 315 bitmap->missing_pages--; 316 } 317 return 0; 318 } 319 320 /* if page is completely empty, put it back on the free list, or dealloc it */ 321 /* if page was hijacked, unmark the flag so it might get alloced next time */ 322 /* Note: lock should be held when calling this */ 323 static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) 324 { 325 char *ptr; 326 327 if (bitmap->bp[page].count) /* page is still busy */ 328 return; 329 330 /* page is no longer in use, it can be released */ 331 332 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 333 bitmap->bp[page].hijacked = 0; 334 bitmap->bp[page].map = NULL; 335 } else { 336 /* normal case, free the page */ 337 ptr = bitmap->bp[page].map; 338 bitmap->bp[page].map = NULL; 339 bitmap->missing_pages++; 340 kfree(ptr); 341 } 342 } 343 344 /* 345 * bitmap file handling - read and write the bitmap file and its superblock 346 */ 347 348 /* 349 * basic page I/O operations 350 */ 351 352 /* IO operations when bitmap is stored near all superblocks */ 353 354 /* choose a good rdev and read the page from there */ 355 static int read_sb_page(struct mddev *mddev, loff_t offset, 356 struct page *page, unsigned long index, int size) 357 { 358 359 sector_t sector = mddev->bitmap_info.offset + offset + 360 index * (PAGE_SIZE / SECTOR_SIZE); 361 struct md_rdev *rdev; 362 363 rdev_for_each(rdev, mddev) { 364 u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev)); 365 366 if (!test_bit(In_sync, &rdev->flags) || 367 test_bit(Faulty, &rdev->flags) || 368 test_bit(Bitmap_sync, &rdev->flags)) 369 continue; 370 371 if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true)) 372 return 0; 373 } 374 return -EIO; 375 } 376 377 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) 378 { 379 /* Iterate the disks of an mddev, using rcu to protect access to the 380 * linked list, and raising the refcount of devices we return to ensure 381 * they don't disappear while in use. 382 * As devices are only added or removed when raid_disk is < 0 and 383 * nr_pending is 0 and In_sync is clear, the entries we return will 384 * still be in the same position on the list when we re-enter 385 * list_for_each_entry_continue_rcu. 386 * 387 * Note that if entered with 'rdev == NULL' to start at the 388 * beginning, we temporarily assign 'rdev' to an address which 389 * isn't really an rdev, but which can be used by 390 * list_for_each_entry_continue_rcu() to find the first entry. 391 */ 392 rcu_read_lock(); 393 if (rdev == NULL) 394 /* start at the beginning */ 395 rdev = list_entry(&mddev->disks, struct md_rdev, same_set); 396 else { 397 /* release the previous rdev and start from there. */ 398 rdev_dec_pending(rdev, mddev); 399 } 400 list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { 401 if (rdev->raid_disk >= 0 && 402 !test_bit(Faulty, &rdev->flags)) { 403 /* this is a usable devices */ 404 atomic_inc(&rdev->nr_pending); 405 rcu_read_unlock(); 406 return rdev; 407 } 408 } 409 rcu_read_unlock(); 410 return NULL; 411 } 412 413 static unsigned int optimal_io_size(struct block_device *bdev, 414 unsigned int last_page_size, 415 unsigned int io_size) 416 { 417 if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) 418 return roundup(last_page_size, bdev_io_opt(bdev)); 419 return io_size; 420 } 421 422 static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, 423 loff_t start, loff_t boundary) 424 { 425 if (io_size != opt_size && 426 start + opt_size / SECTOR_SIZE <= boundary) 427 return opt_size; 428 if (start + io_size / SECTOR_SIZE <= boundary) 429 return io_size; 430 431 /* Overflows boundary */ 432 return 0; 433 } 434 435 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, 436 unsigned long pg_index, struct page *page) 437 { 438 struct block_device *bdev; 439 struct mddev *mddev = bitmap->mddev; 440 struct bitmap_storage *store = &bitmap->storage; 441 unsigned long num_pages = bitmap->storage.file_pages; 442 unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT; 443 loff_t sboff, offset = mddev->bitmap_info.offset; 444 sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; 445 unsigned int size = PAGE_SIZE; 446 unsigned int opt_size = PAGE_SIZE; 447 sector_t doff; 448 449 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 450 /* we compare length (page numbers), not page offset. */ 451 if ((pg_index - store->sb_index) == num_pages - 1) { 452 unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); 453 454 if (last_page_size == 0) 455 last_page_size = PAGE_SIZE; 456 size = roundup(last_page_size, bdev_logical_block_size(bdev)); 457 opt_size = optimal_io_size(bdev, last_page_size, size); 458 } 459 460 sboff = rdev->sb_start + offset; 461 doff = rdev->data_offset; 462 463 /* Just make sure we aren't corrupting data or metadata */ 464 if (mddev->external) { 465 /* Bitmap could be anywhere. */ 466 if (sboff + ps > doff && 467 sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) 468 return -EINVAL; 469 } else if (offset < 0) { 470 /* DATA BITMAP METADATA */ 471 size = bitmap_io_size(size, opt_size, offset + ps, 0); 472 if (size == 0) 473 /* bitmap runs in to metadata */ 474 return -EINVAL; 475 476 if (doff + mddev->dev_sectors > sboff) 477 /* data runs in to bitmap */ 478 return -EINVAL; 479 } else if (rdev->sb_start < rdev->data_offset) { 480 /* METADATA BITMAP DATA */ 481 size = bitmap_io_size(size, opt_size, sboff + ps, doff); 482 if (size == 0) 483 /* bitmap runs in to data */ 484 return -EINVAL; 485 } 486 487 md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page); 488 return 0; 489 } 490 491 static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, 492 struct page *page, bool wait) 493 { 494 struct mddev *mddev = bitmap->mddev; 495 496 do { 497 struct md_rdev *rdev = NULL; 498 499 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 500 if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) { 501 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 502 return; 503 } 504 } 505 } while (wait && md_super_wait(mddev) < 0); 506 } 507 508 static void md_bitmap_file_kick(struct bitmap *bitmap); 509 510 #ifdef CONFIG_MD_BITMAP_FILE 511 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 512 { 513 struct buffer_head *bh = page_buffers(page); 514 515 while (bh && bh->b_blocknr) { 516 atomic_inc(&bitmap->pending_writes); 517 set_buffer_locked(bh); 518 set_buffer_mapped(bh); 519 submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); 520 bh = bh->b_this_page; 521 } 522 523 if (wait) 524 wait_event(bitmap->write_wait, 525 atomic_read(&bitmap->pending_writes) == 0); 526 } 527 528 static void end_bitmap_write(struct buffer_head *bh, int uptodate) 529 { 530 struct bitmap *bitmap = bh->b_private; 531 532 if (!uptodate) 533 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 534 if (atomic_dec_and_test(&bitmap->pending_writes)) 535 wake_up(&bitmap->write_wait); 536 } 537 538 static void free_buffers(struct page *page) 539 { 540 struct buffer_head *bh; 541 542 if (!PagePrivate(page)) 543 return; 544 545 bh = page_buffers(page); 546 while (bh) { 547 struct buffer_head *next = bh->b_this_page; 548 free_buffer_head(bh); 549 bh = next; 550 } 551 detach_page_private(page); 552 put_page(page); 553 } 554 555 /* read a page from a file. 556 * We both read the page, and attach buffers to the page to record the 557 * address of each block (using bmap). These addresses will be used 558 * to write the block later, completely bypassing the filesystem. 559 * This usage is similar to how swap files are handled, and allows us 560 * to write to a file with no concerns of memory allocation failing. 561 */ 562 static int read_file_page(struct file *file, unsigned long index, 563 struct bitmap *bitmap, unsigned long count, struct page *page) 564 { 565 int ret = 0; 566 struct inode *inode = file_inode(file); 567 struct buffer_head *bh; 568 sector_t block, blk_cur; 569 unsigned long blocksize = i_blocksize(inode); 570 571 pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 572 (unsigned long long)index << PAGE_SHIFT); 573 574 bh = alloc_page_buffers(page, blocksize); 575 if (!bh) { 576 ret = -ENOMEM; 577 goto out; 578 } 579 attach_page_private(page, bh); 580 blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); 581 while (bh) { 582 block = blk_cur; 583 584 if (count == 0) 585 bh->b_blocknr = 0; 586 else { 587 ret = bmap(inode, &block); 588 if (ret || !block) { 589 ret = -EINVAL; 590 bh->b_blocknr = 0; 591 goto out; 592 } 593 594 bh->b_blocknr = block; 595 bh->b_bdev = inode->i_sb->s_bdev; 596 if (count < blocksize) 597 count = 0; 598 else 599 count -= blocksize; 600 601 bh->b_end_io = end_bitmap_write; 602 bh->b_private = bitmap; 603 atomic_inc(&bitmap->pending_writes); 604 set_buffer_locked(bh); 605 set_buffer_mapped(bh); 606 submit_bh(REQ_OP_READ, bh); 607 } 608 blk_cur++; 609 bh = bh->b_this_page; 610 } 611 612 wait_event(bitmap->write_wait, 613 atomic_read(&bitmap->pending_writes)==0); 614 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 615 ret = -EIO; 616 out: 617 if (ret) 618 pr_err("md: bitmap read error: (%dB @ %llu): %d\n", 619 (int)PAGE_SIZE, 620 (unsigned long long)index << PAGE_SHIFT, 621 ret); 622 return ret; 623 } 624 #else /* CONFIG_MD_BITMAP_FILE */ 625 static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) 626 { 627 } 628 static int read_file_page(struct file *file, unsigned long index, 629 struct bitmap *bitmap, unsigned long count, struct page *page) 630 { 631 return -EIO; 632 } 633 static void free_buffers(struct page *page) 634 { 635 put_page(page); 636 } 637 #endif /* CONFIG_MD_BITMAP_FILE */ 638 639 /* 640 * bitmap file superblock operations 641 */ 642 643 /* 644 * write out a page to a file 645 */ 646 static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, 647 bool wait) 648 { 649 struct bitmap_storage *store = &bitmap->storage; 650 struct page *page = store->filemap[pg_index]; 651 652 if (mddev_is_clustered(bitmap->mddev)) { 653 /* go to node bitmap area starting point */ 654 pg_index += store->sb_index; 655 } 656 657 if (store->file) 658 write_file_page(bitmap, page, wait); 659 else 660 write_sb_page(bitmap, pg_index, page, wait); 661 } 662 663 /* 664 * md_bitmap_wait_writes() should be called before writing any bitmap 665 * blocks, to ensure previous writes, particularly from 666 * md_bitmap_daemon_work(), have completed. 667 */ 668 static void md_bitmap_wait_writes(struct bitmap *bitmap) 669 { 670 if (bitmap->storage.file) 671 wait_event(bitmap->write_wait, 672 atomic_read(&bitmap->pending_writes)==0); 673 else 674 /* Note that we ignore the return value. The writes 675 * might have failed, but that would just mean that 676 * some bits which should be cleared haven't been, 677 * which is safe. The relevant bitmap blocks will 678 * probably get written again, but there is no great 679 * loss if they aren't. 680 */ 681 md_super_wait(bitmap->mddev); 682 } 683 684 685 /* update the event counter and sync the superblock to disk */ 686 static void bitmap_update_sb(void *data) 687 { 688 bitmap_super_t *sb; 689 struct bitmap *bitmap = data; 690 691 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 692 return; 693 if (bitmap->mddev->bitmap_info.external) 694 return; 695 if (!bitmap->storage.sb_page) /* no superblock */ 696 return; 697 sb = kmap_local_page(bitmap->storage.sb_page); 698 sb->events = cpu_to_le64(bitmap->mddev->events); 699 if (bitmap->mddev->events < bitmap->events_cleared) 700 /* rocking back to read-only */ 701 bitmap->events_cleared = bitmap->mddev->events; 702 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 703 /* 704 * clear BITMAP_WRITE_ERROR bit to protect against the case that 705 * a bitmap write error occurred but the later writes succeeded. 706 */ 707 sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR)); 708 /* Just in case these have been changed via sysfs: */ 709 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 710 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 711 /* This might have been changed by a reshape */ 712 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 713 sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); 714 sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); 715 sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> 716 bitmap_info.space); 717 kunmap_local(sb); 718 719 if (bitmap->storage.file) 720 write_file_page(bitmap, bitmap->storage.sb_page, 1); 721 else 722 write_sb_page(bitmap, bitmap->storage.sb_index, 723 bitmap->storage.sb_page, 1); 724 } 725 726 static void bitmap_print_sb(struct bitmap *bitmap) 727 { 728 bitmap_super_t *sb; 729 730 if (!bitmap || !bitmap->storage.sb_page) 731 return; 732 sb = kmap_local_page(bitmap->storage.sb_page); 733 pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); 734 pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); 735 pr_debug(" version: %u\n", le32_to_cpu(sb->version)); 736 pr_debug(" uuid: %08x.%08x.%08x.%08x\n", 737 le32_to_cpu(*(__le32 *)(sb->uuid+0)), 738 le32_to_cpu(*(__le32 *)(sb->uuid+4)), 739 le32_to_cpu(*(__le32 *)(sb->uuid+8)), 740 le32_to_cpu(*(__le32 *)(sb->uuid+12))); 741 pr_debug(" events: %llu\n", 742 (unsigned long long) le64_to_cpu(sb->events)); 743 pr_debug("events cleared: %llu\n", 744 (unsigned long long) le64_to_cpu(sb->events_cleared)); 745 pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); 746 pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); 747 pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); 748 pr_debug(" sync size: %llu KB\n", 749 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 750 pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); 751 kunmap_local(sb); 752 } 753 754 /* 755 * bitmap_new_disk_sb 756 * @bitmap 757 * 758 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb 759 * reads and verifies the on-disk bitmap superblock and populates bitmap_info. 760 * This function verifies 'bitmap_info' and populates the on-disk bitmap 761 * structure, which is to be written to disk. 762 * 763 * Returns: 0 on success, -Exxx on error 764 */ 765 static int md_bitmap_new_disk_sb(struct bitmap *bitmap) 766 { 767 bitmap_super_t *sb; 768 unsigned long chunksize, daemon_sleep, write_behind; 769 770 bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 771 if (bitmap->storage.sb_page == NULL) 772 return -ENOMEM; 773 bitmap->storage.sb_index = 0; 774 775 sb = kmap_local_page(bitmap->storage.sb_page); 776 777 sb->magic = cpu_to_le32(BITMAP_MAGIC); 778 sb->version = cpu_to_le32(BITMAP_MAJOR_HI); 779 780 chunksize = bitmap->mddev->bitmap_info.chunksize; 781 BUG_ON(!chunksize); 782 if (!is_power_of_2(chunksize)) { 783 kunmap_local(sb); 784 pr_warn("bitmap chunksize not a power of 2\n"); 785 return -EINVAL; 786 } 787 sb->chunksize = cpu_to_le32(chunksize); 788 789 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; 790 if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { 791 pr_debug("Choosing daemon_sleep default (5 sec)\n"); 792 daemon_sleep = 5 * HZ; 793 } 794 sb->daemon_sleep = cpu_to_le32(daemon_sleep); 795 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 796 797 /* 798 * FIXME: write_behind for RAID1. If not specified, what 799 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. 800 */ 801 write_behind = bitmap->mddev->bitmap_info.max_write_behind; 802 if (write_behind > COUNTER_MAX / 2) 803 write_behind = COUNTER_MAX / 2; 804 sb->write_behind = cpu_to_le32(write_behind); 805 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 806 807 /* keep the array size field of the bitmap superblock up to date */ 808 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 809 810 memcpy(sb->uuid, bitmap->mddev->uuid, 16); 811 812 set_bit(BITMAP_STALE, &bitmap->flags); 813 sb->state = cpu_to_le32(bitmap->flags); 814 bitmap->events_cleared = bitmap->mddev->events; 815 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 816 bitmap->mddev->bitmap_info.nodes = 0; 817 818 kunmap_local(sb); 819 820 return 0; 821 } 822 823 /* read the superblock from the bitmap file and initialize some bitmap fields */ 824 static int md_bitmap_read_sb(struct bitmap *bitmap) 825 { 826 char *reason = NULL; 827 bitmap_super_t *sb; 828 unsigned long chunksize, daemon_sleep, write_behind; 829 unsigned long long events; 830 int nodes = 0; 831 unsigned long sectors_reserved = 0; 832 int err = -EINVAL; 833 struct page *sb_page; 834 loff_t offset = 0; 835 836 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 837 chunksize = 128 * 1024 * 1024; 838 daemon_sleep = 5 * HZ; 839 write_behind = 0; 840 set_bit(BITMAP_STALE, &bitmap->flags); 841 err = 0; 842 goto out_no_sb; 843 } 844 /* page 0 is the superblock, read it... */ 845 sb_page = alloc_page(GFP_KERNEL); 846 if (!sb_page) 847 return -ENOMEM; 848 bitmap->storage.sb_page = sb_page; 849 850 re_read: 851 /* If cluster_slot is set, the cluster is setup */ 852 if (bitmap->cluster_slot >= 0) { 853 sector_t bm_blocks = bitmap->mddev->resync_max_sectors; 854 855 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 856 (bitmap->mddev->bitmap_info.chunksize >> 9)); 857 /* bits to bytes */ 858 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 859 /* to 4k blocks */ 860 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 861 offset = bitmap->cluster_slot * (bm_blocks << 3); 862 pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 863 bitmap->cluster_slot, offset); 864 } 865 866 if (bitmap->storage.file) { 867 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); 868 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 869 870 err = read_file_page(bitmap->storage.file, 0, 871 bitmap, bytes, sb_page); 872 } else { 873 err = read_sb_page(bitmap->mddev, offset, sb_page, 0, 874 sizeof(bitmap_super_t)); 875 } 876 if (err) 877 return err; 878 879 err = -EINVAL; 880 sb = kmap_local_page(sb_page); 881 882 chunksize = le32_to_cpu(sb->chunksize); 883 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 884 write_behind = le32_to_cpu(sb->write_behind); 885 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 886 887 /* verify that the bitmap-specific fields are valid */ 888 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 889 reason = "bad magic"; 890 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 891 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) 892 reason = "unrecognized superblock version"; 893 else if (chunksize < 512) 894 reason = "bitmap chunksize too small"; 895 else if (!is_power_of_2(chunksize)) 896 reason = "bitmap chunksize not a power of 2"; 897 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 898 reason = "daemon sleep period out of range"; 899 else if (write_behind > COUNTER_MAX) 900 reason = "write-behind limit out of range (0 - 16383)"; 901 if (reason) { 902 pr_warn("%s: invalid bitmap file superblock: %s\n", 903 bmname(bitmap), reason); 904 goto out; 905 } 906 907 /* 908 * Setup nodes/clustername only if bitmap version is 909 * cluster-compatible 910 */ 911 if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { 912 nodes = le32_to_cpu(sb->nodes); 913 strscpy(bitmap->mddev->bitmap_info.cluster_name, 914 sb->cluster_name, 64); 915 } 916 917 /* keep the array size field of the bitmap superblock up to date */ 918 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 919 920 if (bitmap->mddev->persistent) { 921 /* 922 * We have a persistent array superblock, so compare the 923 * bitmap's UUID and event counter to the mddev's 924 */ 925 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 926 pr_warn("%s: bitmap superblock UUID mismatch\n", 927 bmname(bitmap)); 928 goto out; 929 } 930 events = le64_to_cpu(sb->events); 931 if (!nodes && (events < bitmap->mddev->events)) { 932 pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", 933 bmname(bitmap), events, 934 (unsigned long long) bitmap->mddev->events); 935 set_bit(BITMAP_STALE, &bitmap->flags); 936 } 937 } 938 939 /* assign fields using values from superblock */ 940 bitmap->flags |= le32_to_cpu(sb->state); 941 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 942 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); 943 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 944 err = 0; 945 946 out: 947 kunmap_local(sb); 948 if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 949 /* Assigning chunksize is required for "re_read" */ 950 bitmap->mddev->bitmap_info.chunksize = chunksize; 951 err = md_setup_cluster(bitmap->mddev, nodes); 952 if (err) { 953 pr_warn("%s: Could not setup cluster service (%d)\n", 954 bmname(bitmap), err); 955 goto out_no_sb; 956 } 957 bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev); 958 goto re_read; 959 } 960 961 out_no_sb: 962 if (err == 0) { 963 if (test_bit(BITMAP_STALE, &bitmap->flags)) 964 bitmap->events_cleared = bitmap->mddev->events; 965 bitmap->mddev->bitmap_info.chunksize = chunksize; 966 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 967 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 968 bitmap->mddev->bitmap_info.nodes = nodes; 969 if (bitmap->mddev->bitmap_info.space == 0 || 970 bitmap->mddev->bitmap_info.space > sectors_reserved) 971 bitmap->mddev->bitmap_info.space = sectors_reserved; 972 } else { 973 bitmap_print_sb(bitmap); 974 if (bitmap->cluster_slot < 0) 975 md_cluster_stop(bitmap->mddev); 976 } 977 return err; 978 } 979 980 /* 981 * general bitmap file operations 982 */ 983 984 /* 985 * on-disk bitmap: 986 * 987 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap 988 * file a page at a time. There's a superblock at the start of the file. 989 */ 990 /* calculate the index of the page that contains this bit */ 991 static inline unsigned long file_page_index(struct bitmap_storage *store, 992 unsigned long chunk) 993 { 994 if (store->sb_page) 995 chunk += sizeof(bitmap_super_t) << 3; 996 return chunk >> PAGE_BIT_SHIFT; 997 } 998 999 /* calculate the (bit) offset of this bit within a page */ 1000 static inline unsigned long file_page_offset(struct bitmap_storage *store, 1001 unsigned long chunk) 1002 { 1003 if (store->sb_page) 1004 chunk += sizeof(bitmap_super_t) << 3; 1005 return chunk & (PAGE_BITS - 1); 1006 } 1007 1008 /* 1009 * return a pointer to the page in the filemap that contains the given bit 1010 * 1011 */ 1012 static inline struct page *filemap_get_page(struct bitmap_storage *store, 1013 unsigned long chunk) 1014 { 1015 if (file_page_index(store, chunk) >= store->file_pages) 1016 return NULL; 1017 return store->filemap[file_page_index(store, chunk)]; 1018 } 1019 1020 static int md_bitmap_storage_alloc(struct bitmap_storage *store, 1021 unsigned long chunks, int with_super, 1022 int slot_number) 1023 { 1024 int pnum, offset = 0; 1025 unsigned long num_pages; 1026 unsigned long bytes; 1027 1028 bytes = DIV_ROUND_UP(chunks, 8); 1029 if (with_super) 1030 bytes += sizeof(bitmap_super_t); 1031 1032 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 1033 offset = slot_number * num_pages; 1034 1035 store->filemap = kmalloc_array(num_pages, sizeof(struct page *), 1036 GFP_KERNEL); 1037 if (!store->filemap) 1038 return -ENOMEM; 1039 1040 if (with_super && !store->sb_page) { 1041 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); 1042 if (store->sb_page == NULL) 1043 return -ENOMEM; 1044 } 1045 1046 pnum = 0; 1047 if (store->sb_page) { 1048 store->filemap[0] = store->sb_page; 1049 pnum = 1; 1050 store->sb_index = offset; 1051 } 1052 1053 for ( ; pnum < num_pages; pnum++) { 1054 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); 1055 if (!store->filemap[pnum]) { 1056 store->file_pages = pnum; 1057 return -ENOMEM; 1058 } 1059 } 1060 store->file_pages = pnum; 1061 1062 /* We need 4 bits per page, rounded up to a multiple 1063 * of sizeof(unsigned long) */ 1064 store->filemap_attr = kzalloc( 1065 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 1066 GFP_KERNEL); 1067 if (!store->filemap_attr) 1068 return -ENOMEM; 1069 1070 store->bytes = bytes; 1071 1072 return 0; 1073 } 1074 1075 static void md_bitmap_file_unmap(struct bitmap_storage *store) 1076 { 1077 struct file *file = store->file; 1078 struct page *sb_page = store->sb_page; 1079 struct page **map = store->filemap; 1080 int pages = store->file_pages; 1081 1082 while (pages--) 1083 if (map[pages] != sb_page) /* 0 is sb_page, release it below */ 1084 free_buffers(map[pages]); 1085 kfree(map); 1086 kfree(store->filemap_attr); 1087 1088 if (sb_page) 1089 free_buffers(sb_page); 1090 1091 if (file) { 1092 struct inode *inode = file_inode(file); 1093 invalidate_mapping_pages(inode->i_mapping, 0, -1); 1094 fput(file); 1095 } 1096 } 1097 1098 /* 1099 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 1100 * then it is no longer reliable, so we stop using it and we mark the file 1101 * as failed in the superblock 1102 */ 1103 static void md_bitmap_file_kick(struct bitmap *bitmap) 1104 { 1105 if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { 1106 bitmap_update_sb(bitmap); 1107 1108 if (bitmap->storage.file) { 1109 pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", 1110 bmname(bitmap), bitmap->storage.file); 1111 1112 } else 1113 pr_warn("%s: disabling internal bitmap due to errors\n", 1114 bmname(bitmap)); 1115 } 1116 } 1117 1118 enum bitmap_page_attr { 1119 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 1120 BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. 1121 * i.e. counter is 1 or 2. */ 1122 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 1123 }; 1124 1125 static inline void set_page_attr(struct bitmap *bitmap, int pnum, 1126 enum bitmap_page_attr attr) 1127 { 1128 set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1129 } 1130 1131 static inline void clear_page_attr(struct bitmap *bitmap, int pnum, 1132 enum bitmap_page_attr attr) 1133 { 1134 clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1135 } 1136 1137 static inline int test_page_attr(struct bitmap *bitmap, int pnum, 1138 enum bitmap_page_attr attr) 1139 { 1140 return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 1141 } 1142 1143 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, 1144 enum bitmap_page_attr attr) 1145 { 1146 return test_and_clear_bit((pnum<<2) + attr, 1147 bitmap->storage.filemap_attr); 1148 } 1149 /* 1150 * bitmap_file_set_bit -- called before performing a write to the md device 1151 * to set (and eventually sync) a particular bit in the bitmap file 1152 * 1153 * we set the bit immediately, then we record the page number so that 1154 * when an unplug occurs, we can flush the dirty pages out to disk 1155 */ 1156 static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 1157 { 1158 unsigned long bit; 1159 struct page *page; 1160 void *kaddr; 1161 unsigned long chunk = block >> bitmap->counts.chunkshift; 1162 struct bitmap_storage *store = &bitmap->storage; 1163 unsigned long index = file_page_index(store, chunk); 1164 unsigned long node_offset = 0; 1165 1166 index += store->sb_index; 1167 if (mddev_is_clustered(bitmap->mddev)) 1168 node_offset = bitmap->cluster_slot * store->file_pages; 1169 1170 page = filemap_get_page(&bitmap->storage, chunk); 1171 if (!page) 1172 return; 1173 bit = file_page_offset(&bitmap->storage, chunk); 1174 1175 /* set the bit */ 1176 kaddr = kmap_local_page(page); 1177 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1178 set_bit(bit, kaddr); 1179 else 1180 set_bit_le(bit, kaddr); 1181 kunmap_local(kaddr); 1182 pr_debug("set file bit %lu page %lu\n", bit, index); 1183 /* record page number so it gets flushed to disk when unplug occurs */ 1184 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY); 1185 } 1186 1187 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) 1188 { 1189 unsigned long bit; 1190 struct page *page; 1191 void *paddr; 1192 unsigned long chunk = block >> bitmap->counts.chunkshift; 1193 struct bitmap_storage *store = &bitmap->storage; 1194 unsigned long index = file_page_index(store, chunk); 1195 unsigned long node_offset = 0; 1196 1197 index += store->sb_index; 1198 if (mddev_is_clustered(bitmap->mddev)) 1199 node_offset = bitmap->cluster_slot * store->file_pages; 1200 1201 page = filemap_get_page(&bitmap->storage, chunk); 1202 if (!page) 1203 return; 1204 bit = file_page_offset(&bitmap->storage, chunk); 1205 paddr = kmap_local_page(page); 1206 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1207 clear_bit(bit, paddr); 1208 else 1209 clear_bit_le(bit, paddr); 1210 kunmap_local(paddr); 1211 if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) { 1212 set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING); 1213 bitmap->allclean = 0; 1214 } 1215 } 1216 1217 static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) 1218 { 1219 unsigned long bit; 1220 struct page *page; 1221 void *paddr; 1222 unsigned long chunk = block >> bitmap->counts.chunkshift; 1223 int set = 0; 1224 1225 page = filemap_get_page(&bitmap->storage, chunk); 1226 if (!page) 1227 return -EINVAL; 1228 bit = file_page_offset(&bitmap->storage, chunk); 1229 paddr = kmap_local_page(page); 1230 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1231 set = test_bit(bit, paddr); 1232 else 1233 set = test_bit_le(bit, paddr); 1234 kunmap_local(paddr); 1235 return set; 1236 } 1237 1238 /* this gets called when the md device is ready to unplug its underlying 1239 * (slave) device queues -- before we let any writes go down, we need to 1240 * sync the dirty pages of the bitmap file to disk */ 1241 static void __bitmap_unplug(struct bitmap *bitmap) 1242 { 1243 unsigned long i; 1244 int dirty, need_write; 1245 int writing = 0; 1246 1247 if (!__bitmap_enabled(bitmap)) 1248 return; 1249 1250 /* look at each page to see if there are any set bits that need to be 1251 * flushed out to disk */ 1252 for (i = 0; i < bitmap->storage.file_pages; i++) { 1253 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 1254 need_write = test_and_clear_page_attr(bitmap, i, 1255 BITMAP_PAGE_NEEDWRITE); 1256 if (dirty || need_write) { 1257 if (!writing) { 1258 md_bitmap_wait_writes(bitmap); 1259 mddev_add_trace_msg(bitmap->mddev, 1260 "md bitmap_unplug"); 1261 } 1262 clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); 1263 filemap_write_page(bitmap, i, false); 1264 writing = 1; 1265 } 1266 } 1267 if (writing) 1268 md_bitmap_wait_writes(bitmap); 1269 1270 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1271 md_bitmap_file_kick(bitmap); 1272 } 1273 1274 struct bitmap_unplug_work { 1275 struct work_struct work; 1276 struct bitmap *bitmap; 1277 struct completion *done; 1278 }; 1279 1280 static void md_bitmap_unplug_fn(struct work_struct *work) 1281 { 1282 struct bitmap_unplug_work *unplug_work = 1283 container_of(work, struct bitmap_unplug_work, work); 1284 1285 __bitmap_unplug(unplug_work->bitmap); 1286 complete(unplug_work->done); 1287 } 1288 1289 static void bitmap_unplug_async(struct bitmap *bitmap) 1290 { 1291 DECLARE_COMPLETION_ONSTACK(done); 1292 struct bitmap_unplug_work unplug_work; 1293 1294 INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); 1295 unplug_work.bitmap = bitmap; 1296 unplug_work.done = &done; 1297 1298 queue_work(md_bitmap_wq, &unplug_work.work); 1299 wait_for_completion(&done); 1300 destroy_work_on_stack(&unplug_work.work); 1301 } 1302 1303 static void bitmap_unplug(struct mddev *mddev, bool sync) 1304 { 1305 struct bitmap *bitmap = mddev->bitmap; 1306 1307 if (!bitmap) 1308 return; 1309 1310 if (sync) 1311 __bitmap_unplug(bitmap); 1312 else 1313 bitmap_unplug_async(bitmap); 1314 } 1315 1316 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 1317 1318 /* 1319 * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory 1320 * mapping of the bitmap file. 1321 * 1322 * Special case: If there's no bitmap file, or if the bitmap file had been 1323 * previously kicked from the array, we mark all the bits as 1's in order to 1324 * cause a full resync. 1325 * 1326 * We ignore all bits for sectors that end earlier than 'start'. 1327 * This is used when reading an out-of-date bitmap. 1328 */ 1329 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 1330 { 1331 bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags); 1332 struct mddev *mddev = bitmap->mddev; 1333 unsigned long chunks = bitmap->counts.chunks; 1334 struct bitmap_storage *store = &bitmap->storage; 1335 struct file *file = store->file; 1336 unsigned long node_offset = 0; 1337 unsigned long bit_cnt = 0; 1338 unsigned long i; 1339 int ret; 1340 1341 if (!file && !mddev->bitmap_info.offset) { 1342 /* No permanent bitmap - fill with '1s'. */ 1343 store->filemap = NULL; 1344 store->file_pages = 0; 1345 for (i = 0; i < chunks ; i++) { 1346 /* if the disk bit is set, set the memory bit */ 1347 int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) 1348 >= start); 1349 md_bitmap_set_memory_bits(bitmap, 1350 (sector_t)i << bitmap->counts.chunkshift, 1351 needed); 1352 } 1353 return 0; 1354 } 1355 1356 if (file && i_size_read(file->f_mapping->host) < store->bytes) { 1357 pr_warn("%s: bitmap file too short %lu < %lu\n", 1358 bmname(bitmap), 1359 (unsigned long) i_size_read(file->f_mapping->host), 1360 store->bytes); 1361 ret = -ENOSPC; 1362 goto err; 1363 } 1364 1365 if (mddev_is_clustered(mddev)) 1366 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); 1367 1368 for (i = 0; i < store->file_pages; i++) { 1369 struct page *page = store->filemap[i]; 1370 int count; 1371 1372 /* unmap the old page, we're done with it */ 1373 if (i == store->file_pages - 1) 1374 count = store->bytes - i * PAGE_SIZE; 1375 else 1376 count = PAGE_SIZE; 1377 1378 if (file) 1379 ret = read_file_page(file, i, bitmap, count, page); 1380 else 1381 ret = read_sb_page(mddev, 0, page, i + node_offset, 1382 count); 1383 if (ret) 1384 goto err; 1385 } 1386 1387 if (outofdate) { 1388 pr_warn("%s: bitmap file is out of date, doing full recovery\n", 1389 bmname(bitmap)); 1390 1391 for (i = 0; i < store->file_pages; i++) { 1392 struct page *page = store->filemap[i]; 1393 unsigned long offset = 0; 1394 void *paddr; 1395 1396 if (i == 0 && !mddev->bitmap_info.external) 1397 offset = sizeof(bitmap_super_t); 1398 1399 /* 1400 * If the bitmap is out of date, dirty the whole page 1401 * and write it out 1402 */ 1403 paddr = kmap_local_page(page); 1404 memset(paddr + offset, 0xff, PAGE_SIZE - offset); 1405 kunmap_local(paddr); 1406 1407 filemap_write_page(bitmap, i, true); 1408 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) { 1409 ret = -EIO; 1410 goto err; 1411 } 1412 } 1413 } 1414 1415 for (i = 0; i < chunks; i++) { 1416 struct page *page = filemap_get_page(&bitmap->storage, i); 1417 unsigned long bit = file_page_offset(&bitmap->storage, i); 1418 void *paddr; 1419 bool was_set; 1420 1421 paddr = kmap_local_page(page); 1422 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1423 was_set = test_bit(bit, paddr); 1424 else 1425 was_set = test_bit_le(bit, paddr); 1426 kunmap_local(paddr); 1427 1428 if (was_set) { 1429 /* if the disk bit is set, set the memory bit */ 1430 int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift 1431 >= start); 1432 md_bitmap_set_memory_bits(bitmap, 1433 (sector_t)i << bitmap->counts.chunkshift, 1434 needed); 1435 bit_cnt++; 1436 } 1437 } 1438 1439 pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", 1440 bmname(bitmap), store->file_pages, 1441 bit_cnt, chunks); 1442 1443 return 0; 1444 1445 err: 1446 pr_warn("%s: bitmap initialisation failed: %d\n", 1447 bmname(bitmap), ret); 1448 return ret; 1449 } 1450 1451 /* just flag bitmap pages as needing to be written. */ 1452 static void bitmap_write_all(struct mddev *mddev) 1453 { 1454 int i; 1455 struct bitmap *bitmap = mddev->bitmap; 1456 1457 if (!bitmap || !bitmap->storage.filemap) 1458 return; 1459 1460 /* Only one copy, so nothing needed */ 1461 if (bitmap->storage.file) 1462 return; 1463 1464 for (i = 0; i < bitmap->storage.file_pages; i++) 1465 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 1466 bitmap->allclean = 0; 1467 } 1468 1469 static void md_bitmap_count_page(struct bitmap_counts *bitmap, 1470 sector_t offset, int inc) 1471 { 1472 sector_t chunk = offset >> bitmap->chunkshift; 1473 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1474 bitmap->bp[page].count += inc; 1475 md_bitmap_checkfree(bitmap, page); 1476 } 1477 1478 static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) 1479 { 1480 sector_t chunk = offset >> bitmap->chunkshift; 1481 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1482 struct bitmap_page *bp = &bitmap->bp[page]; 1483 1484 if (!bp->pending) 1485 bp->pending = 1; 1486 } 1487 1488 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1489 sector_t offset, sector_t *blocks, 1490 int create); 1491 1492 static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, 1493 bool force) 1494 { 1495 struct md_thread *thread; 1496 1497 rcu_read_lock(); 1498 thread = rcu_dereference(mddev->thread); 1499 1500 if (!thread) 1501 goto out; 1502 1503 if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) 1504 thread->timeout = timeout; 1505 1506 out: 1507 rcu_read_unlock(); 1508 } 1509 1510 /* 1511 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1512 * out to disk 1513 */ 1514 static void bitmap_daemon_work(struct mddev *mddev) 1515 { 1516 struct bitmap *bitmap; 1517 unsigned long j; 1518 unsigned long nextpage; 1519 sector_t blocks; 1520 struct bitmap_counts *counts; 1521 1522 /* Use a mutex to guard daemon_work against 1523 * bitmap_destroy. 1524 */ 1525 mutex_lock(&mddev->bitmap_info.mutex); 1526 bitmap = mddev->bitmap; 1527 if (bitmap == NULL) { 1528 mutex_unlock(&mddev->bitmap_info.mutex); 1529 return; 1530 } 1531 if (time_before(jiffies, bitmap->daemon_lastrun 1532 + mddev->bitmap_info.daemon_sleep)) 1533 goto done; 1534 1535 bitmap->daemon_lastrun = jiffies; 1536 if (bitmap->allclean) { 1537 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 1538 goto done; 1539 } 1540 bitmap->allclean = 1; 1541 1542 mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); 1543 1544 /* Any file-page which is PENDING now needs to be written. 1545 * So set NEEDWRITE now, then after we make any last-minute changes 1546 * we will write it. 1547 */ 1548 for (j = 0; j < bitmap->storage.file_pages; j++) 1549 if (test_and_clear_page_attr(bitmap, j, 1550 BITMAP_PAGE_PENDING)) 1551 set_page_attr(bitmap, j, 1552 BITMAP_PAGE_NEEDWRITE); 1553 1554 if (bitmap->need_sync && 1555 mddev->bitmap_info.external == 0) { 1556 /* Arrange for superblock update as well as 1557 * other changes */ 1558 bitmap_super_t *sb; 1559 bitmap->need_sync = 0; 1560 if (bitmap->storage.filemap) { 1561 sb = kmap_local_page(bitmap->storage.sb_page); 1562 sb->events_cleared = 1563 cpu_to_le64(bitmap->events_cleared); 1564 kunmap_local(sb); 1565 set_page_attr(bitmap, 0, 1566 BITMAP_PAGE_NEEDWRITE); 1567 } 1568 } 1569 /* Now look at the bitmap counters and if any are '2' or '1', 1570 * decrement and handle accordingly. 1571 */ 1572 counts = &bitmap->counts; 1573 spin_lock_irq(&counts->lock); 1574 nextpage = 0; 1575 for (j = 0; j < counts->chunks; j++) { 1576 bitmap_counter_t *bmc; 1577 sector_t block = (sector_t)j << counts->chunkshift; 1578 1579 if (j == nextpage) { 1580 nextpage += PAGE_COUNTER_RATIO; 1581 if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { 1582 j |= PAGE_COUNTER_MASK; 1583 continue; 1584 } 1585 counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; 1586 } 1587 1588 bmc = md_bitmap_get_counter(counts, block, &blocks, 0); 1589 if (!bmc) { 1590 j |= PAGE_COUNTER_MASK; 1591 continue; 1592 } 1593 if (*bmc == 1 && !bitmap->need_sync) { 1594 /* We can clear the bit */ 1595 *bmc = 0; 1596 md_bitmap_count_page(counts, block, -1); 1597 md_bitmap_file_clear_bit(bitmap, block); 1598 } else if (*bmc && *bmc <= 2) { 1599 *bmc = 1; 1600 md_bitmap_set_pending(counts, block); 1601 bitmap->allclean = 0; 1602 } 1603 } 1604 spin_unlock_irq(&counts->lock); 1605 1606 md_bitmap_wait_writes(bitmap); 1607 /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. 1608 * DIRTY pages need to be written by bitmap_unplug so it can wait 1609 * for them. 1610 * If we find any DIRTY page we stop there and let bitmap_unplug 1611 * handle all the rest. This is important in the case where 1612 * the first blocking holds the superblock and it has been updated. 1613 * We mustn't write any other blocks before the superblock. 1614 */ 1615 for (j = 0; 1616 j < bitmap->storage.file_pages 1617 && !test_bit(BITMAP_STALE, &bitmap->flags); 1618 j++) { 1619 if (test_page_attr(bitmap, j, 1620 BITMAP_PAGE_DIRTY)) 1621 /* bitmap_unplug will handle the rest */ 1622 break; 1623 if (bitmap->storage.filemap && 1624 test_and_clear_page_attr(bitmap, j, 1625 BITMAP_PAGE_NEEDWRITE)) 1626 filemap_write_page(bitmap, j, false); 1627 } 1628 1629 done: 1630 if (bitmap->allclean == 0) 1631 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 1632 mutex_unlock(&mddev->bitmap_info.mutex); 1633 } 1634 1635 static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, 1636 sector_t offset, sector_t *blocks, 1637 int create) 1638 __releases(bitmap->lock) 1639 __acquires(bitmap->lock) 1640 { 1641 /* If 'create', we might release the lock and reclaim it. 1642 * The lock must have been taken with interrupts enabled. 1643 * If !create, we don't release the lock. 1644 */ 1645 sector_t chunk = offset >> bitmap->chunkshift; 1646 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1647 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1648 sector_t csize = ((sector_t)1) << bitmap->chunkshift; 1649 int err; 1650 1651 if (page >= bitmap->pages) { 1652 /* 1653 * This can happen if bitmap_start_sync goes beyond 1654 * End-of-device while looking for a whole page or 1655 * user set a huge number to sysfs bitmap_set_bits. 1656 */ 1657 *blocks = csize - (offset & (csize - 1)); 1658 return NULL; 1659 } 1660 err = md_bitmap_checkpage(bitmap, page, create, 0); 1661 1662 if (bitmap->bp[page].hijacked || 1663 bitmap->bp[page].map == NULL) 1664 csize = ((sector_t)1) << (bitmap->chunkshift + 1665 PAGE_COUNTER_SHIFT); 1666 1667 *blocks = csize - (offset & (csize - 1)); 1668 1669 if (err < 0) 1670 return NULL; 1671 1672 /* now locked ... */ 1673 1674 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1675 /* should we use the first or second counter field 1676 * of the hijacked pointer? */ 1677 int hi = (pageoff > PAGE_COUNTER_MASK); 1678 return &((bitmap_counter_t *) 1679 &bitmap->bp[page].map)[hi]; 1680 } else /* page is allocated */ 1681 return (bitmap_counter_t *) 1682 &(bitmap->bp[page].map[pageoff]); 1683 } 1684 1685 static void bitmap_start_write(struct mddev *mddev, sector_t offset, 1686 unsigned long sectors) 1687 { 1688 struct bitmap *bitmap = mddev->bitmap; 1689 1690 if (!bitmap) 1691 return; 1692 1693 while (sectors) { 1694 sector_t blocks; 1695 bitmap_counter_t *bmc; 1696 1697 spin_lock_irq(&bitmap->counts.lock); 1698 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); 1699 if (!bmc) { 1700 spin_unlock_irq(&bitmap->counts.lock); 1701 return; 1702 } 1703 1704 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { 1705 DEFINE_WAIT(__wait); 1706 /* note that it is safe to do the prepare_to_wait 1707 * after the test as long as we do it before dropping 1708 * the spinlock. 1709 */ 1710 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1711 TASK_UNINTERRUPTIBLE); 1712 spin_unlock_irq(&bitmap->counts.lock); 1713 schedule(); 1714 finish_wait(&bitmap->overflow_wait, &__wait); 1715 continue; 1716 } 1717 1718 switch (*bmc) { 1719 case 0: 1720 md_bitmap_file_set_bit(bitmap, offset); 1721 md_bitmap_count_page(&bitmap->counts, offset, 1); 1722 fallthrough; 1723 case 1: 1724 *bmc = 2; 1725 } 1726 1727 (*bmc)++; 1728 1729 spin_unlock_irq(&bitmap->counts.lock); 1730 1731 offset += blocks; 1732 if (sectors > blocks) 1733 sectors -= blocks; 1734 else 1735 sectors = 0; 1736 } 1737 } 1738 1739 static void bitmap_end_write(struct mddev *mddev, sector_t offset, 1740 unsigned long sectors) 1741 { 1742 struct bitmap *bitmap = mddev->bitmap; 1743 1744 if (!bitmap) 1745 return; 1746 1747 while (sectors) { 1748 sector_t blocks; 1749 unsigned long flags; 1750 bitmap_counter_t *bmc; 1751 1752 spin_lock_irqsave(&bitmap->counts.lock, flags); 1753 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); 1754 if (!bmc) { 1755 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1756 return; 1757 } 1758 1759 if (!bitmap->mddev->degraded) { 1760 if (bitmap->events_cleared < bitmap->mddev->events) { 1761 bitmap->events_cleared = bitmap->mddev->events; 1762 bitmap->need_sync = 1; 1763 sysfs_notify_dirent_safe( 1764 bitmap->sysfs_can_clear); 1765 } 1766 } else if (!NEEDED(*bmc)) { 1767 *bmc |= NEEDED_MASK; 1768 } 1769 1770 if (COUNTER(*bmc) == COUNTER_MAX) 1771 wake_up(&bitmap->overflow_wait); 1772 1773 (*bmc)--; 1774 if (*bmc <= 2) { 1775 md_bitmap_set_pending(&bitmap->counts, offset); 1776 bitmap->allclean = 0; 1777 } 1778 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1779 offset += blocks; 1780 if (sectors > blocks) 1781 sectors -= blocks; 1782 else 1783 sectors = 0; 1784 } 1785 } 1786 1787 static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, 1788 sector_t *blocks, bool degraded) 1789 { 1790 bitmap_counter_t *bmc; 1791 bool rv; 1792 1793 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1794 *blocks = 1024; 1795 return true; /* always resync if no bitmap */ 1796 } 1797 spin_lock_irq(&bitmap->counts.lock); 1798 1799 rv = false; 1800 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1801 if (bmc) { 1802 /* locked */ 1803 if (RESYNC(*bmc)) { 1804 rv = true; 1805 } else if (NEEDED(*bmc)) { 1806 rv = true; 1807 if (!degraded) { /* don't set/clear bits if degraded */ 1808 *bmc |= RESYNC_MASK; 1809 *bmc &= ~NEEDED_MASK; 1810 } 1811 } 1812 } 1813 spin_unlock_irq(&bitmap->counts.lock); 1814 1815 return rv; 1816 } 1817 1818 static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, 1819 sector_t *blocks, bool degraded) 1820 { 1821 /* bitmap_start_sync must always report on multiples of whole 1822 * pages, otherwise resync (which is very PAGE_SIZE based) will 1823 * get confused. 1824 * So call __bitmap_start_sync repeatedly (if needed) until 1825 * At least PAGE_SIZE>>9 blocks are covered. 1826 * Return the 'or' of the result. 1827 */ 1828 bool rv = false; 1829 sector_t blocks1; 1830 1831 *blocks = 0; 1832 while (*blocks < (PAGE_SIZE>>9)) { 1833 rv |= __bitmap_start_sync(mddev->bitmap, offset, 1834 &blocks1, degraded); 1835 offset += blocks1; 1836 *blocks += blocks1; 1837 } 1838 1839 return rv; 1840 } 1841 1842 static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, 1843 sector_t *blocks, bool aborted) 1844 { 1845 bitmap_counter_t *bmc; 1846 unsigned long flags; 1847 1848 if (bitmap == NULL) { 1849 *blocks = 1024; 1850 return; 1851 } 1852 spin_lock_irqsave(&bitmap->counts.lock, flags); 1853 bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1854 if (bmc == NULL) 1855 goto unlock; 1856 /* locked */ 1857 if (RESYNC(*bmc)) { 1858 *bmc &= ~RESYNC_MASK; 1859 1860 if (!NEEDED(*bmc) && aborted) 1861 *bmc |= NEEDED_MASK; 1862 else { 1863 if (*bmc <= 2) { 1864 md_bitmap_set_pending(&bitmap->counts, offset); 1865 bitmap->allclean = 0; 1866 } 1867 } 1868 } 1869 unlock: 1870 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1871 } 1872 1873 static void bitmap_end_sync(struct mddev *mddev, sector_t offset, 1874 sector_t *blocks) 1875 { 1876 __bitmap_end_sync(mddev->bitmap, offset, blocks, true); 1877 } 1878 1879 static void bitmap_close_sync(struct mddev *mddev) 1880 { 1881 /* Sync has finished, and any bitmap chunks that weren't synced 1882 * properly have been aborted. It remains to us to clear the 1883 * RESYNC bit wherever it is still on 1884 */ 1885 sector_t sector = 0; 1886 sector_t blocks; 1887 struct bitmap *bitmap = mddev->bitmap; 1888 1889 if (!bitmap) 1890 return; 1891 1892 while (sector < bitmap->mddev->resync_max_sectors) { 1893 __bitmap_end_sync(bitmap, sector, &blocks, false); 1894 sector += blocks; 1895 } 1896 } 1897 1898 static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, 1899 bool force) 1900 { 1901 sector_t s = 0; 1902 sector_t blocks; 1903 struct bitmap *bitmap = mddev->bitmap; 1904 1905 if (!bitmap) 1906 return; 1907 if (sector == 0) { 1908 bitmap->last_end_sync = jiffies; 1909 return; 1910 } 1911 if (!force && time_before(jiffies, (bitmap->last_end_sync 1912 + bitmap->mddev->bitmap_info.daemon_sleep))) 1913 return; 1914 wait_event(bitmap->mddev->recovery_wait, 1915 atomic_read(&bitmap->mddev->recovery_active) == 0); 1916 1917 bitmap->mddev->curr_resync_completed = sector; 1918 set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); 1919 sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); 1920 s = 0; 1921 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1922 __bitmap_end_sync(bitmap, s, &blocks, false); 1923 s += blocks; 1924 } 1925 bitmap->last_end_sync = jiffies; 1926 sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); 1927 } 1928 1929 static void bitmap_sync_with_cluster(struct mddev *mddev, 1930 sector_t old_lo, sector_t old_hi, 1931 sector_t new_lo, sector_t new_hi) 1932 { 1933 struct bitmap *bitmap = mddev->bitmap; 1934 sector_t sector, blocks = 0; 1935 1936 for (sector = old_lo; sector < new_lo; ) { 1937 __bitmap_end_sync(bitmap, sector, &blocks, false); 1938 sector += blocks; 1939 } 1940 WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); 1941 1942 for (sector = old_hi; sector < new_hi; ) { 1943 bitmap_start_sync(mddev, sector, &blocks, false); 1944 sector += blocks; 1945 } 1946 WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); 1947 } 1948 1949 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1950 { 1951 /* For each chunk covered by any of these sectors, set the 1952 * counter to 2 and possibly set resync_needed. They should all 1953 * be 0 at this point 1954 */ 1955 1956 sector_t secs; 1957 bitmap_counter_t *bmc; 1958 spin_lock_irq(&bitmap->counts.lock); 1959 bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); 1960 if (!bmc) { 1961 spin_unlock_irq(&bitmap->counts.lock); 1962 return; 1963 } 1964 if (!*bmc) { 1965 *bmc = 2; 1966 md_bitmap_count_page(&bitmap->counts, offset, 1); 1967 md_bitmap_set_pending(&bitmap->counts, offset); 1968 bitmap->allclean = 0; 1969 } 1970 if (needed) 1971 *bmc |= NEEDED_MASK; 1972 spin_unlock_irq(&bitmap->counts.lock); 1973 } 1974 1975 /* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1976 static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, 1977 unsigned long e) 1978 { 1979 unsigned long chunk; 1980 struct bitmap *bitmap = mddev->bitmap; 1981 1982 if (!bitmap) 1983 return; 1984 1985 for (chunk = s; chunk <= e; chunk++) { 1986 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; 1987 1988 md_bitmap_set_memory_bits(bitmap, sec, 1); 1989 md_bitmap_file_set_bit(bitmap, sec); 1990 if (sec < bitmap->mddev->recovery_cp) 1991 /* We are asserting that the array is dirty, 1992 * so move the recovery_cp address back so 1993 * that it is obvious that it is dirty 1994 */ 1995 bitmap->mddev->recovery_cp = sec; 1996 } 1997 } 1998 1999 static void bitmap_flush(struct mddev *mddev) 2000 { 2001 struct bitmap *bitmap = mddev->bitmap; 2002 long sleep; 2003 2004 if (!bitmap) /* there was no bitmap */ 2005 return; 2006 2007 /* run the daemon_work three time to ensure everything is flushed 2008 * that can be 2009 */ 2010 sleep = mddev->bitmap_info.daemon_sleep * 2; 2011 bitmap->daemon_lastrun -= sleep; 2012 bitmap_daemon_work(mddev); 2013 bitmap->daemon_lastrun -= sleep; 2014 bitmap_daemon_work(mddev); 2015 bitmap->daemon_lastrun -= sleep; 2016 bitmap_daemon_work(mddev); 2017 if (mddev->bitmap_info.external) 2018 md_super_wait(mddev); 2019 bitmap_update_sb(bitmap); 2020 } 2021 2022 static void md_bitmap_free(void *data) 2023 { 2024 unsigned long k, pages; 2025 struct bitmap_page *bp; 2026 struct bitmap *bitmap = data; 2027 2028 if (!bitmap) /* there was no bitmap */ 2029 return; 2030 2031 if (bitmap->sysfs_can_clear) 2032 sysfs_put(bitmap->sysfs_can_clear); 2033 2034 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 2035 bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev)) 2036 md_cluster_stop(bitmap->mddev); 2037 2038 /* Shouldn't be needed - but just in case.... */ 2039 wait_event(bitmap->write_wait, 2040 atomic_read(&bitmap->pending_writes) == 0); 2041 2042 /* release the bitmap file */ 2043 md_bitmap_file_unmap(&bitmap->storage); 2044 2045 bp = bitmap->counts.bp; 2046 pages = bitmap->counts.pages; 2047 2048 /* free all allocated memory */ 2049 2050 if (bp) /* deallocate the page memory */ 2051 for (k = 0; k < pages; k++) 2052 if (bp[k].map && !bp[k].hijacked) 2053 kfree(bp[k].map); 2054 kfree(bp); 2055 kfree(bitmap); 2056 } 2057 2058 static void bitmap_start_behind_write(struct mddev *mddev) 2059 { 2060 struct bitmap *bitmap = mddev->bitmap; 2061 int bw; 2062 2063 if (!bitmap) 2064 return; 2065 2066 atomic_inc(&bitmap->behind_writes); 2067 bw = atomic_read(&bitmap->behind_writes); 2068 if (bw > bitmap->behind_writes_used) 2069 bitmap->behind_writes_used = bw; 2070 2071 pr_debug("inc write-behind count %d/%lu\n", 2072 bw, bitmap->mddev->bitmap_info.max_write_behind); 2073 } 2074 2075 static void bitmap_end_behind_write(struct mddev *mddev) 2076 { 2077 struct bitmap *bitmap = mddev->bitmap; 2078 2079 if (!bitmap) 2080 return; 2081 2082 if (atomic_dec_and_test(&bitmap->behind_writes)) 2083 wake_up(&bitmap->behind_wait); 2084 pr_debug("dec write-behind count %d/%lu\n", 2085 atomic_read(&bitmap->behind_writes), 2086 bitmap->mddev->bitmap_info.max_write_behind); 2087 } 2088 2089 static void bitmap_wait_behind_writes(struct mddev *mddev) 2090 { 2091 struct bitmap *bitmap = mddev->bitmap; 2092 2093 /* wait for behind writes to complete */ 2094 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 2095 pr_debug("md:%s: behind writes in progress - waiting to stop.\n", 2096 mdname(mddev)); 2097 /* need to kick something here to make sure I/O goes? */ 2098 wait_event(bitmap->behind_wait, 2099 atomic_read(&bitmap->behind_writes) == 0); 2100 } 2101 } 2102 2103 static void bitmap_destroy(struct mddev *mddev) 2104 { 2105 struct bitmap *bitmap = mddev->bitmap; 2106 2107 if (!bitmap) /* there was no bitmap */ 2108 return; 2109 2110 bitmap_wait_behind_writes(mddev); 2111 if (!mddev->serialize_policy) 2112 mddev_destroy_serial_pool(mddev, NULL); 2113 2114 mutex_lock(&mddev->bitmap_info.mutex); 2115 spin_lock(&mddev->lock); 2116 mddev->bitmap = NULL; /* disconnect from the md device */ 2117 spin_unlock(&mddev->lock); 2118 mutex_unlock(&mddev->bitmap_info.mutex); 2119 mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); 2120 2121 md_bitmap_free(bitmap); 2122 } 2123 2124 /* 2125 * initialize the bitmap structure 2126 * if this returns an error, bitmap_destroy must be called to do clean up 2127 * once mddev->bitmap is set 2128 */ 2129 static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) 2130 { 2131 struct bitmap *bitmap; 2132 sector_t blocks = mddev->resync_max_sectors; 2133 struct file *file = mddev->bitmap_info.file; 2134 int err; 2135 struct kernfs_node *bm = NULL; 2136 2137 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 2138 2139 BUG_ON(file && mddev->bitmap_info.offset); 2140 2141 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { 2142 pr_notice("md/raid:%s: array with journal cannot have bitmap\n", 2143 mdname(mddev)); 2144 return ERR_PTR(-EBUSY); 2145 } 2146 2147 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 2148 if (!bitmap) 2149 return ERR_PTR(-ENOMEM); 2150 2151 spin_lock_init(&bitmap->counts.lock); 2152 atomic_set(&bitmap->pending_writes, 0); 2153 init_waitqueue_head(&bitmap->write_wait); 2154 init_waitqueue_head(&bitmap->overflow_wait); 2155 init_waitqueue_head(&bitmap->behind_wait); 2156 2157 bitmap->mddev = mddev; 2158 bitmap->cluster_slot = slot; 2159 2160 if (mddev->kobj.sd) 2161 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); 2162 if (bm) { 2163 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); 2164 sysfs_put(bm); 2165 } else 2166 bitmap->sysfs_can_clear = NULL; 2167 2168 bitmap->storage.file = file; 2169 if (file) { 2170 get_file(file); 2171 /* As future accesses to this file will use bmap, 2172 * and bypass the page cache, we must sync the file 2173 * first. 2174 */ 2175 vfs_fsync(file, 1); 2176 } 2177 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 2178 if (!mddev->bitmap_info.external) { 2179 /* 2180 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is 2181 * instructing us to create a new on-disk bitmap instance. 2182 */ 2183 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) 2184 err = md_bitmap_new_disk_sb(bitmap); 2185 else 2186 err = md_bitmap_read_sb(bitmap); 2187 } else { 2188 err = 0; 2189 if (mddev->bitmap_info.chunksize == 0 || 2190 mddev->bitmap_info.daemon_sleep == 0) 2191 /* chunksize and time_base need to be 2192 * set first. */ 2193 err = -EINVAL; 2194 } 2195 if (err) 2196 goto error; 2197 2198 bitmap->daemon_lastrun = jiffies; 2199 err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 2200 true); 2201 if (err) 2202 goto error; 2203 2204 pr_debug("created bitmap (%lu pages) for device %s\n", 2205 bitmap->counts.pages, bmname(bitmap)); 2206 2207 err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; 2208 if (err) 2209 goto error; 2210 2211 return bitmap; 2212 error: 2213 md_bitmap_free(bitmap); 2214 return ERR_PTR(err); 2215 } 2216 2217 static int bitmap_create(struct mddev *mddev) 2218 { 2219 struct bitmap *bitmap = __bitmap_create(mddev, -1); 2220 2221 if (IS_ERR(bitmap)) 2222 return PTR_ERR(bitmap); 2223 2224 mddev->bitmap = bitmap; 2225 return 0; 2226 } 2227 2228 static int bitmap_load(struct mddev *mddev) 2229 { 2230 int err = 0; 2231 sector_t start = 0; 2232 sector_t sector = 0; 2233 struct bitmap *bitmap = mddev->bitmap; 2234 struct md_rdev *rdev; 2235 2236 if (!bitmap) 2237 goto out; 2238 2239 rdev_for_each(rdev, mddev) 2240 mddev_create_serial_pool(mddev, rdev); 2241 2242 if (mddev_is_clustered(mddev)) 2243 mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2244 2245 /* Clear out old bitmap info first: Either there is none, or we 2246 * are resuming after someone else has possibly changed things, 2247 * so we should forget old cached info. 2248 * All chunks should be clean, but some might need_sync. 2249 */ 2250 while (sector < mddev->resync_max_sectors) { 2251 sector_t blocks; 2252 bitmap_start_sync(mddev, sector, &blocks, false); 2253 sector += blocks; 2254 } 2255 bitmap_close_sync(mddev); 2256 2257 if (mddev->degraded == 0 2258 || bitmap->events_cleared == mddev->events) 2259 /* no need to keep dirty bits to optimise a 2260 * re-add of a missing device */ 2261 start = mddev->recovery_cp; 2262 2263 mutex_lock(&mddev->bitmap_info.mutex); 2264 err = md_bitmap_init_from_disk(bitmap, start); 2265 mutex_unlock(&mddev->bitmap_info.mutex); 2266 2267 if (err) 2268 goto out; 2269 clear_bit(BITMAP_STALE, &bitmap->flags); 2270 2271 /* Kick recovery in case any bits were set */ 2272 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 2273 2274 mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); 2275 md_wakeup_thread(mddev->thread); 2276 2277 bitmap_update_sb(bitmap); 2278 2279 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 2280 err = -EIO; 2281 out: 2282 return err; 2283 } 2284 2285 /* caller need to free returned bitmap with md_bitmap_free() */ 2286 static void *bitmap_get_from_slot(struct mddev *mddev, int slot) 2287 { 2288 int rv = 0; 2289 struct bitmap *bitmap; 2290 2291 bitmap = __bitmap_create(mddev, slot); 2292 if (IS_ERR(bitmap)) { 2293 rv = PTR_ERR(bitmap); 2294 return ERR_PTR(rv); 2295 } 2296 2297 rv = md_bitmap_init_from_disk(bitmap, 0); 2298 if (rv) { 2299 md_bitmap_free(bitmap); 2300 return ERR_PTR(rv); 2301 } 2302 2303 return bitmap; 2304 } 2305 2306 /* Loads the bitmap associated with slot and copies the resync information 2307 * to our bitmap 2308 */ 2309 static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, 2310 sector_t *high, bool clear_bits) 2311 { 2312 int rv = 0, i, j; 2313 sector_t block, lo = 0, hi = 0; 2314 struct bitmap_counts *counts; 2315 struct bitmap *bitmap; 2316 2317 bitmap = bitmap_get_from_slot(mddev, slot); 2318 if (IS_ERR(bitmap)) { 2319 pr_err("%s can't get bitmap from slot %d\n", __func__, slot); 2320 return -1; 2321 } 2322 2323 counts = &bitmap->counts; 2324 for (j = 0; j < counts->chunks; j++) { 2325 block = (sector_t)j << counts->chunkshift; 2326 if (md_bitmap_file_test_bit(bitmap, block)) { 2327 if (!lo) 2328 lo = block; 2329 hi = block; 2330 md_bitmap_file_clear_bit(bitmap, block); 2331 md_bitmap_set_memory_bits(mddev->bitmap, block, 1); 2332 md_bitmap_file_set_bit(mddev->bitmap, block); 2333 } 2334 } 2335 2336 if (clear_bits) { 2337 bitmap_update_sb(bitmap); 2338 /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs 2339 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ 2340 for (i = 0; i < bitmap->storage.file_pages; i++) 2341 if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) 2342 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 2343 __bitmap_unplug(bitmap); 2344 } 2345 __bitmap_unplug(mddev->bitmap); 2346 *low = lo; 2347 *high = hi; 2348 md_bitmap_free(bitmap); 2349 2350 return rv; 2351 } 2352 2353 static void bitmap_set_pages(void *data, unsigned long pages) 2354 { 2355 struct bitmap *bitmap = data; 2356 2357 bitmap->counts.pages = pages; 2358 } 2359 2360 static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) 2361 { 2362 struct bitmap_storage *storage; 2363 struct bitmap_counts *counts; 2364 struct bitmap *bitmap = data; 2365 bitmap_super_t *sb; 2366 2367 if (!bitmap) 2368 return -ENOENT; 2369 if (!bitmap->mddev->bitmap_info.external && 2370 !bitmap->storage.sb_page) 2371 return -EINVAL; 2372 sb = kmap_local_page(bitmap->storage.sb_page); 2373 stats->sync_size = le64_to_cpu(sb->sync_size); 2374 kunmap_local(sb); 2375 2376 counts = &bitmap->counts; 2377 stats->missing_pages = counts->missing_pages; 2378 stats->pages = counts->pages; 2379 2380 storage = &bitmap->storage; 2381 stats->file_pages = storage->file_pages; 2382 stats->file = storage->file; 2383 2384 stats->behind_writes = atomic_read(&bitmap->behind_writes); 2385 stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); 2386 stats->events_cleared = bitmap->events_cleared; 2387 return 0; 2388 } 2389 2390 static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2391 int chunksize, bool init) 2392 { 2393 /* If chunk_size is 0, choose an appropriate chunk size. 2394 * Then possibly allocate new storage space. 2395 * Then quiesce, copy bits, replace bitmap, and re-start 2396 * 2397 * This function is called both to set up the initial bitmap 2398 * and to resize the bitmap while the array is active. 2399 * If this happens as a result of the array being resized, 2400 * chunksize will be zero, and we need to choose a suitable 2401 * chunksize, otherwise we use what we are given. 2402 */ 2403 struct bitmap_storage store; 2404 struct bitmap_counts old_counts; 2405 unsigned long chunks; 2406 sector_t block; 2407 sector_t old_blocks, new_blocks; 2408 int chunkshift; 2409 int ret = 0; 2410 long pages; 2411 struct bitmap_page *new_bp; 2412 2413 if (bitmap->storage.file && !init) { 2414 pr_info("md: cannot resize file-based bitmap\n"); 2415 return -EINVAL; 2416 } 2417 2418 if (chunksize == 0) { 2419 /* If there is enough space, leave the chunk size unchanged, 2420 * else increase by factor of two until there is enough space. 2421 */ 2422 long bytes; 2423 long space = bitmap->mddev->bitmap_info.space; 2424 2425 if (space == 0) { 2426 /* We don't know how much space there is, so limit 2427 * to current size - in sectors. 2428 */ 2429 bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); 2430 if (!bitmap->mddev->bitmap_info.external) 2431 bytes += sizeof(bitmap_super_t); 2432 space = DIV_ROUND_UP(bytes, 512); 2433 bitmap->mddev->bitmap_info.space = space; 2434 } 2435 chunkshift = bitmap->counts.chunkshift; 2436 chunkshift--; 2437 do { 2438 /* 'chunkshift' is shift from block size to chunk size */ 2439 chunkshift++; 2440 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2441 bytes = DIV_ROUND_UP(chunks, 8); 2442 if (!bitmap->mddev->bitmap_info.external) 2443 bytes += sizeof(bitmap_super_t); 2444 } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < 2445 (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); 2446 } else 2447 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; 2448 2449 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2450 memset(&store, 0, sizeof(store)); 2451 if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) 2452 ret = md_bitmap_storage_alloc(&store, chunks, 2453 !bitmap->mddev->bitmap_info.external, 2454 mddev_is_clustered(bitmap->mddev) 2455 ? bitmap->cluster_slot : 0); 2456 if (ret) { 2457 md_bitmap_file_unmap(&store); 2458 goto err; 2459 } 2460 2461 pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); 2462 2463 new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL); 2464 ret = -ENOMEM; 2465 if (!new_bp) { 2466 md_bitmap_file_unmap(&store); 2467 goto err; 2468 } 2469 2470 if (!init) 2471 bitmap->mddev->pers->quiesce(bitmap->mddev, 1); 2472 2473 store.file = bitmap->storage.file; 2474 bitmap->storage.file = NULL; 2475 2476 if (store.sb_page && bitmap->storage.sb_page) 2477 memcpy(page_address(store.sb_page), 2478 page_address(bitmap->storage.sb_page), 2479 sizeof(bitmap_super_t)); 2480 spin_lock_irq(&bitmap->counts.lock); 2481 md_bitmap_file_unmap(&bitmap->storage); 2482 bitmap->storage = store; 2483 2484 old_counts = bitmap->counts; 2485 bitmap->counts.bp = new_bp; 2486 bitmap->counts.pages = pages; 2487 bitmap->counts.missing_pages = pages; 2488 bitmap->counts.chunkshift = chunkshift; 2489 bitmap->counts.chunks = chunks; 2490 bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + 2491 BITMAP_BLOCK_SHIFT); 2492 2493 blocks = min(old_counts.chunks << old_counts.chunkshift, 2494 chunks << chunkshift); 2495 2496 /* For cluster raid, need to pre-allocate bitmap */ 2497 if (mddev_is_clustered(bitmap->mddev)) { 2498 unsigned long page; 2499 for (page = 0; page < pages; page++) { 2500 ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); 2501 if (ret) { 2502 unsigned long k; 2503 2504 /* deallocate the page memory */ 2505 for (k = 0; k < page; k++) { 2506 kfree(new_bp[k].map); 2507 } 2508 kfree(new_bp); 2509 2510 /* restore some fields from old_counts */ 2511 bitmap->counts.bp = old_counts.bp; 2512 bitmap->counts.pages = old_counts.pages; 2513 bitmap->counts.missing_pages = old_counts.pages; 2514 bitmap->counts.chunkshift = old_counts.chunkshift; 2515 bitmap->counts.chunks = old_counts.chunks; 2516 bitmap->mddev->bitmap_info.chunksize = 2517 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); 2518 blocks = old_counts.chunks << old_counts.chunkshift; 2519 pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); 2520 break; 2521 } else 2522 bitmap->counts.bp[page].count += 1; 2523 } 2524 } 2525 2526 for (block = 0; block < blocks; ) { 2527 bitmap_counter_t *bmc_old, *bmc_new; 2528 int set; 2529 2530 bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); 2531 set = bmc_old && NEEDED(*bmc_old); 2532 2533 if (set) { 2534 bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2535 if (bmc_new) { 2536 if (*bmc_new == 0) { 2537 /* need to set on-disk bits too. */ 2538 sector_t end = block + new_blocks; 2539 sector_t start = block >> chunkshift; 2540 2541 start <<= chunkshift; 2542 while (start < end) { 2543 md_bitmap_file_set_bit(bitmap, block); 2544 start += 1 << chunkshift; 2545 } 2546 *bmc_new = 2; 2547 md_bitmap_count_page(&bitmap->counts, block, 1); 2548 md_bitmap_set_pending(&bitmap->counts, block); 2549 } 2550 *bmc_new |= NEEDED_MASK; 2551 } 2552 if (new_blocks < old_blocks) 2553 old_blocks = new_blocks; 2554 } 2555 block += old_blocks; 2556 } 2557 2558 if (bitmap->counts.bp != old_counts.bp) { 2559 unsigned long k; 2560 for (k = 0; k < old_counts.pages; k++) 2561 if (!old_counts.bp[k].hijacked) 2562 kfree(old_counts.bp[k].map); 2563 kfree(old_counts.bp); 2564 } 2565 2566 if (!init) { 2567 int i; 2568 while (block < (chunks << chunkshift)) { 2569 bitmap_counter_t *bmc; 2570 bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); 2571 if (bmc) { 2572 /* new space. It needs to be resynced, so 2573 * we set NEEDED_MASK. 2574 */ 2575 if (*bmc == 0) { 2576 *bmc = NEEDED_MASK | 2; 2577 md_bitmap_count_page(&bitmap->counts, block, 1); 2578 md_bitmap_set_pending(&bitmap->counts, block); 2579 } 2580 } 2581 block += new_blocks; 2582 } 2583 for (i = 0; i < bitmap->storage.file_pages; i++) 2584 set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 2585 } 2586 spin_unlock_irq(&bitmap->counts.lock); 2587 2588 if (!init) { 2589 __bitmap_unplug(bitmap); 2590 bitmap->mddev->pers->quiesce(bitmap->mddev, 0); 2591 } 2592 ret = 0; 2593 err: 2594 return ret; 2595 } 2596 2597 static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, 2598 bool init) 2599 { 2600 struct bitmap *bitmap = mddev->bitmap; 2601 2602 if (!bitmap) 2603 return 0; 2604 2605 return __bitmap_resize(bitmap, blocks, chunksize, init); 2606 } 2607 2608 static ssize_t 2609 location_show(struct mddev *mddev, char *page) 2610 { 2611 ssize_t len; 2612 if (mddev->bitmap_info.file) 2613 len = sprintf(page, "file"); 2614 else if (mddev->bitmap_info.offset) 2615 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 2616 else 2617 len = sprintf(page, "none"); 2618 len += sprintf(page+len, "\n"); 2619 return len; 2620 } 2621 2622 static ssize_t 2623 location_store(struct mddev *mddev, const char *buf, size_t len) 2624 { 2625 int rv; 2626 2627 rv = mddev_suspend_and_lock(mddev); 2628 if (rv) 2629 return rv; 2630 2631 if (mddev->pers) { 2632 if (mddev->recovery || mddev->sync_thread) { 2633 rv = -EBUSY; 2634 goto out; 2635 } 2636 } 2637 2638 if (mddev->bitmap || mddev->bitmap_info.file || 2639 mddev->bitmap_info.offset) { 2640 /* bitmap already configured. Only option is to clear it */ 2641 if (strncmp(buf, "none", 4) != 0) { 2642 rv = -EBUSY; 2643 goto out; 2644 } 2645 2646 bitmap_destroy(mddev); 2647 mddev->bitmap_info.offset = 0; 2648 if (mddev->bitmap_info.file) { 2649 struct file *f = mddev->bitmap_info.file; 2650 mddev->bitmap_info.file = NULL; 2651 fput(f); 2652 } 2653 } else { 2654 /* No bitmap, OK to set a location */ 2655 long long offset; 2656 2657 if (strncmp(buf, "none", 4) == 0) 2658 /* nothing to be done */; 2659 else if (strncmp(buf, "file:", 5) == 0) { 2660 /* Not supported yet */ 2661 rv = -EINVAL; 2662 goto out; 2663 } else { 2664 if (buf[0] == '+') 2665 rv = kstrtoll(buf+1, 10, &offset); 2666 else 2667 rv = kstrtoll(buf, 10, &offset); 2668 if (rv) 2669 goto out; 2670 if (offset == 0) { 2671 rv = -EINVAL; 2672 goto out; 2673 } 2674 if (mddev->bitmap_info.external == 0 && 2675 mddev->major_version == 0 && 2676 offset != mddev->bitmap_info.default_offset) { 2677 rv = -EINVAL; 2678 goto out; 2679 } 2680 2681 mddev->bitmap_info.offset = offset; 2682 rv = bitmap_create(mddev); 2683 if (rv) 2684 goto out; 2685 2686 rv = bitmap_load(mddev); 2687 if (rv) { 2688 mddev->bitmap_info.offset = 0; 2689 bitmap_destroy(mddev); 2690 goto out; 2691 } 2692 } 2693 } 2694 if (!mddev->external) { 2695 /* Ensure new bitmap info is stored in 2696 * metadata promptly. 2697 */ 2698 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 2699 md_wakeup_thread(mddev->thread); 2700 } 2701 rv = 0; 2702 out: 2703 mddev_unlock_and_resume(mddev); 2704 if (rv) 2705 return rv; 2706 return len; 2707 } 2708 2709 static struct md_sysfs_entry bitmap_location = 2710 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); 2711 2712 /* 'bitmap/space' is the space available at 'location' for the 2713 * bitmap. This allows the kernel to know when it is safe to 2714 * resize the bitmap to match a resized array. 2715 */ 2716 static ssize_t 2717 space_show(struct mddev *mddev, char *page) 2718 { 2719 return sprintf(page, "%lu\n", mddev->bitmap_info.space); 2720 } 2721 2722 static ssize_t 2723 space_store(struct mddev *mddev, const char *buf, size_t len) 2724 { 2725 struct bitmap *bitmap; 2726 unsigned long sectors; 2727 int rv; 2728 2729 rv = kstrtoul(buf, 10, §ors); 2730 if (rv) 2731 return rv; 2732 2733 if (sectors == 0) 2734 return -EINVAL; 2735 2736 bitmap = mddev->bitmap; 2737 if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) 2738 return -EFBIG; /* Bitmap is too big for this small space */ 2739 2740 /* could make sure it isn't too big, but that isn't really 2741 * needed - user-space should be careful. 2742 */ 2743 mddev->bitmap_info.space = sectors; 2744 return len; 2745 } 2746 2747 static struct md_sysfs_entry bitmap_space = 2748 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); 2749 2750 static ssize_t 2751 timeout_show(struct mddev *mddev, char *page) 2752 { 2753 ssize_t len; 2754 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 2755 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 2756 2757 len = sprintf(page, "%lu", secs); 2758 if (jifs) 2759 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 2760 len += sprintf(page+len, "\n"); 2761 return len; 2762 } 2763 2764 static ssize_t 2765 timeout_store(struct mddev *mddev, const char *buf, size_t len) 2766 { 2767 /* timeout can be set at any time */ 2768 unsigned long timeout; 2769 int rv = strict_strtoul_scaled(buf, &timeout, 4); 2770 if (rv) 2771 return rv; 2772 2773 /* just to make sure we don't overflow... */ 2774 if (timeout >= LONG_MAX / HZ) 2775 return -EINVAL; 2776 2777 timeout = timeout * HZ / 10000; 2778 2779 if (timeout >= MAX_SCHEDULE_TIMEOUT) 2780 timeout = MAX_SCHEDULE_TIMEOUT-1; 2781 if (timeout < 1) 2782 timeout = 1; 2783 2784 mddev->bitmap_info.daemon_sleep = timeout; 2785 mddev_set_timeout(mddev, timeout, false); 2786 md_wakeup_thread(mddev->thread); 2787 2788 return len; 2789 } 2790 2791 static struct md_sysfs_entry bitmap_timeout = 2792 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); 2793 2794 static ssize_t 2795 backlog_show(struct mddev *mddev, char *page) 2796 { 2797 return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); 2798 } 2799 2800 static ssize_t 2801 backlog_store(struct mddev *mddev, const char *buf, size_t len) 2802 { 2803 unsigned long backlog; 2804 unsigned long old_mwb = mddev->bitmap_info.max_write_behind; 2805 struct md_rdev *rdev; 2806 bool has_write_mostly = false; 2807 int rv = kstrtoul(buf, 10, &backlog); 2808 if (rv) 2809 return rv; 2810 if (backlog > COUNTER_MAX) 2811 return -EINVAL; 2812 2813 rv = mddev_suspend_and_lock(mddev); 2814 if (rv) 2815 return rv; 2816 2817 /* 2818 * Without write mostly device, it doesn't make sense to set 2819 * backlog for max_write_behind. 2820 */ 2821 rdev_for_each(rdev, mddev) { 2822 if (test_bit(WriteMostly, &rdev->flags)) { 2823 has_write_mostly = true; 2824 break; 2825 } 2826 } 2827 if (!has_write_mostly) { 2828 pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", 2829 mdname(mddev)); 2830 mddev_unlock(mddev); 2831 return -EINVAL; 2832 } 2833 2834 mddev->bitmap_info.max_write_behind = backlog; 2835 if (!backlog && mddev->serial_info_pool) { 2836 /* serial_info_pool is not needed if backlog is zero */ 2837 if (!mddev->serialize_policy) 2838 mddev_destroy_serial_pool(mddev, NULL); 2839 } else if (backlog && !mddev->serial_info_pool) { 2840 /* serial_info_pool is needed since backlog is not zero */ 2841 rdev_for_each(rdev, mddev) 2842 mddev_create_serial_pool(mddev, rdev); 2843 } 2844 if (old_mwb != backlog) 2845 bitmap_update_sb(mddev->bitmap); 2846 2847 mddev_unlock_and_resume(mddev); 2848 return len; 2849 } 2850 2851 static struct md_sysfs_entry bitmap_backlog = 2852 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); 2853 2854 static ssize_t 2855 chunksize_show(struct mddev *mddev, char *page) 2856 { 2857 return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); 2858 } 2859 2860 static ssize_t 2861 chunksize_store(struct mddev *mddev, const char *buf, size_t len) 2862 { 2863 /* Can only be changed when no bitmap is active */ 2864 int rv; 2865 unsigned long csize; 2866 if (mddev->bitmap) 2867 return -EBUSY; 2868 rv = kstrtoul(buf, 10, &csize); 2869 if (rv) 2870 return rv; 2871 if (csize < 512 || 2872 !is_power_of_2(csize)) 2873 return -EINVAL; 2874 if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * 2875 sizeof(((bitmap_super_t *)0)->chunksize)))) 2876 return -EOVERFLOW; 2877 mddev->bitmap_info.chunksize = csize; 2878 return len; 2879 } 2880 2881 static struct md_sysfs_entry bitmap_chunksize = 2882 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 2883 2884 static ssize_t metadata_show(struct mddev *mddev, char *page) 2885 { 2886 if (mddev_is_clustered(mddev)) 2887 return sprintf(page, "clustered\n"); 2888 return sprintf(page, "%s\n", (mddev->bitmap_info.external 2889 ? "external" : "internal")); 2890 } 2891 2892 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) 2893 { 2894 if (mddev->bitmap || 2895 mddev->bitmap_info.file || 2896 mddev->bitmap_info.offset) 2897 return -EBUSY; 2898 if (strncmp(buf, "external", 8) == 0) 2899 mddev->bitmap_info.external = 1; 2900 else if ((strncmp(buf, "internal", 8) == 0) || 2901 (strncmp(buf, "clustered", 9) == 0)) 2902 mddev->bitmap_info.external = 0; 2903 else 2904 return -EINVAL; 2905 return len; 2906 } 2907 2908 static struct md_sysfs_entry bitmap_metadata = 2909 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 2910 2911 static ssize_t can_clear_show(struct mddev *mddev, char *page) 2912 { 2913 int len; 2914 struct bitmap *bitmap; 2915 2916 spin_lock(&mddev->lock); 2917 bitmap = mddev->bitmap; 2918 if (bitmap) 2919 len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" : 2920 "true")); 2921 else 2922 len = sprintf(page, "\n"); 2923 spin_unlock(&mddev->lock); 2924 return len; 2925 } 2926 2927 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) 2928 { 2929 struct bitmap *bitmap = mddev->bitmap; 2930 2931 if (!bitmap) 2932 return -ENOENT; 2933 2934 if (strncmp(buf, "false", 5) == 0) { 2935 bitmap->need_sync = 1; 2936 return len; 2937 } 2938 2939 if (strncmp(buf, "true", 4) == 0) { 2940 if (mddev->degraded) 2941 return -EBUSY; 2942 bitmap->need_sync = 0; 2943 return len; 2944 } 2945 2946 return -EINVAL; 2947 } 2948 2949 static struct md_sysfs_entry bitmap_can_clear = 2950 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); 2951 2952 static ssize_t 2953 behind_writes_used_show(struct mddev *mddev, char *page) 2954 { 2955 ssize_t ret; 2956 struct bitmap *bitmap; 2957 2958 spin_lock(&mddev->lock); 2959 bitmap = mddev->bitmap; 2960 if (!bitmap) 2961 ret = sprintf(page, "0\n"); 2962 else 2963 ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); 2964 spin_unlock(&mddev->lock); 2965 2966 return ret; 2967 } 2968 2969 static ssize_t 2970 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) 2971 { 2972 struct bitmap *bitmap = mddev->bitmap; 2973 2974 if (bitmap) 2975 bitmap->behind_writes_used = 0; 2976 return len; 2977 } 2978 2979 static struct md_sysfs_entry max_backlog_used = 2980 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, 2981 behind_writes_used_show, behind_writes_used_reset); 2982 2983 static struct attribute *md_bitmap_attrs[] = { 2984 &bitmap_location.attr, 2985 &bitmap_space.attr, 2986 &bitmap_timeout.attr, 2987 &bitmap_backlog.attr, 2988 &bitmap_chunksize.attr, 2989 &bitmap_metadata.attr, 2990 &bitmap_can_clear.attr, 2991 &max_backlog_used.attr, 2992 NULL 2993 }; 2994 const struct attribute_group md_bitmap_group = { 2995 .name = "bitmap", 2996 .attrs = md_bitmap_attrs, 2997 }; 2998 2999 static struct bitmap_operations bitmap_ops = { 3000 .enabled = bitmap_enabled, 3001 .create = bitmap_create, 3002 .resize = bitmap_resize, 3003 .load = bitmap_load, 3004 .destroy = bitmap_destroy, 3005 .flush = bitmap_flush, 3006 .write_all = bitmap_write_all, 3007 .dirty_bits = bitmap_dirty_bits, 3008 .unplug = bitmap_unplug, 3009 .daemon_work = bitmap_daemon_work, 3010 3011 .start_behind_write = bitmap_start_behind_write, 3012 .end_behind_write = bitmap_end_behind_write, 3013 .wait_behind_writes = bitmap_wait_behind_writes, 3014 3015 .start_write = bitmap_start_write, 3016 .end_write = bitmap_end_write, 3017 .start_sync = bitmap_start_sync, 3018 .end_sync = bitmap_end_sync, 3019 .cond_end_sync = bitmap_cond_end_sync, 3020 .close_sync = bitmap_close_sync, 3021 3022 .update_sb = bitmap_update_sb, 3023 .get_stats = bitmap_get_stats, 3024 3025 .sync_with_cluster = bitmap_sync_with_cluster, 3026 .get_from_slot = bitmap_get_from_slot, 3027 .copy_from_slot = bitmap_copy_from_slot, 3028 .set_pages = bitmap_set_pages, 3029 .free = md_bitmap_free, 3030 }; 3031 3032 void mddev_set_bitmap_ops(struct mddev *mddev) 3033 { 3034 mddev->bitmap_ops = &bitmap_ops; 3035 } 3036