xref: /linux/drivers/md/md-llbitmap.c (revision 7fe6ac157b7e15c8976bd62ad7cb98e248884e83)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 
3 #include <linux/blkdev.h>
4 #include <linux/module.h>
5 #include <linux/errno.h>
6 #include <linux/slab.h>
7 #include <linux/init.h>
8 #include <linux/timer.h>
9 #include <linux/sched.h>
10 #include <linux/list.h>
11 #include <linux/file.h>
12 #include <linux/seq_file.h>
13 #include <trace/events/block.h>
14 
15 #include "md.h"
16 #include "md-bitmap.h"
17 
18 /*
19  * #### Background
20  *
21  * Redundant data is used to enhance data fault tolerance, and the storage
22  * methods for redundant data vary depending on the RAID levels. And it's
23  * important to maintain the consistency of redundant data.
24  *
25  * Bitmap is used to record which data blocks have been synchronized and which
26  * ones need to be resynchronized or recovered. Each bit in the bitmap
27  * represents a segment of data in the array. When a bit is set, it indicates
28  * that the multiple redundant copies of that data segment may not be
29  * consistent. Data synchronization can be performed based on the bitmap after
30  * power failure or readding a disk. If there is no bitmap, a full disk
31  * synchronization is required.
32  *
33  * #### Key Features
34  *
35  *  - IO fastpath is lockless, if user issues lots of write IO to the same
36  *  bitmap bit in a short time, only the first write has additional overhead
37  *  to update bitmap bit, no additional overhead for the following writes;
38  *  - support only resync or recover written data, means in the case creating
39  *  new array or replacing with a new disk, there is no need to do a full disk
40  *  resync/recovery;
41  *
42  * #### Key Concept
43  *
44  * ##### State Machine
45  *
 * Each bit is one byte; the full list of states is in enum llbitmap_state,
 * and the actions that can change a state are in enum llbitmap_action. The
 * tables below show the transitions for the five base states:
48  *
49  * llbitmap state machine: transitions between states
50  *
51  * |           | Startwrite | Startsync | Endsync | Abortsync|
52  * | --------- | ---------- | --------- | ------- | -------  |
53  * | Unwritten | Dirty      | x         | x       | x        |
54  * | Clean     | Dirty      | x         | x       | x        |
55  * | Dirty     | x          | x         | x       | x        |
56  * | NeedSync  | x          | Syncing   | x       | x        |
57  * | Syncing   | x          | Syncing   | Dirty   | NeedSync |
58  *
59  * |           | Reload   | Daemon | Discard   | Stale     |
60  * | --------- | -------- | ------ | --------- | --------- |
61  * | Unwritten | x        | x      | x         | x         |
62  * | Clean     | x        | x      | Unwritten | NeedSync  |
63  * | Dirty     | NeedSync | Clean  | Unwritten | NeedSync  |
64  * | NeedSync  | x        | x      | Unwritten | x         |
65  * | Syncing   | NeedSync | x      | Unwritten | NeedSync  |
66  *
67  * Typical scenarios:
68  *
69  * 1) Create new array
70  * All bits will be set to Unwritten by default, if --assume-clean is set,
71  * all bits will be set to Clean instead.
72  *
73  * 2) write data, raid1/raid10 have full copy of data, while raid456 doesn't and
74  * rely on xor data
75  *
 * 2.1) write new data to raid1/raid10:
 * Unwritten --Startwrite--> Dirty
 *
 * 2.2) write new data to raid456:
 * Unwritten --Startwrite--> NeedSync
 *
 * Because the initial recover for raid456 is skipped, the xor data is not built
 * yet, the bit must be set to NeedSync first and after lazy initial recover is
 * finished, the bit will finally be set to Dirty (see 5.1 and 5.4);
 *
 * 2.3) cover write
 * Clean --Startwrite--> Dirty
88  *
89  * 3) daemon, if the array is not degraded:
90  * Dirty --Daemon--> Clean
91  *
92  * 4) discard
93  * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten
94  *
95  * 5) resync and recover
96  *
97  * 5.1) common process
98  * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean
99  *
100  * 5.2) resync after power failure
101  * Dirty --Reload--> NeedSync
102  *
103  * 5.3) recover while replacing with a new disk
104  * By default, the old bitmap framework will recover all data, and llbitmap
105  * implements this by a new helper, see llbitmap_skip_sync_blocks:
106  *
107  * skip recover for bits other than dirty or clean;
108  *
109  * 5.4) lazy initial recover for raid5:
110  * By default, the old bitmap framework will only allow new recover when there
111  * are spares(new disk), a new recovery flag MD_RECOVERY_LAZY_RECOVER is added
112  * to perform raid456 lazy recover for set bits(from 2.2).
113  *
114  * 6. special handling for degraded array:
115  *
116  * - Dirty bits will never be cleared, daemon will just do nothing, so that if
117  *   a disk is readded, Clean bits can be skipped with recovery;
118  * - Dirty bits will convert to Syncing from start write, to do data recovery
119  *   for new added disks;
120  * - New write will convert bits to NeedSync directly;
121  *
122  * ##### Bitmap IO
123  *
124  * ##### Chunksize
125  *
 * The default bitmap size is 128k, including the 1k bitmap super block, and
 * the default size of the segment of data in the array covered by each bit
 * (chunksize) is 64k; the chunksize is doubled repeatedly while the total
 * number of bits is not less than 127k. (see llbitmap_init)
130  *
131  * ##### READ
132  *
 * While creating the bitmap, all pages will be allocated and read for the
 * llbitmap; there won't be any further reads afterwards
135  *
136  * ##### WRITE
137  *
138  * WRITE IO is divided into logical_block_size of the array, the dirty state
139  * of each block is tracked independently, for example:
140  *
 * each page is 4k and contains 8 blocks; each block is 512 bytes and
 * contains 512 bits;
 *
 * | page0 | page1 | ... | page 31 |
 * |       |
 * |        \-----------------------\
 * |                                |
 * | block0 | block1 | ... | block 7|
 * |        |
 * |         \-----------------\
 * |                            |
 * | bit0 | bit1 | ... | bit511 |
152  *
153  * From IO path, if one bit is changed to Dirty or NeedSync, the corresponding
154  * subpage will be marked dirty, such block must write first before the IO is
155  * issued. This behaviour will affect IO performance, to reduce the impact, if
156  * multiple bits are changed in the same block in a short time, all bits in this
157  * block will be changed to Dirty/NeedSync, so that there won't be any overhead
158  * until daemon clears dirty bits.
159  *
160  * ##### Dirty Bits synchronization
161  *
162  * IO fast path will set bits to dirty, and those dirty bits will be cleared
163  * by daemon after IO is done. llbitmap_page_ctl is used to synchronize between
164  * IO path and daemon;
165  *
166  * IO path:
167  *  1) try to grab a reference, if succeed, set expire time after 5s and return;
168  *  2) if failed to grab a reference, wait for daemon to finish clearing dirty
169  *  bits;
170  *
171  * Daemon (Daemon will be woken up every daemon_sleep seconds):
172  * For each page:
173  *  1) check if page expired, if not skip this page; for expired page:
174  *  2) suspend the page and wait for inflight write IO to be done;
175  *  3) change dirty page to clean;
176  *  4) resume the page;
177  */
178 
179 #define BITMAP_DATA_OFFSET 1024
180 
181 /* 64k is the max IO size of sync IO for raid1/raid10 */
182 #define MIN_CHUNK_SIZE (64 * 2)
183 
184 /* By default, daemon will be woken up every 30s */
185 #define DEFAULT_DAEMON_SLEEP 30
186 
187 /*
188  * Dirtied bits that have not been accessed for more than 5s will be cleared
189  * by daemon.
190  */
191 #define DEFAULT_BARRIER_IDLE 5
192 
/*
 * Per-bit on-disk state. Each bit occupies one byte, which is why the IO
 * fast path can read and write states without locking (see llbitmap_read()
 * and llbitmap_write()).
 */
enum llbitmap_state {
	/* No valid data, init state after assemble the array */
	BitUnwritten = 0,
	/* data is consistent */
	BitClean,
	/* data will be consistent after IO is done, set directly for writes */
	BitDirty,
	/*
	 * data need to be resynchronized:
	 * 1) set directly for writes if array is degraded, prevent full disk
	 * synchronization after readding a disk;
	 * 2) reassemble the array after power failure, and dirty bits are
	 * found after reloading the bitmap;
	 * 3) set for first write for raid5, to build initial xor data lazily
	 */
	BitNeedSync,
	/* data is synchronizing */
	BitSyncing,
	/*
	 * Proactive sync requested for unwritten region (raid456 only).
	 * Triggered via sysfs when user wants to pre-build XOR parity
	 * for regions that have never been written.
	 */
	BitNeedSyncUnwritten,
	/* Proactive sync in progress for unwritten region */
	BitSyncingUnwritten,
	/*
	 * XOR parity has been pre-built for a region that has never had
	 * user data written. When user writes to this region, it transitions
	 * to BitDirty.
	 */
	BitCleanUnwritten,
	/* number of valid states; sizes the first dimension of state_machine */
	BitStateCount,
	/* sentinel: no state transition for a given (state, action) pair */
	BitNone = 0xff,
};
228 
/*
 * Events that drive bit state transitions; the resulting state for each
 * (state, action) pair is given by the state_machine[][] table below.
 */
enum llbitmap_action {
	/* User write new data, this is the only action from IO fast path */
	BitmapActionStartwrite = 0,
	/* Start recovery */
	BitmapActionStartsync,
	/* Finish recovery */
	BitmapActionEndsync,
	/* Failed recovery */
	BitmapActionAbortsync,
	/* Reassemble the array */
	BitmapActionReload,
	/* Daemon thread is trying to clear dirty bits */
	BitmapActionDaemon,
	/* Data is deleted */
	BitmapActionDiscard,
	/*
	 * Bitmap is stale, mark all bits in addition to BitUnwritten to
	 * BitNeedSync.
	 */
	BitmapActionStale,
	/*
	 * Proactive sync trigger for raid456 - builds XOR parity for
	 * Unwritten regions without requiring user data write first.
	 */
	BitmapActionProactiveSync,
	/* Revert a proactive-sync (*Unwritten) state back to BitUnwritten */
	BitmapActionClearUnwritten,
	/* number of table-driven actions; sizes state_machine's 2nd dim */
	BitmapActionCount,
	/* Init state is BitUnwritten (handled specially, not in the table) */
	BitmapActionInit,
};
259 
/* Bit numbers for llbitmap_page_ctl->flags */
enum llbitmap_page_state {
	/* NOTE(review): not set/cleared in this part of the file; presumably
	 * marks a page whose writeback is pending — confirm against the
	 * flush path. */
	LLPageFlush = 0,
	/* some bit state in this page changed and needs writeback */
	LLPageDirty,
};
264 
/*
 * Per-page control structure; synchronizes the IO fast path (which dirties
 * bits) with the daemon (which clears them), see "Dirty Bits
 * synchronization" in the header comment.
 */
struct llbitmap_page_ctl {
	/* bit states stored in @page, one byte per bit */
	char *state;
	/* cached on-disk bitmap page */
	struct page *page;
	/* jiffies after which the daemon may clear this page's dirty bits */
	unsigned long expire;
	/* LLPage* flag bits */
	unsigned long flags;
	/* waiters for the page barrier (see llbitmap_raise_barrier()) */
	wait_queue_head_t wait;
	/* inflight-write reference; killed while the daemon suspends the page */
	struct percpu_ref active;
	/* Per block size dirty state, maximum 64k page / 1 sector = 128 */
	unsigned long dirty[];
};
275 
struct llbitmap {
	/* the array this bitmap belongs to */
	struct mddev *mddev;
	/* one control structure per cached bitmap page */
	struct llbitmap_page_ctl **pctl;

	/* number of cached bitmap pages */
	unsigned int nr_pages;
	/* bitmap IO unit in bytes (logical block size of the array) */
	unsigned int io_size;
	/* number of io_size blocks per bitmap page */
	unsigned int blocks_per_page;

	/* shift of one chunk */
	unsigned long chunkshift;
	/* size of one chunk in sector */
	unsigned long chunksize;
	/* total number of chunks */
	unsigned long chunks;
	/* NOTE(review): updated outside this view, presumably at resync end */
	unsigned long last_end_sync;
	/*
	 * time in seconds that dirty bits will be cleared if the page is not
	 * accessed.
	 */
	unsigned long barrier_idle;
	/* fires on first BitDirty state */
	struct timer_list pending_timer;
	struct work_struct daemon_work;

	/* BITMAP_* flags, e.g. BITMAP_WRITE_ERROR, BITMAP_CLEAN */
	unsigned long flags;
	__u64	events_cleared;

	/* for slow disks */
	atomic_t behind_writes;
	wait_queue_head_t behind_wait;
};
307 
/*
 * Work item for flushing the bitmap from the unplug path; presumably queued
 * on md_llbitmap_unplug_wq — confirm against the unplug code outside this
 * view.
 */
struct llbitmap_unplug_work {
	struct work_struct work;
	struct llbitmap *llbitmap;
	/* completed once the unplug work is done */
	struct completion *done;
};
313 
314 static struct workqueue_struct *md_llbitmap_io_wq;
315 static struct workqueue_struct *md_llbitmap_unplug_wq;
316 
/*
 * state_machine[state][action] yields the next state for a bit; BitNone
 * means the action does not change that state. Keep this table in sync
 * with the transition tables in the comment at the top of this file.
 */
static char state_machine[BitStateCount][BitmapActionCount] = {
	[BitUnwritten] = {
		[BitmapActionStartwrite]	= BitDirty,
		[BitmapActionStartsync]		= BitNone,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitNone,
		[BitmapActionReload]		= BitNone,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitNone,
		[BitmapActionStale]		= BitNone,
		[BitmapActionProactiveSync]	= BitNeedSyncUnwritten,
		[BitmapActionClearUnwritten]	= BitNone,
	},
	[BitClean] = {
		[BitmapActionStartwrite]	= BitDirty,
		[BitmapActionStartsync]		= BitNone,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitNone,
		[BitmapActionReload]		= BitNone,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitNeedSync,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitNone,
	},
	[BitDirty] = {
		[BitmapActionStartwrite]	= BitNone,
		[BitmapActionStartsync]		= BitNone,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitNone,
		[BitmapActionReload]		= BitNeedSync,
		[BitmapActionDaemon]		= BitClean,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitNeedSync,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitNone,
	},
	[BitNeedSync] = {
		[BitmapActionStartwrite]	= BitNone,
		[BitmapActionStartsync]		= BitSyncing,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitNone,
		[BitmapActionReload]		= BitNone,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitNone,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitNone,
	},
	[BitSyncing] = {
		[BitmapActionStartwrite]	= BitNone,
		[BitmapActionStartsync]		= BitSyncing,
		[BitmapActionEndsync]		= BitDirty,
		[BitmapActionAbortsync]		= BitNeedSync,
		[BitmapActionReload]		= BitNeedSync,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitNeedSync,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitNone,
	},
	/* a user write upgrades the proactive sync to a real NeedSync */
	[BitNeedSyncUnwritten] = {
		[BitmapActionStartwrite]	= BitNeedSync,
		[BitmapActionStartsync]		= BitSyncingUnwritten,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitUnwritten,
		[BitmapActionReload]		= BitUnwritten,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitUnwritten,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitUnwritten,
	},
	[BitSyncingUnwritten] = {
		[BitmapActionStartwrite]	= BitSyncing,
		[BitmapActionStartsync]		= BitSyncingUnwritten,
		[BitmapActionEndsync]		= BitCleanUnwritten,
		[BitmapActionAbortsync]		= BitUnwritten,
		[BitmapActionReload]		= BitUnwritten,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitUnwritten,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitUnwritten,
	},
	[BitCleanUnwritten] = {
		[BitmapActionStartwrite]	= BitDirty,
		[BitmapActionStartsync]		= BitNone,
		[BitmapActionEndsync]		= BitNone,
		[BitmapActionAbortsync]		= BitNone,
		[BitmapActionReload]		= BitNone,
		[BitmapActionDaemon]		= BitNone,
		[BitmapActionDiscard]		= BitUnwritten,
		[BitmapActionStale]		= BitUnwritten,
		[BitmapActionProactiveSync]	= BitNone,
		[BitmapActionClearUnwritten]	= BitUnwritten,
	},
};
415 
416 static void __llbitmap_flush(struct mddev *mddev);
417 
/*
 * Return the state byte of chunk @pos. Lockless: each bit is one byte, so
 * a plain load suffices on the fast path.
 */
static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
{
	/* bit data starts after the 1k super block area */
	loff_t off = pos + BITMAP_DATA_OFFSET;

	return llbitmap->pctl[off >> PAGE_SHIFT]->state[offset_in_page(off)];
}
429 
430 /* set all the bits in the subpage as dirty */
llbitmap_infect_dirty_bits(struct llbitmap * llbitmap,struct llbitmap_page_ctl * pctl,unsigned int block)431 static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
432 				       struct llbitmap_page_ctl *pctl,
433 				       unsigned int block)
434 {
435 	bool level_456 = raid_is_456(llbitmap->mddev);
436 	unsigned int io_size = llbitmap->io_size;
437 	int pos;
438 
439 	for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
440 		switch (pctl->state[pos]) {
441 		case BitUnwritten:
442 			pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
443 			break;
444 		case BitClean:
445 		case BitCleanUnwritten:
446 			pctl->state[pos] = BitDirty;
447 			break;
448 		}
449 	}
450 }
451 
/*
 * Mark the io_size block containing @offset of page @idx dirty so it gets
 * written back before user IO is issued; may convert all bits in the block
 * (see "WRITE" in the header comment) to amortize bitmap writeback.
 */
static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
				    int offset, bool infect)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
	unsigned int io_size = llbitmap->io_size;
	int block = offset / io_size;
	int i;

	if (!test_bit(LLPageDirty, &pctl->flags))
		set_bit(LLPageDirty, &pctl->flags);

	/*
	 * For degraded array, dirty bits will never be cleared, and we must
	 * resync all the dirty bits, hence skip infect new dirty bits to
	 * prevent resync unnecessary data.
	 */
	if (llbitmap->mddev->degraded || !infect) {
		set_bit(block, pctl->dirty);
		return;
	}

	/*
	 * The subpage usually contains a total of 512 bits. If any single bit
	 * within the subpage is marked as dirty, the entire sector will be
	 * written. To avoid impacting write performance, when multiple bits
	 * within the same sector are modified within llbitmap->barrier_idle,
	 * all bits in the sector will be collectively marked as dirty at once.
	 */
	if (test_and_set_bit(block, pctl->dirty)) {
		llbitmap_infect_dirty_bits(llbitmap, pctl, block);
		return;
	}

	/* first dirtying of this block: infect if other bits are pending */
	for (i = block * io_size; i < (block + 1) * io_size; i++) {
		if (i == offset)
			continue;

		if (pctl->state[i] == BitDirty ||
		    pctl->state[i] == BitNeedSync) {
			llbitmap_infect_dirty_bits(llbitmap, pctl, block);
			return;
		}
	}
}
495 
/*
 * Store @state for chunk @pos and, for dirtying states, mark the
 * containing io_size block for writeback.
 */
static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
			   loff_t pos)
{
	loff_t off = pos + BITMAP_DATA_OFFSET;
	unsigned int idx = off >> PAGE_SHIFT;
	unsigned int bit = offset_in_page(off);

	llbitmap->pctl[idx]->state[bit] = state;

	switch (state) {
	case BitDirty:
	case BitNeedSync:
		llbitmap_set_page_dirty(llbitmap, idx, bit, true);
		break;
	case BitNeedSyncUnwritten:
		/* proactive sync: don't infect neighbouring bits */
		llbitmap_set_page_dirty(llbitmap, idx, bit, false);
		break;
	default:
		break;
	}
}
512 
llbitmap_read_page(struct llbitmap * llbitmap,int idx)513 static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
514 {
515 	struct mddev *mddev = llbitmap->mddev;
516 	struct page *page = NULL;
517 	struct md_rdev *rdev;
518 
519 	if (llbitmap->pctl && llbitmap->pctl[idx])
520 		page = llbitmap->pctl[idx]->page;
521 	if (page)
522 		return page;
523 
524 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
525 	if (!page)
526 		return ERR_PTR(-ENOMEM);
527 
528 	rdev_for_each(rdev, mddev) {
529 		sector_t sector;
530 
531 		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags) ||
532 		    !test_bit(In_sync, &rdev->flags))
533 			continue;
534 
535 		sector = mddev->bitmap_info.offset +
536 			 (idx << PAGE_SECTORS_SHIFT);
537 
538 		if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
539 				 true))
540 			return page;
541 
542 		md_error(mddev, rdev);
543 	}
544 
545 	__free_page(page);
546 	return ERR_PTR(-EIO);
547 }
548 
/*
 * Write back the dirty io_size blocks of bitmap page @idx to every active
 * member. A block's dirty flag is cleared before the IO is issued.
 */
static void llbitmap_write_page(struct llbitmap *llbitmap, int idx)
{
	struct page *page = llbitmap->pctl[idx]->page;
	struct mddev *mddev = llbitmap->mddev;
	struct md_rdev *rdev;
	int block;

	for (block = 0; block < llbitmap->blocks_per_page; block++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		/* only blocks whose bits were modified are written */
		if (!test_and_clear_bit(block, pctl->dirty))
			continue;

		rdev_for_each(rdev, mddev) {
			sector_t sector;
			/* sectors covered by one io_size block */
			sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT;

			if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
				continue;

			/* bitmap data lives bitmap_info.offset past sb_start */
			sector = mddev->bitmap_info.offset + rdev->sb_start +
				 (idx << PAGE_SECTORS_SHIFT) +
				 block * bit_sector;
			md_write_metadata(mddev, rdev, sector,
					  llbitmap->io_size, page,
					  block * llbitmap->io_size);
		}
	}
}
578 
active_release(struct percpu_ref * ref)579 static void active_release(struct percpu_ref *ref)
580 {
581 	struct llbitmap_page_ctl *pctl =
582 		container_of(ref, struct llbitmap_page_ctl, active);
583 
584 	wake_up(&pctl->wait);
585 }
586 
llbitmap_free_pages(struct llbitmap * llbitmap)587 static void llbitmap_free_pages(struct llbitmap *llbitmap)
588 {
589 	int i;
590 
591 	if (!llbitmap->pctl)
592 		return;
593 
594 	for (i = 0; i < llbitmap->nr_pages; i++) {
595 		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];
596 
597 		if (!pctl || !pctl->page)
598 			break;
599 
600 		__free_page(pctl->page);
601 		percpu_ref_exit(&pctl->active);
602 	}
603 
604 	kfree(llbitmap->pctl[0]);
605 	kfree(llbitmap->pctl);
606 	llbitmap->pctl = NULL;
607 }
608 
/*
 * Allocate the pctl pointer array plus one flat slab holding every
 * llbitmap_page_ctl, then read all bitmap pages from disk and initialize
 * their control structures. On failure everything allocated so far is
 * released (via llbitmap_free_pages() once pctl[0] is populated).
 */
static int llbitmap_cache_pages(struct llbitmap *llbitmap)
{
	struct llbitmap_page_ctl *pctl;
	/* one byte per chunk, stored after BITMAP_DATA_OFFSET of sb data */
	unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks +
					     BITMAP_DATA_OFFSET, PAGE_SIZE);
	unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS(
						llbitmap->blocks_per_page));
	int i;

	llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *),
				       GFP_KERNEL | __GFP_ZERO);
	if (!llbitmap->pctl)
		return -ENOMEM;

	/* align each entry to a cache line */
	size = round_up(size, cache_line_size());
	pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO);
	if (!pctl) {
		kfree(llbitmap->pctl);
		return -ENOMEM;
	}

	llbitmap->nr_pages = nr_pages;

	for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) {
		struct page *page = llbitmap_read_page(llbitmap, i);

		/*
		 * Store the entry even on failure so llbitmap_free_pages()
		 * can reach the flat allocation through pctl[0].
		 */
		llbitmap->pctl[i] = pctl;

		if (IS_ERR(page)) {
			llbitmap_free_pages(llbitmap);
			return PTR_ERR(page);
		}

		if (percpu_ref_init(&pctl->active, active_release,
				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
			__free_page(page);
			llbitmap_free_pages(llbitmap);
			return -ENOMEM;
		}

		pctl->page = page;
		pctl->state = page_address(page);
		init_waitqueue_head(&pctl->wait);
	}

	return 0;
}
656 
657 /*
658  * Check if all underlying disks support write_zeroes with unmap.
659  */
llbitmap_all_disks_support_wzeroes_unmap(struct llbitmap * llbitmap)660 static bool llbitmap_all_disks_support_wzeroes_unmap(struct llbitmap *llbitmap)
661 {
662 	struct mddev *mddev = llbitmap->mddev;
663 	struct md_rdev *rdev;
664 
665 	rdev_for_each(rdev, mddev) {
666 		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
667 			continue;
668 
669 		if (bdev_write_zeroes_unmap_sectors(rdev->bdev) == 0)
670 			return false;
671 	}
672 
673 	return true;
674 }
675 
676 /*
677  * Issue write_zeroes to all underlying disks to zero their data regions.
678  * This ensures parity consistency for RAID-456 (0 XOR 0 = 0).
679  * Returns true if all disks were successfully zeroed.
680  */
llbitmap_zero_all_disks(struct llbitmap * llbitmap)681 static bool llbitmap_zero_all_disks(struct llbitmap *llbitmap)
682 {
683 	struct mddev *mddev = llbitmap->mddev;
684 	struct md_rdev *rdev;
685 	sector_t dev_sectors = mddev->dev_sectors;
686 	int ret;
687 
688 	rdev_for_each(rdev, mddev) {
689 		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
690 			continue;
691 
692 		ret = blkdev_issue_zeroout(rdev->bdev,
693 					   rdev->data_offset,
694 					   dev_sectors,
695 					   GFP_KERNEL, 0);
696 		if (ret) {
697 			pr_warn("md/llbitmap: failed to zero disk %pg: %d\n",
698 				rdev->bdev, ret);
699 			return false;
700 		}
701 	}
702 
703 	return true;
704 }
705 
llbitmap_init_state(struct llbitmap * llbitmap)706 static void llbitmap_init_state(struct llbitmap *llbitmap)
707 {
708 	struct mddev *mddev = llbitmap->mddev;
709 	enum llbitmap_state state = BitUnwritten;
710 	unsigned long i;
711 
712 	if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags)) {
713 		state = BitClean;
714 	} else if (raid_is_456(mddev) &&
715 		   llbitmap_all_disks_support_wzeroes_unmap(llbitmap)) {
716 		/*
717 		 * All disks support write_zeroes with unmap. Zero all disks
718 		 * to ensure parity consistency, then set BitCleanUnwritten
719 		 * to skip initial sync.
720 		 */
721 		if (llbitmap_zero_all_disks(llbitmap))
722 			state = BitCleanUnwritten;
723 	}
724 
725 	for (i = 0; i < llbitmap->chunks; i++)
726 		llbitmap_write(llbitmap, state, i);
727 }
728 
/* The return value is only used from resync, where @start == @end. */
static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
						  unsigned long start,
						  unsigned long end,
						  enum llbitmap_action action)
{
	struct mddev *mddev = llbitmap->mddev;
	enum llbitmap_state state = BitNone;
	bool level_456 = raid_is_456(llbitmap->mddev);
	bool need_resync = false;
	bool need_recovery = false;

	/* bitmap writes already failed; don't attempt further transitions */
	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return BitNone;

	/* initial state is handled separately, not through the table */
	if (action == BitmapActionInit) {
		llbitmap_init_state(llbitmap);
		return BitNone;
	}

	while (start <= end) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		/* corrupt state byte: force a resync of this chunk */
		if (c < 0 || c >= BitStateCount) {
			pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n",
			       __func__, start, c, action);
			state = BitNeedSync;
			goto write_bitmap;
		}

		if (c == BitNeedSync || c == BitNeedSyncUnwritten)
			need_resync = !mddev->degraded;

		state = state_machine[c][action];
write_bitmap:
		if (unlikely(mddev->degraded)) {
			/* For degraded array, mark new data as need sync. */
			if (state == BitDirty &&
			    action == BitmapActionStartwrite)
				state = BitNeedSync;
			/*
			 * For degraded array, resync dirty data as well, noted
			 * if array is still degraded after resync is done, all
			 * new data will still be dirty until array is clean.
			 */
			else if (c == BitDirty &&
				action == BitmapActionStartsync)
				state = BitSyncing;
		} else if (c == BitUnwritten && state == BitDirty &&
			   action == BitmapActionStartwrite && level_456) {
			/* Delay raid456 initial recovery to first write. */
			state = BitNeedSync;
		}

		/* BitNone: this action does not change the current state */
		if (state == BitNone) {
			start++;
			continue;
		}

		llbitmap_write(llbitmap, state, start);
		if (state == BitNeedSync || state == BitNeedSyncUnwritten)
			need_resync = !mddev->degraded;
		else if (state == BitDirty &&
			 !timer_pending(&llbitmap->pending_timer))
			/* arm the daemon to clear dirty bits later */
			mod_timer(&llbitmap->pending_timer,
				  jiffies + mddev->bitmap_info.daemon_sleep * HZ);

		start++;
	}

	/* raid456 uses lazy recovery instead of a plain resync (see 5.4) */
	if (need_resync && level_456)
		need_recovery = true;

	if (need_recovery) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	} else if (need_resync) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}

	return state;
}
814 
llbitmap_raise_barrier(struct llbitmap * llbitmap,int page_idx)815 static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx)
816 {
817 	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];
818 
819 retry:
820 	if (likely(percpu_ref_tryget_live(&pctl->active))) {
821 		WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ);
822 		return;
823 	}
824 
825 	wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active));
826 	goto retry;
827 }
828 
llbitmap_release_barrier(struct llbitmap * llbitmap,int page_idx)829 static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx)
830 {
831 	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];
832 
833 	percpu_ref_put(&pctl->active);
834 }
835 
llbitmap_suspend_timeout(struct llbitmap * llbitmap,int page_idx)836 static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx)
837 {
838 	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];
839 
840 	percpu_ref_kill(&pctl->active);
841 
842 	if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active),
843 			llbitmap->mddev->bitmap_info.daemon_sleep * HZ)) {
844 		percpu_ref_resurrect(&pctl->active);
845 		return -ETIMEDOUT;
846 	}
847 
848 	return 0;
849 }
850 
llbitmap_resume(struct llbitmap * llbitmap,int page_idx)851 static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx)
852 {
853 	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];
854 
855 	pctl->expire = LONG_MAX;
856 	percpu_ref_resurrect(&pctl->active);
857 	wake_up(&pctl->wait);
858 }
859 
llbitmap_check_support(struct mddev * mddev)860 static int llbitmap_check_support(struct mddev *mddev)
861 {
862 	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
863 		pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n",
864 			  mdname(mddev));
865 		return -EBUSY;
866 	}
867 
868 	if (mddev->bitmap_info.space == 0) {
869 		if (mddev->bitmap_info.default_space == 0) {
870 			pr_notice("md/llbitmap: %s: no space for bitmap\n",
871 				  mdname(mddev));
872 			return -ENOSPC;
873 		}
874 	}
875 
876 	if (!mddev->persistent) {
877 		pr_notice("md/llbitmap: %s: array must be persistent\n",
878 			  mdname(mddev));
879 		return -EOPNOTSUPP;
880 	}
881 
882 	if (mddev->bitmap_info.file) {
883 		pr_notice("md/llbitmap: %s: doesn't support bitmap file\n",
884 			  mdname(mddev));
885 		return -EOPNOTSUPP;
886 	}
887 
888 	if (mddev->bitmap_info.external) {
889 		pr_notice("md/llbitmap: %s: doesn't support external metadata\n",
890 			  mdname(mddev));
891 		return -EOPNOTSUPP;
892 	}
893 
894 	if (mddev_is_dm(mddev)) {
895 		pr_notice("md/llbitmap: %s: doesn't support dm-raid\n",
896 			  mdname(mddev));
897 		return -EOPNOTSUPP;
898 	}
899 
900 	return 0;
901 }
902 
llbitmap_init(struct llbitmap * llbitmap)903 static int llbitmap_init(struct llbitmap *llbitmap)
904 {
905 	struct mddev *mddev = llbitmap->mddev;
906 	sector_t blocks = mddev->resync_max_sectors;
907 	unsigned long chunksize = MIN_CHUNK_SIZE;
908 	unsigned long chunks = DIV_ROUND_UP(blocks, chunksize);
909 	unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT;
910 	int ret;
911 
912 	while (chunks > space) {
913 		chunksize = chunksize << 1;
914 		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
915 	}
916 
917 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
918 	llbitmap->chunkshift = ffz(~chunksize);
919 	llbitmap->chunksize = chunksize;
920 	llbitmap->chunks = chunks;
921 	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;
922 
923 	ret = llbitmap_cache_pages(llbitmap);
924 	if (ret)
925 		return ret;
926 
927 	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
928 			       BitmapActionInit);
929 	/* flush initial llbitmap to disk */
930 	__llbitmap_flush(mddev);
931 
932 	return 0;
933 }
934 
llbitmap_read_sb(struct llbitmap * llbitmap)935 static int llbitmap_read_sb(struct llbitmap *llbitmap)
936 {
937 	struct mddev *mddev = llbitmap->mddev;
938 	unsigned long daemon_sleep;
939 	unsigned long chunksize;
940 	unsigned long events;
941 	struct page *sb_page;
942 	bitmap_super_t *sb;
943 	int ret = -EINVAL;
944 
945 	if (!mddev->bitmap_info.offset) {
946 		pr_err("md/llbitmap: %s: no super block found", mdname(mddev));
947 		return -EINVAL;
948 	}
949 
950 	sb_page = llbitmap_read_page(llbitmap, 0);
951 	if (IS_ERR(sb_page)) {
952 		pr_err("md/llbitmap: %s: read super block failed",
953 		       mdname(mddev));
954 		return -EIO;
955 	}
956 
957 	sb = kmap_local_page(sb_page);
958 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) {
959 		pr_err("md/llbitmap: %s: invalid super block magic number",
960 		       mdname(mddev));
961 		goto out_put_page;
962 	}
963 
964 	if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) {
965 		pr_err("md/llbitmap: %s: invalid super block version",
966 		       mdname(mddev));
967 		goto out_put_page;
968 	}
969 
970 	if (memcmp(sb->uuid, mddev->uuid, 16)) {
971 		pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n",
972 		       mdname(mddev));
973 		goto out_put_page;
974 	}
975 
976 	if (mddev->bitmap_info.space == 0) {
977 		int room = le32_to_cpu(sb->sectors_reserved);
978 
979 		if (room)
980 			mddev->bitmap_info.space = room;
981 		else
982 			mddev->bitmap_info.space = mddev->bitmap_info.default_space;
983 	}
984 	llbitmap->flags = le32_to_cpu(sb->state);
985 	if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) {
986 		ret = llbitmap_init(llbitmap);
987 		goto out_put_page;
988 	}
989 
990 	chunksize = le32_to_cpu(sb->chunksize);
991 	if (!is_power_of_2(chunksize)) {
992 		pr_err("md/llbitmap: %s: chunksize not a power of 2",
993 		       mdname(mddev));
994 		goto out_put_page;
995 	}
996 
997 	if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors,
998 					      mddev->bitmap_info.space << SECTOR_SHIFT)) {
999 		pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu",
1000 		       mdname(mddev), chunksize, mddev->resync_max_sectors,
1001 		       mddev->bitmap_info.space);
1002 		goto out_put_page;
1003 	}
1004 
1005 	daemon_sleep = le32_to_cpu(sb->daemon_sleep);
1006 	if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) {
1007 		pr_err("md/llbitmap: %s: daemon sleep %lu period out of range",
1008 		       mdname(mddev), daemon_sleep);
1009 		goto out_put_page;
1010 	}
1011 
1012 	events = le64_to_cpu(sb->events);
1013 	if (events < mddev->events) {
1014 		pr_warn("md/llbitmap :%s: bitmap file is out of date (%lu < %llu) -- forcing full recovery",
1015 			mdname(mddev), events, mddev->events);
1016 		set_bit(BITMAP_STALE, &llbitmap->flags);
1017 	}
1018 
1019 	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
1020 	mddev->bitmap_info.chunksize = chunksize;
1021 	mddev->bitmap_info.daemon_sleep = daemon_sleep;
1022 
1023 	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
1024 	llbitmap->chunksize = chunksize;
1025 	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize);
1026 	llbitmap->chunkshift = ffz(~chunksize);
1027 	ret = llbitmap_cache_pages(llbitmap);
1028 
1029 out_put_page:
1030 	__free_page(sb_page);
1031 	kunmap_local(sb);
1032 	return ret;
1033 }
1034 
llbitmap_pending_timer_fn(struct timer_list * pending_timer)1035 static void llbitmap_pending_timer_fn(struct timer_list *pending_timer)
1036 {
1037 	struct llbitmap *llbitmap =
1038 		container_of(pending_timer, struct llbitmap, pending_timer);
1039 
1040 	if (work_busy(&llbitmap->daemon_work)) {
1041 		pr_warn("md/llbitmap: %s daemon_work not finished in %lu seconds\n",
1042 			mdname(llbitmap->mddev),
1043 			llbitmap->mddev->bitmap_info.daemon_sleep);
1044 		set_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags);
1045 		return;
1046 	}
1047 
1048 	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
1049 }
1050 
/*
 * Daemon work: walk all bitmap pages and let the state machine clear dirty
 * bits (BitmapActionDaemon) for pages that are force-flushed or whose idle
 * timer has expired.
 */
static void md_llbitmap_daemon_fn(struct work_struct *work)
{
	struct llbitmap *llbitmap =
		container_of(work, struct llbitmap, daemon_work);
	unsigned long start;
	unsigned long end;
	bool restart;
	int idx;

	/* never clear bits while the array is degraded */
	if (llbitmap->mddev->degraded)
		return;
retry:
	/* page 0 also holds the superblock, hence the BITMAP_DATA_OFFSET gap */
	start = 0;
	end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1;
	restart = false;

	for (idx = 0; idx < llbitmap->nr_pages; idx++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		if (idx > 0) {
			/* subsequent pages carry a full PAGE_SIZE of bits */
			start = end + 1;
			end = min(end + PAGE_SIZE, llbitmap->chunks - 1);
		}

		/* skip pages that are neither force-flushed nor expired */
		if (!test_bit(LLPageFlush, &pctl->flags) &&
		    time_before(jiffies, pctl->expire)) {
			restart = true;
			continue;
		}

		/* quiesce writers touching this page before clearing bits */
		if (llbitmap_suspend_timeout(llbitmap, idx) < 0) {
			pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n",
				mdname(llbitmap->mddev), __func__, idx);
			continue;
		}

		llbitmap_state_machine(llbitmap, start, end, BitmapActionDaemon);
		llbitmap_resume(llbitmap, idx);
	}

	/*
	 * If the daemon took a long time to finish, retry to prevent missing
	 * clearing dirty bits.
	 */
	if (test_and_clear_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags))
		goto retry;

	/* If some page is dirty but not expired, setup timer again */
	if (restart)
		mod_timer(&llbitmap->pending_timer,
			  jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ);
}
1103 
llbitmap_create(struct mddev * mddev)1104 static int llbitmap_create(struct mddev *mddev)
1105 {
1106 	struct llbitmap *llbitmap;
1107 	int ret;
1108 
1109 	ret = llbitmap_check_support(mddev);
1110 	if (ret)
1111 		return ret;
1112 
1113 	llbitmap = kzalloc_obj(*llbitmap);
1114 	if (!llbitmap)
1115 		return -ENOMEM;
1116 
1117 	llbitmap->mddev = mddev;
1118 	llbitmap->io_size = bdev_logical_block_size(mddev->gendisk->part0);
1119 	llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size;
1120 
1121 	timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0);
1122 	INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn);
1123 	atomic_set(&llbitmap->behind_writes, 0);
1124 	init_waitqueue_head(&llbitmap->behind_wait);
1125 
1126 	mutex_lock(&mddev->bitmap_info.mutex);
1127 	mddev->bitmap = llbitmap;
1128 	ret = llbitmap_read_sb(llbitmap);
1129 	mutex_unlock(&mddev->bitmap_info.mutex);
1130 	if (ret) {
1131 		kfree(llbitmap);
1132 		mddev->bitmap = NULL;
1133 	}
1134 
1135 	return ret;
1136 }
1137 
/*
 * Recompute chunksize/chunks for a new array size @blocks. A @chunksize of 0
 * keeps the current one; it is then doubled until one byte per chunk fits in
 * the reserved bitmap space.
 */
static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT;
	unsigned long chunks;

	if (!chunksize)
		chunksize = llbitmap->chunksize;

	/* If there is enough space, leave the chunksize unchanged. */
	for (chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	     chunks > space;
	     chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize))
		chunksize <<= 1;

	llbitmap->chunkshift = ffz(~chunksize);
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = chunks;

	return 0;
}
1159 
llbitmap_load(struct mddev * mddev)1160 static int llbitmap_load(struct mddev *mddev)
1161 {
1162 	enum llbitmap_action action = BitmapActionReload;
1163 	struct llbitmap *llbitmap = mddev->bitmap;
1164 
1165 	if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
1166 		action = BitmapActionStale;
1167 
1168 	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
1169 	return 0;
1170 }
1171 
/*
 * Tear down the bitmap: stop the pending timer, drain in-flight daemon and
 * unplug work, then free the cached pages and the llbitmap itself.
 */
static void llbitmap_destroy(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (!llbitmap)
		return;

	mutex_lock(&mddev->bitmap_info.mutex);

	/* no new daemon runs can be scheduled after this */
	timer_delete_sync(&llbitmap->pending_timer);
	flush_workqueue(md_llbitmap_io_wq);
	flush_workqueue(md_llbitmap_unplug_wq);

	/* clear the pointer before freeing so readers under the mutex see NULL */
	mddev->bitmap = NULL;
	llbitmap_free_pages(llbitmap);
	kfree(llbitmap);
	mutex_unlock(&mddev->bitmap_info.mutex);
}
1190 
/*
 * Write fastpath entry: raise the barrier on every bitmap page the write
 * range touches, then drive the affected bits through StartWrite.
 */
static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page;

	/* BITMAP_DATA_OFFSET accounts for the superblock at the page 0 head */
	for (page = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	     page <= page_end; page++)
		llbitmap_raise_barrier(llbitmap, page);

	llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);
}
1207 
/*
 * Counterpart of llbitmap_start_write(): drop the page barriers raised for
 * the pages covering [offset, offset + sectors).
 */
static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
			       unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page;

	for (page = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	     page <= page_end; page++)
		llbitmap_release_barrier(llbitmap, page);
}
1222 
/*
 * Discard entry: like llbitmap_start_write(), but the first chunk is only
 * included when the discard covers it from its very start (hence the
 * round-up instead of a shift for @start).
 */
static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
				   unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page;

	for (page = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	     page <= page_end; page++)
		llbitmap_raise_barrier(llbitmap, page);

	llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);
}
1239 
/*
 * Counterpart of llbitmap_start_discard(): drop the page barriers raised for
 * the pages covering the discarded range.
 */
static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page;

	for (page = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	     page <= page_end; page++)
		llbitmap_release_barrier(llbitmap, page);
}
1254 
llbitmap_unplug_fn(struct work_struct * work)1255 static void llbitmap_unplug_fn(struct work_struct *work)
1256 {
1257 	struct llbitmap_unplug_work *unplug_work =
1258 		container_of(work, struct llbitmap_unplug_work, work);
1259 	struct llbitmap *llbitmap = unplug_work->llbitmap;
1260 	struct blk_plug plug;
1261 	int i;
1262 
1263 	blk_start_plug(&plug);
1264 
1265 	for (i = 0; i < llbitmap->nr_pages; i++) {
1266 		if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
1267 		    !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
1268 			continue;
1269 
1270 		llbitmap_write_page(llbitmap, i);
1271 	}
1272 
1273 	blk_finish_plug(&plug);
1274 	md_super_wait(llbitmap->mddev);
1275 	complete(unplug_work->done);
1276 }
1277 
llbitmap_dirty(struct llbitmap * llbitmap)1278 static bool llbitmap_dirty(struct llbitmap *llbitmap)
1279 {
1280 	int i;
1281 
1282 	for (i = 0; i < llbitmap->nr_pages; i++)
1283 		if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
1284 			return true;
1285 
1286 	return false;
1287 }
1288 
/*
 * Write out all dirty bitmap pages and wait for completion. The IO must not
 * be issued directly from this (submit_bio) context — see the deadlock note
 * below — so it is punted to md_llbitmap_unplug_wq and awaited here.
 */
static void llbitmap_unplug(struct mddev *mddev, bool sync)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct llbitmap *llbitmap = mddev->bitmap;
	struct llbitmap_unplug_work unplug_work = {
		.llbitmap = llbitmap,
		.done = &done,
	};

	/* nothing to write back */
	if (!llbitmap_dirty(llbitmap))
		return;

	/*
	 * Issue new bitmap IO under submit_bio() context will deadlock:
	 *  - the bio will wait for bitmap bio to be done, before it can be
	 *  issued;
	 *  - bitmap bio will be added to current->bio_list and wait for this
	 *  bio to be issued;
	 */
	INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn);
	queue_work(md_llbitmap_unplug_wq, &unplug_work.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&unplug_work.work);
}
1313 
/*
 * Force to write all bitmap pages to disk, called when stopping the array, or
 * every daemon_sleep seconds when sync_thread is running.
 */
static void __llbitmap_flush(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	struct blk_plug plug;
	int i;

	/* plug so the per-page writes are submitted as one batch */
	blk_start_plug(&plug);
	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		/* mark all blocks as dirty */
		set_bit(LLPageDirty, &pctl->flags);
		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
		llbitmap_write_page(llbitmap, i);
	}
	blk_finish_plug(&plug);
	/* wait for all the just-issued writes to complete */
	md_super_wait(llbitmap->mddev);
}
1336 
llbitmap_flush(struct mddev * mddev)1337 static void llbitmap_flush(struct mddev *mddev)
1338 {
1339 	struct llbitmap *llbitmap = mddev->bitmap;
1340 	int i;
1341 
1342 	for (i = 0; i < llbitmap->nr_pages; i++)
1343 		set_bit(LLPageFlush, &llbitmap->pctl[i]->flags);
1344 
1345 	timer_delete_sync(&llbitmap->pending_timer);
1346 	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
1347 	flush_work(&llbitmap->daemon_work);
1348 
1349 	__llbitmap_flush(mddev);
1350 }
1351 
1352 /* This is used for raid5 lazy initial recovery */
llbitmap_blocks_synced(struct mddev * mddev,sector_t offset)1353 static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
1354 {
1355 	struct llbitmap *llbitmap = mddev->bitmap;
1356 	unsigned long p = offset >> llbitmap->chunkshift;
1357 	enum llbitmap_state c = llbitmap_read(llbitmap, p);
1358 
1359 	return c == BitClean || c == BitDirty || c == BitCleanUnwritten;
1360 }
1361 
/*
 * Decide whether the sync thread may skip the blocks at @offset. Returns the
 * number of sectors to skip (up to the end of the current chunk), or 0 when
 * the blocks must be synced.
 */
static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	/*
	 * sector_t, not int: chunksize can be doubled well past INT_MAX for
	 * huge arrays, and the siblings (start/end_sync) use sector_t too.
	 */
	sector_t blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	enum llbitmap_state c = llbitmap_read(llbitmap, p);

	/* always skip unwritten blocks */
	if (c == BitUnwritten)
		return blocks;

	/* Skip CleanUnwritten - no user data, will be reset after recovery */
	if (c == BitCleanUnwritten)
		return blocks;

	/* For degraded array, don't skip */
	if (mddev->degraded)
		return 0;

	/* For resync also skip clean/dirty blocks */
	if ((c == BitClean || c == BitDirty) &&
	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		return blocks;

	return 0;
}
1389 
/*
 * Called by the sync thread before syncing the chunk containing @offset.
 * *blocks is set to the number of sectors up to the end of that chunk;
 * returns true if the chunk actually needs syncing.
 */
static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
				sector_t *blocks, bool degraded)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	enum llbitmap_state state;

	/*
	 * Before recovery starts, convert CleanUnwritten to Unwritten.
	 * This ensures the new disk won't have stale parity data.
	 */
	if (offset == 0 && test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery))
		llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
				       BitmapActionClearUnwritten);


	/*
	 * Handle one bit at a time, this is much simpler. And it doesn't matter
	 * if md_do_sync() loop more times.
	 */
	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	state = llbitmap_state_machine(llbitmap, p, p, BitmapActionStartsync);
	/* only these two states indicate the chunk is now being synced */
	return state == BitSyncing || state == BitSyncingUnwritten;
}
1415 
1416 /* Something is wrong, sync_thread stop at @offset */
llbitmap_end_sync(struct mddev * mddev,sector_t offset,sector_t * blocks)1417 static void llbitmap_end_sync(struct mddev *mddev, sector_t offset,
1418 			      sector_t *blocks)
1419 {
1420 	struct llbitmap *llbitmap = mddev->bitmap;
1421 	unsigned long p = offset >> llbitmap->chunkshift;
1422 
1423 	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
1424 	llbitmap_state_machine(llbitmap, p, llbitmap->chunks - 1,
1425 			       BitmapActionAbortsync);
1426 }
1427 
1428 /* A full sync_thread is finished */
llbitmap_close_sync(struct mddev * mddev)1429 static void llbitmap_close_sync(struct mddev *mddev)
1430 {
1431 	struct llbitmap *llbitmap = mddev->bitmap;
1432 	int i;
1433 
1434 	for (i = 0; i < llbitmap->nr_pages; i++) {
1435 		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];
1436 
1437 		/* let daemon_fn clear dirty bits immediately */
1438 		WRITE_ONCE(pctl->expire, jiffies);
1439 	}
1440 
1441 	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
1442 			       BitmapActionEndsync);
1443 }
1444 
/*
 * sync_thread have reached @sector, update metadata every daemon_sleep seconds,
 * just in case sync_thread have to restart after power failure.
 * Note: @force is currently ignored.
 */
static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
				   bool force)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	/* sector 0 marks the start of a new sync pass */
	if (sector == 0) {
		llbitmap->last_end_sync = jiffies;
		return;
	}

	/* rate-limit the metadata update to once per daemon_sleep seconds */
	if (time_before(jiffies, llbitmap->last_end_sync +
				 HZ * mddev->bitmap_info.daemon_sleep))
		return;

	/* wait for in-flight sync IO so everything below @sector is stable */
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
	/* mark everything up to @sector as synced and persist the bitmap */
	llbitmap_state_machine(llbitmap, 0, sector >> llbitmap->chunkshift,
			       BitmapActionEndsync);
	__llbitmap_flush(mddev);

	llbitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
1474 
llbitmap_enabled(void * data,bool flush)1475 static bool llbitmap_enabled(void *data, bool flush)
1476 {
1477 	struct llbitmap *llbitmap = data;
1478 
1479 	return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
1480 }
1481 
/* Mark the chunk range [s, e] dirty by driving it through StartWrite. */
static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
				unsigned long e)
{
	llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
}
1487 
llbitmap_write_sb(struct llbitmap * llbitmap)1488 static void llbitmap_write_sb(struct llbitmap *llbitmap)
1489 {
1490 	int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);
1491 
1492 	bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
1493 	llbitmap_write_page(llbitmap, 0);
1494 	md_super_wait(llbitmap->mddev);
1495 }
1496 
/*
 * Refresh the on-disk superblock fields from the in-memory state and write
 * them out synchronously.
 */
static void llbitmap_update_sb(void *data)
{
	struct llbitmap *llbitmap = data;
	struct mddev *mddev = llbitmap->mddev;
	struct page *sb_page;
	bitmap_super_t *sb;

	/* a previous write error makes the bitmap unusable; don't touch it */
	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return;

	/*
	 * NOTE(review): sb_page is not freed here, unlike in llbitmap_read_sb()
	 * — presumably llbitmap_read_page() returns the cached page 0 once the
	 * page cache is set up; confirm against its definition.
	 */
	sb_page = llbitmap_read_page(llbitmap, 0);
	if (IS_ERR(sb_page)) {
		pr_err("%s: %s: read super block failed", __func__,
		       mdname(mddev));
		set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
		return;
	}

	/* events_cleared never runs ahead of the array's event counter */
	if (mddev->events < llbitmap->events_cleared)
		llbitmap->events_cleared = mddev->events;

	sb = kmap_local_page(sb_page);
	sb->events = cpu_to_le64(mddev->events);
	sb->state = cpu_to_le32(llbitmap->flags);
	sb->chunksize = cpu_to_le32(llbitmap->chunksize);
	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
	sb->events_cleared = cpu_to_le64(llbitmap->events_cleared);
	sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space);
	sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep);

	kunmap_local(sb);
	llbitmap_write_sb(llbitmap);
}
1530 
llbitmap_get_stats(void * data,struct md_bitmap_stats * stats)1531 static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats)
1532 {
1533 	struct llbitmap *llbitmap = data;
1534 
1535 	memset(stats, 0, sizeof(*stats));
1536 
1537 	stats->missing_pages = 0;
1538 	stats->pages = llbitmap->nr_pages;
1539 	stats->file_pages = llbitmap->nr_pages;
1540 
1541 	stats->behind_writes = atomic_read(&llbitmap->behind_writes);
1542 	stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait);
1543 	stats->events_cleared = llbitmap->events_cleared;
1544 
1545 	return 0;
1546 }
1547 
1548 /* just flag all pages as needing to be written */
llbitmap_write_all(struct mddev * mddev)1549 static void llbitmap_write_all(struct mddev *mddev)
1550 {
1551 	int i;
1552 	struct llbitmap *llbitmap = mddev->bitmap;
1553 
1554 	for (i = 0; i < llbitmap->nr_pages; i++) {
1555 		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];
1556 
1557 		set_bit(LLPageDirty, &pctl->flags);
1558 		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
1559 	}
1560 }
1561 
/* Account one more in-flight behind write. */
static void llbitmap_start_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	atomic_inc(&llbitmap->behind_writes);
}
1568 
/* Drop one behind write; wake waiters once the count reaches zero. */
static void llbitmap_end_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (atomic_dec_and_test(&llbitmap->behind_writes))
		wake_up(&llbitmap->behind_wait);
}
1576 
llbitmap_wait_behind_writes(struct mddev * mddev)1577 static void llbitmap_wait_behind_writes(struct mddev *mddev)
1578 {
1579 	struct llbitmap *llbitmap = mddev->bitmap;
1580 
1581 	if (!llbitmap)
1582 		return;
1583 
1584 	wait_event(llbitmap->behind_wait,
1585 		   atomic_read(&llbitmap->behind_writes) == 0);
1586 
1587 }
1588 
/* Count how many bits are in each llbitmap state and print the summary. */
static ssize_t bits_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	int bits[BitStateCount] = {0};
	loff_t start = 0;

	/* hold the mutex so the bitmap can't be destroyed while counting */
	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap || !llbitmap->pctl) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "bitmap io error\n");
	}

	/* tally one counter per state over all chunks */
	while (start < llbitmap->chunks) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		if (c < 0 || c >= BitStateCount)
			pr_err("%s: invalid bit %llu state %d\n",
			       __func__, start, c);
		else
			bits[c]++;
		start++;
	}

	mutex_unlock(&mddev->bitmap_info.mutex);
	return sprintf(page,
		       "unwritten %d\nclean %d\ndirty %d\n"
		       "need sync %d\nsyncing %d\n"
		       "need sync unwritten %d\nsyncing unwritten %d\n"
		       "clean unwritten %d\n",
		       bits[BitUnwritten], bits[BitClean], bits[BitDirty],
		       bits[BitNeedSync], bits[BitSyncing],
		       bits[BitNeedSyncUnwritten], bits[BitSyncingUnwritten],
		       bits[BitCleanUnwritten]);
}
1629 
/* read-only sysfs file "bits": per-state bit counts from bits_show() */
static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);
1631 
/* Show bitmap geometry: chunksize, chunkshift, chunks, offset, daemon_sleep. */
static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	ssize_t ret;

	/* hold the mutex so the bitmap can't be destroyed mid-print */
	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	ret =  sprintf(page, "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n",
		       llbitmap->chunksize, llbitmap->chunkshift,
		       llbitmap->chunks, mddev->bitmap_info.offset,
		       llbitmap->mddev->bitmap_info.daemon_sleep);
	mutex_unlock(&mddev->bitmap_info.mutex);

	return ret;
}
1652 
/* read-only sysfs file "metadata": bitmap geometry from metadata_show() */
static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata);
1654 
/* Show the current daemon_sleep period in seconds. */
static ssize_t
daemon_sleep_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.daemon_sleep);
}
1660 
1661 static ssize_t
daemon_sleep_store(struct mddev * mddev,const char * buf,size_t len)1662 daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len)
1663 {
1664 	unsigned long timeout;
1665 	int rv = kstrtoul(buf, 10, &timeout);
1666 
1667 	if (rv)
1668 		return rv;
1669 
1670 	mddev->bitmap_info.daemon_sleep = timeout;
1671 	return len;
1672 }
1673 
/* read-write sysfs file "daemon_sleep" */
static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep);
1675 
/* Show the current barrier_idle timeout in seconds. */
static ssize_t
barrier_idle_show(struct mddev *mddev, char *page)
{
	/* NOTE(review): no NULL check on mddev->bitmap here, unlike bits_show()
	 * — confirm this attribute is unreachable without a bitmap. */
	struct llbitmap *llbitmap = mddev->bitmap;

	return sprintf(page, "%lu\n", llbitmap->barrier_idle);
}
1683 
/* Set the barrier_idle timeout (seconds) used by the write fastpath. */
static ssize_t
barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* NOTE(review): no NULL check on mddev->bitmap, unlike bits_show() —
	 * confirm this attribute cannot be reached without a bitmap. */
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long timeout;
	int rv = kstrtoul(buf, 10, &timeout);

	if (rv)
		return rv;

	llbitmap->barrier_idle = timeout;
	return len;
}
1697 
1698 static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);
1699 
/*
 * Write-only sysfs hook: proactively sync all Unwritten regions of a
 * RAID 4/5/6 array via the ProactiveSync action.
 */
static ssize_t
proactive_sync_store(struct mddev *mddev, const char *buf, size_t len)
{
	struct llbitmap *llbitmap;

	/* Only for RAID-456 */
	if (!raid_is_456(mddev))
		return -EINVAL;

	/* the mutex guards against concurrent bitmap destruction */
	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap || !llbitmap->pctl) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return -ENODEV;
	}

	/* Trigger proactive sync on all Unwritten regions */
	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
			       BitmapActionProactiveSync);

	mutex_unlock(&mddev->bitmap_info.mutex);
	return len;
}
1723 
/* write-only sysfs file "proactive_sync" */
static struct md_sysfs_entry llbitmap_proactive_sync =
	__ATTR(proactive_sync, 0200, NULL, proactive_sync_store);

/* all llbitmap sysfs attributes, exposed as one group */
static struct attribute *md_llbitmap_attrs[] = {
	&llbitmap_bits.attr,
	&llbitmap_metadata.attr,
	&llbitmap_daemon_sleep.attr,
	&llbitmap_barrier_idle.attr,
	&llbitmap_proactive_sync.attr,
	NULL
};

/* registered under md/<dev>/llbitmap/ via llbitmap_ops.group */
static struct attribute_group md_llbitmap_group = {
	.name = "llbitmap",
	.attrs = md_llbitmap_attrs,
};
1740 
/* Lockless-bitmap implementation of md's bitmap_operations interface. */
static struct bitmap_operations llbitmap_ops = {
	.head = {
		.type	= MD_BITMAP,
		.id	= ID_LLBITMAP,
		.name	= "llbitmap",
	},

	.enabled		= llbitmap_enabled,
	.create			= llbitmap_create,
	.resize			= llbitmap_resize,
	.load			= llbitmap_load,
	.destroy		= llbitmap_destroy,

	.start_write		= llbitmap_start_write,
	.end_write		= llbitmap_end_write,
	.start_discard		= llbitmap_start_discard,
	.end_discard		= llbitmap_end_discard,
	.unplug			= llbitmap_unplug,
	.flush			= llbitmap_flush,

	.start_behind_write	= llbitmap_start_behind_write,
	.end_behind_write	= llbitmap_end_behind_write,
	.wait_behind_writes	= llbitmap_wait_behind_writes,

	.blocks_synced		= llbitmap_blocks_synced,
	.skip_sync_blocks	= llbitmap_skip_sync_blocks,
	.start_sync		= llbitmap_start_sync,
	.end_sync		= llbitmap_end_sync,
	.close_sync		= llbitmap_close_sync,
	.cond_end_sync		= llbitmap_cond_end_sync,

	.update_sb		= llbitmap_update_sb,
	.get_stats		= llbitmap_get_stats,
	.dirty_bits		= llbitmap_dirty_bits,
	.write_all		= llbitmap_write_all,

	.group			= &md_llbitmap_group,
};
1779 
md_llbitmap_init(void)1780 int md_llbitmap_init(void)
1781 {
1782 	md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io",
1783 					 WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
1784 	if (!md_llbitmap_io_wq)
1785 		return -ENOMEM;
1786 
1787 	md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug",
1788 					 WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
1789 	if (!md_llbitmap_unplug_wq) {
1790 		destroy_workqueue(md_llbitmap_io_wq);
1791 		md_llbitmap_io_wq = NULL;
1792 		return -ENOMEM;
1793 	}
1794 
1795 	return register_md_submodule(&llbitmap_ops.head);
1796 }
1797 
md_llbitmap_exit(void)1798 void md_llbitmap_exit(void)
1799 {
1800 	destroy_workqueue(md_llbitmap_io_wq);
1801 	md_llbitmap_io_wq = NULL;
1802 	destroy_workqueue(md_llbitmap_unplug_wq);
1803 	md_llbitmap_unplug_wq = NULL;
1804 	unregister_md_submodule(&llbitmap_ops.head);
1805 }
1806