xref: /linux/fs/btrfs/bio.c (revision c92b4d3dd59f9f71ac34b42d4603d2323a499ab0) !
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2007 Oracle.  All rights reserved.
4  * Copyright (C) 2022 Christoph Hellwig.
5  */
6 
7 #include <linux/blk_types.h>
8 #include <linux/bio.h>
9 #include "bio.h"
10 #include "ctree.h"
11 #include "volumes.h"
12 #include "raid56.h"
13 #include "async-thread.h"
14 #include "dev-replace.h"
15 #include "zoned.h"
16 #include "file-item.h"
17 #include "raid-stripe-tree.h"
18 
/* bio_set backing all btrfs_bios allocated through btrfs_bio_alloc(). */
static struct bio_set btrfs_bioset;
/* bio_set for front parts split off an original bio in btrfs_split_bio(). */
static struct bio_set btrfs_clone_bioset;
/* bio_set for read-repair bios built by repair_one_sector(). */
static struct bio_set btrfs_repair_bioset;
/* mempool backing struct btrfs_failed_bio allocations in the repair path. */
static mempool_t btrfs_failed_bio_pool;

/* Per-failed-read state shared by all repair bios spawned from one bbio. */
struct btrfs_failed_bio {
	/* The original bio that saw the read failure. */
	struct btrfs_bio *bbio;
	/* Number of copies (mirrors) available for the failed range. */
	int num_copies;
	/* Outstanding repair I/Os; the last drop completes @bbio. */
	atomic_t repair_count;
};
29 
30 /* Is this a data path I/O that needs storage layer checksum and repair? */
is_data_bbio(const struct btrfs_bio * bbio)31 static inline bool is_data_bbio(const struct btrfs_bio *bbio)
32 {
33 	return bbio->inode && is_data_inode(bbio->inode);
34 }
35 
bbio_has_ordered_extent(const struct btrfs_bio * bbio)36 static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio)
37 {
38 	return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
39 }
40 
41 /*
42  * Initialize a btrfs_bio structure.  This skips the embedded bio itself as it
43  * is already initialized by the block layer.
44  */
btrfs_bio_init(struct btrfs_bio * bbio,struct btrfs_inode * inode,u64 file_offset,btrfs_bio_end_io_t end_io,void * private)45 void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_inode *inode, u64 file_offset,
46 		    btrfs_bio_end_io_t end_io, void *private)
47 {
48 	/* @inode parameter is mandatory. */
49 	ASSERT(inode);
50 
51 	memset(bbio, 0, offsetof(struct btrfs_bio, bio));
52 	bbio->inode = inode;
53 	bbio->end_io = end_io;
54 	bbio->private = private;
55 	bbio->file_offset = file_offset;
56 	atomic_set(&bbio->pending_ios, 1);
57 	WRITE_ONCE(bbio->status, BLK_STS_OK);
58 }
59 
60 /*
61  * Allocate a btrfs_bio structure.  The btrfs_bio is the main I/O container for
62  * btrfs, and is used for all I/O submitted through btrfs_submit_bbio().
63  *
64  * Just like the underlying bio_alloc_bioset it will not fail as it is backed by
65  * a mempool.
66  */
btrfs_bio_alloc(unsigned int nr_vecs,blk_opf_t opf,struct btrfs_inode * inode,u64 file_offset,btrfs_bio_end_io_t end_io,void * private)67 struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
68 				  struct btrfs_inode *inode, u64 file_offset,
69 				  btrfs_bio_end_io_t end_io, void *private)
70 {
71 	struct btrfs_bio *bbio;
72 	struct bio *bio;
73 
74 	bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
75 	bbio = btrfs_bio(bio);
76 	btrfs_bio_init(bbio, inode, file_offset, end_io, private);
77 	return bbio;
78 }
79 
/*
 * Split the first @map_length bytes off @orig_bbio into a new btrfs_bio.
 *
 * The returned bio covers the leading part and links back to the original via
 * ->private; @orig_bbio is advanced past the split point and keeps tracking
 * the remainder.  Returns an ERR_PTR() if the underlying bio_split() fails.
 */
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
					 struct btrfs_bio *orig_bbio,
					 u64 map_length)
{
	struct btrfs_bio *bbio;
	struct bio *bio;

	bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS,
			&btrfs_clone_bioset);
	if (IS_ERR(bio))
		return ERR_CAST(bio);

	bbio = btrfs_bio(bio);
	/* Stash the original in ->private so completion can chain to it. */
	btrfs_bio_init(bbio, orig_bbio->inode, orig_bbio->file_offset, NULL, orig_bbio);
	orig_bbio->file_offset += map_length;
	if (bbio_has_ordered_extent(bbio)) {
		/* Data writes share the ordered extent; take an extra ref. */
		refcount_inc(&orig_bbio->ordered->refs);
		bbio->ordered = orig_bbio->ordered;
		bbio->orig_logical = orig_bbio->orig_logical;
		orig_bbio->orig_logical += map_length;
	}

	/* Propagate the per-bio state flags to the split-off front part. */
	bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
	bbio->can_use_append = orig_bbio->can_use_append;
	bbio->is_scrub = orig_bbio->is_scrub;
	bbio->is_remap = orig_bbio->is_remap;
	bbio->async_csum = orig_bbio->async_csum;

	/* The original now also waits for this split bio to complete. */
	atomic_inc(&orig_bbio->pending_ios);
	return bbio;
}
111 
/*
 * End I/O for a btrfs_bio.
 *
 * Bios split off an original one are freed here and their completion (and
 * first error) is accounted against the original bbio.  The original's
 * ->end_io callback only runs once all pending split I/Os have finished.
 */
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
	/* Make sure we're already in task context. */
	ASSERT(in_task());

	/* Async checksumming may still be running; wait for it to finish. */
	if (bbio->async_csum)
		wait_for_completion(&bbio->csum_done);

	bbio->bio.bi_status = status;
	if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
		struct btrfs_bio *orig_bbio = bbio->private;

		/* Free bio that was never submitted to the underlying device. */
		if (bbio_has_ordered_extent(bbio))
			btrfs_put_ordered_extent(bbio->ordered);
		bio_put(&bbio->bio);

		bbio = orig_bbio;
	}

	/*
	 * At this point, bbio always points to the original btrfs_bio. Save
	 * the first error in it.
	 */
	if (status != BLK_STS_OK)
		cmpxchg(&bbio->status, BLK_STS_OK, status);

	if (atomic_dec_and_test(&bbio->pending_ios)) {
		/* Load split bio's error which might be set above. */
		if (status == BLK_STS_OK)
			bbio->bio.bi_status = READ_ONCE(bbio->status);

		if (bbio_has_ordered_extent(bbio)) {
			struct btrfs_ordered_extent *ordered = bbio->ordered;

			/* Take a local copy as ->end_io() may release bbio. */
			bbio->end_io(bbio);
			btrfs_put_ordered_extent(ordered);
		} else {
			bbio->end_io(bbio);
		}
	}
}
154 
next_repair_mirror(const struct btrfs_failed_bio * fbio,int cur_mirror)155 static int next_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror)
156 {
157 	if (cur_mirror == fbio->num_copies)
158 		return cur_mirror + 1 - fbio->num_copies;
159 	return cur_mirror + 1;
160 }
161 
prev_repair_mirror(const struct btrfs_failed_bio * fbio,int cur_mirror)162 static int prev_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror)
163 {
164 	if (cur_mirror == 1)
165 		return fbio->num_copies;
166 	return cur_mirror - 1;
167 }
168 
btrfs_repair_done(struct btrfs_failed_bio * fbio)169 static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
170 {
171 	if (atomic_dec_and_test(&fbio->repair_count)) {
172 		btrfs_bio_end_io(fbio->bbio, fbio->bbio->bio.bi_status);
173 		mempool_free(fbio, &btrfs_failed_bio_pool);
174 	}
175 }
176 
/*
 * Completion handler for a repair read.
 *
 * If the read from the current mirror failed (I/O error or checksum
 * mismatch), resubmit the same bio to the next mirror, giving up once all
 * mirrors have been tried.  On success, write the good data back to every
 * previously failed mirror before finishing the repair.
 */
static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
				 struct btrfs_device *dev)
{
	struct btrfs_failed_bio *fbio = repair_bbio->private;
	struct btrfs_inode *inode = repair_bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	/*
	 * We can not move forward the saved_iter, as it will be later
	 * utilized by repair_bbio again.
	 */
	struct bvec_iter saved_iter = repair_bbio->saved_iter;
	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
	const u64 logical = repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT;
	const u32 nr_steps = repair_bbio->saved_iter.bi_size / step;
	int mirror = repair_bbio->mirror_num;
	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
	phys_addr_t paddr;
	unsigned int slot = 0;

	/* Repair bbio should be exactly one block sized. */
	ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);

	/* Collect the physical address of each step of the block. */
	btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
		ASSERT(slot < nr_steps);
		paddrs[slot] = paddr;
		slot++;
	}

	if (repair_bbio->bio.bi_status ||
	    !btrfs_data_csum_ok(repair_bbio, dev, 0, paddrs)) {
		/* This mirror is bad too, reset the bio and try the next one. */
		bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
		repair_bbio->bio.bi_iter = repair_bbio->saved_iter;

		mirror = next_repair_mirror(fbio, mirror);
		if (mirror == fbio->bbio->mirror_num) {
			/* Wrapped around to the initially failed mirror. */
			btrfs_debug(fs_info, "no mirror left");
			fbio->bbio->bio.bi_status = BLK_STS_IOERR;
			goto done;
		}

		btrfs_submit_bbio(repair_bbio, mirror);
		return;
	}

	/* Good copy found, write it back to all mirrors tried before. */
	do {
		mirror = prev_repair_mirror(fbio, mirror);
		btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
				  repair_bbio->file_offset, fs_info->sectorsize,
				  logical, paddrs, step, mirror);
	} while (mirror != fbio->bbio->mirror_num);

done:
	btrfs_repair_done(fbio);
	bio_put(&repair_bbio->bio);
}
232 
/*
 * Try to kick off a repair read to the next available mirror for a bad sector.
 *
 * This primarily tries to recover good data to serve the actual read request,
 * but also tries to write the good data back to the bad mirror(s) when a
 * read succeeded to restore the redundancy.
 *
 * @failed_bbio: bio whose block at @bio_offset failed verification
 * @bio_offset:  byte offset of the bad block inside @failed_bbio
 * @paddrs:      physical addresses of the bad block's data
 * @fbio:        shared repair state; allocated here on the first failure
 *
 * Returns the (possibly newly allocated) repair state, or @fbio unchanged if
 * there is no other copy to repair from.
 */
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
						  u32 bio_offset,
						  phys_addr_t paddrs[],
						  struct btrfs_failed_bio *fbio)
{
	struct btrfs_inode *inode = failed_bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
	const u32 nr_steps = sectorsize / step;
	/*
	 * For bs > ps cases, the saved_iter can be partially moved forward.
	 * In that case we should round it down to the block boundary.
	 */
	const u64 logical = round_down(failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
				       sectorsize);
	struct btrfs_bio *repair_bbio;
	struct bio *repair_bio;
	int num_copies;
	int mirror;

	btrfs_debug(fs_info, "repair read error: read error at %llu",
		    failed_bbio->file_offset + bio_offset);

	num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
	if (num_copies == 1) {
		btrfs_debug(fs_info, "no copy to repair from");
		failed_bbio->bio.bi_status = BLK_STS_IOERR;
		return fbio;
	}

	/* First failed block of this bio, allocate the shared repair state. */
	if (!fbio) {
		fbio = mempool_alloc(&btrfs_failed_bio_pool, GFP_NOFS);
		fbio->bbio = failed_bbio;
		fbio->num_copies = num_copies;
		atomic_set(&fbio->repair_count, 1);
	}

	atomic_inc(&fbio->repair_count);

	/* Build a read bio covering the same pages as the failed block. */
	repair_bio = bio_alloc_bioset(NULL, nr_steps, REQ_OP_READ, GFP_NOFS,
				      &btrfs_repair_bioset);
	repair_bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
	for (int i = 0; i < nr_steps; i++) {
		int ret;

		ASSERT(offset_in_page(paddrs[i]) + step <= PAGE_SIZE);

		ret = bio_add_page(repair_bio, phys_to_page(paddrs[i]), step,
				   offset_in_page(paddrs[i]));
		ASSERT(ret == step);
	}

	repair_bbio = btrfs_bio(repair_bio);
	btrfs_bio_init(repair_bbio, failed_bbio->inode, failed_bbio->file_offset + bio_offset,
		       NULL, fbio);

	mirror = next_repair_mirror(fbio, failed_bbio->mirror_num);
	btrfs_debug(fs_info, "submitting repair read to mirror %d", mirror);
	btrfs_submit_bbio(repair_bbio, mirror);
	return fbio;
}
302 
/*
 * Verify the checksum of every block of a completed data read and kick off
 * read-repair for each block that fails (or for all blocks when the bio
 * itself failed).  Repair bios coming back through here are handed to
 * btrfs_end_repair_bio() instead.
 */
static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *dev)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	const u32 sectorsize = fs_info->sectorsize;
	const u32 step = min(sectorsize, PAGE_SIZE);
	const u32 nr_steps = sectorsize / step;
	struct bvec_iter *iter = &bbio->saved_iter;
	blk_status_t status = bbio->bio.bi_status;
	struct btrfs_failed_bio *fbio = NULL;
	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
	phys_addr_t paddr;
	u32 offset = 0;

	/* Read-repair requires the inode field to be set by the submitter. */
	ASSERT(inode);

	/*
	 * Hand off repair bios to the repair code as there is no upper level
	 * submitter for them.
	 */
	if (bbio->bio.bi_pool == &btrfs_repair_bioset) {
		btrfs_end_repair_bio(bbio, dev);
		return;
	}

	/* Clear the I/O error. A failed repair will reset it. */
	bbio->bio.bi_status = BLK_STS_OK;

	/* Gather one block worth of addresses, then verify that block. */
	btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
		paddrs[(offset / step) % nr_steps] = paddr;
		offset += step;

		if (IS_ALIGNED(offset, sectorsize)) {
			if (status ||
			    !btrfs_data_csum_ok(bbio, dev, offset - sectorsize, paddrs))
				fbio = repair_one_sector(bbio, offset - sectorsize,
							 paddrs, fbio);
		}
	}
	/* Free the csum array unless it fit into the inline buffer. */
	if (bbio->csum != bbio->csum_inline)
		kvfree(bbio->csum);

	/* Drop the initial repair ref; the last repair I/O ends the bio. */
	if (fbio)
		btrfs_repair_done(fbio);
	else
		btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
351 
btrfs_log_dev_io_error(const struct bio * bio,struct btrfs_device * dev)352 static void btrfs_log_dev_io_error(const struct bio *bio, struct btrfs_device *dev)
353 {
354 	blk_status_t sts = bio->bi_status;
355 
356 	if (!dev || !dev->bdev)
357 		return;
358 	if (unlikely(sts == BLK_STS_OK))
359 		return;
360 	if (unlikely(sts != BLK_STS_IOERR && sts != BLK_STS_TARGET &&
361 		     sts != BLK_STS_MEDIUM && sts != BLK_STS_PROTECTION)) {
362 		btrfs_warn_rl(dev->fs_info, "bdev %s unexpected block io error: %d",
363 			      btrfs_dev_name(dev), sts);
364 		return;
365 	}
366 	if (btrfs_op(bio) == BTRFS_MAP_WRITE)
367 		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
368 	else if (!(bio->bi_opf & REQ_RAHEAD))
369 		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
370 	if (bio->bi_opf & REQ_PREFLUSH)
371 		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
372 }
373 
btrfs_end_io_wq(const struct btrfs_fs_info * fs_info,const struct bio * bio)374 static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_info,
375 						const struct bio *bio)
376 {
377 	if (bio->bi_opf & REQ_META)
378 		return fs_info->endio_meta_workers;
379 	return fs_info->endio_workers;
380 }
381 
simple_end_io_work(struct work_struct * work)382 static void simple_end_io_work(struct work_struct *work)
383 {
384 	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
385 	struct bio *bio = &bbio->bio;
386 
387 	if (bio_op(bio) == REQ_OP_READ) {
388 		/* Metadata reads are checked and repaired by the submitter. */
389 		if (is_data_bbio(bbio))
390 			return btrfs_check_read_bio(bbio, bbio->bio.bi_private);
391 		return btrfs_bio_end_io(bbio, bbio->bio.bi_status);
392 	}
393 	if (bio_is_zone_append(bio) && !bio->bi_status)
394 		btrfs_record_physical_zoned(bbio);
395 	btrfs_bio_end_io(bbio, bbio->bio.bi_status);
396 }
397 
/*
 * Bio end_io for the single mirror fast path.  All processing is deferred to
 * a workqueue so completion runs in task context (see the ASSERT(in_task())
 * in btrfs_bio_end_io()).
 */
static void btrfs_simple_end_io(struct bio *bio)
{
	struct btrfs_bio *bbio = btrfs_bio(bio);
	struct btrfs_device *dev = bio->bi_private;
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;

	btrfs_bio_counter_dec(fs_info);

	if (bio->bi_status)
		btrfs_log_dev_io_error(bio, dev);

	INIT_WORK(&bbio->end_io_work, simple_end_io_work);
	queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
}
412 
/*
 * Bio end_io for parity RAID (RAID5/6) writes and read recovery; data reads
 * still go through checksum verification and repair.
 */
static void btrfs_raid56_end_io(struct bio *bio)
{
	struct btrfs_io_context *bioc = bio->bi_private;
	struct btrfs_bio *bbio = btrfs_bio(bio);

	/* RAID56 endio is always handled in workqueue. */
	ASSERT(in_task());

	btrfs_bio_counter_dec(bioc->fs_info);
	bbio->mirror_num = bioc->mirror_num;
	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
		btrfs_check_read_bio(bbio, NULL);
	else
		btrfs_bio_end_io(bbio, bbio->bio.bi_status);

	btrfs_put_bioc(bioc);
}
430 
/*
 * Task context completion for the original bio of a write to multiple
 * mirrors.  Individual mirror failures are only propagated to the upper
 * layers once they exceed the tolerated maximum (bioc->max_errors).
 */
static void orig_write_end_io_work(struct work_struct *work)
{
	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
	struct bio *bio = &bbio->bio;
	struct btrfs_io_stripe *stripe = bio->bi_private;
	struct btrfs_io_context *bioc = stripe->bioc;

	btrfs_bio_counter_dec(bioc->fs_info);

	if (bio->bi_status) {
		atomic_inc(&bioc->error);
		btrfs_log_dev_io_error(bio, stripe->dev);
	}

	/*
	 * Only send an error to the higher layers if it is beyond the tolerance
	 * threshold.
	 */
	if (atomic_read(&bioc->error) > bioc->max_errors)
		bio->bi_status = BLK_STS_IOERR;
	else
		bio->bi_status = BLK_STS_OK;

	/* For zone append, record the physical location actually written. */
	if (bio_is_zone_append(bio) && !bio->bi_status)
		stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

	btrfs_bio_end_io(bbio, bbio->bio.bi_status);
	btrfs_put_bioc(bioc);
}
460 
/* Bio end_io for the original mirrored write; defer to task context. */
static void btrfs_orig_write_end_io(struct bio *bio)
{
	struct btrfs_bio *bbio = btrfs_bio(bio);

	INIT_WORK(&bbio->end_io_work, orig_write_end_io_work);
	queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
}
468 
/*
 * Task context completion for a cloned mirror write.  Errors are only
 * accumulated in the bioc here; the original bio's completion decides
 * whether the write failed overall.
 */
static void clone_write_end_io_work(struct work_struct *work)
{
	struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
	struct bio *bio = &bbio->bio;
	struct btrfs_io_stripe *stripe = bio->bi_private;

	if (bio->bi_status) {
		atomic_inc(&stripe->bioc->error);
		btrfs_log_dev_io_error(bio, stripe->dev);
	} else if (bio_is_zone_append(bio)) {
		stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	}

	/* Pass on control to the original bio this one was cloned from */
	bio_endio(stripe->bioc->orig_bio);
	bio_put(bio);
}
486 
/* Bio end_io for cloned mirror writes; defer processing to task context. */
static void btrfs_clone_write_end_io(struct bio *bio)
{
	struct btrfs_bio *bbio = btrfs_bio(bio);

	INIT_WORK(&bbio->end_io_work, clone_write_end_io_work);
	queue_work(btrfs_end_io_wq(bbio->inode->root->fs_info, bio), &bbio->end_io_work);
}
494 
/*
 * Issue a bio to one specific device.  Fails the bio immediately when the
 * device is missing or not writeable, and converts plain writes into zone
 * append commands on sequential zones.
 */
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
	u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

	if (!dev || !dev->bdev ||
	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
	     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
		bio_io_error(bio);
		return;
	}

	bio_set_dev(bio, dev->bdev);

	/*
	 * For zone append writing, bi_sector must point the beginning of the
	 * zone
	 */
	if (btrfs_bio(bio)->can_use_append && btrfs_dev_is_sequential(dev, physical)) {
		u64 zone_start = round_down(physical, dev->fs_info->zone_size);

		ASSERT(btrfs_dev_is_sequential(dev, physical));
		bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
		/* Rewrite the plain write op into a zone append op. */
		bio->bi_opf &= ~REQ_OP_WRITE;
		bio->bi_opf |= REQ_OP_ZONE_APPEND;
	}
	btrfs_debug(dev->fs_info,
	"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
		__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
		(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
		dev->devid, bio->bi_iter.bi_size);

	/*
	 * Track reads if tracking is enabled; ignore I/O operations before the
	 * filesystem is fully initialized.
	 */
	if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info)
		percpu_counter_add(&dev->fs_info->stats_read_blocks,
				   bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits);

	if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
		blkcg_punt_bio_submit(bio);
	else
		submit_bio(bio);
}
540 
/*
 * Submit one copy of a mirrored write to stripe @dev_nr of @bioc.  The last
 * mirror reuses the original bio; earlier mirrors get a clone that completes
 * into the original via btrfs_clone_write_end_io().
 */
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
	struct bio *orig_bio = bioc->orig_bio, *bio;
	struct btrfs_bio *orig_bbio = btrfs_bio(orig_bio);

	ASSERT(bio_op(orig_bio) != REQ_OP_READ);

	/* Reuse the bio embedded into the btrfs_bio for the last mirror */
	if (dev_nr == bioc->num_stripes - 1) {
		bio = orig_bio;
		bio->bi_end_io = btrfs_orig_write_end_io;
	} else {
		/* We need to use endio_work to run end_io in task context. */
		bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &btrfs_bioset);
		bio_inc_remaining(orig_bio);
		btrfs_bio_init(btrfs_bio(bio), orig_bbio->inode,
			       orig_bbio->file_offset, NULL, NULL);
		bio->bi_end_io = btrfs_clone_write_end_io;
	}

	/* Point the bio at this stripe's device and physical location. */
	bio->bi_private = &bioc->stripes[dev_nr];
	bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
	bioc->stripes[dev_nr].bioc = bioc;
	bioc->size = bio->bi_iter.bi_size;
	btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
567 
/*
 * Dispatch an already mapped bio: the single mirror fast path when
 * @bioc is NULL, the parity RAID path, or a write fanned out to all mirrors.
 */
static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
			     struct btrfs_io_stripe *smap, int mirror_num)
{
	if (!bioc) {
		/* Single mirror read/write fast path. */
		btrfs_bio(bio)->mirror_num = mirror_num;
		bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
		if (bio_op(bio) != REQ_OP_READ)
			btrfs_bio(bio)->orig_physical = smap->physical;
		bio->bi_private = smap->dev;
		bio->bi_end_io = btrfs_simple_end_io;
		btrfs_submit_dev_bio(smap->dev, bio);
	} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* Parity RAID write or read recovery. */
		bio->bi_private = bioc;
		bio->bi_end_io = btrfs_raid56_end_io;
		if (bio_op(bio) == REQ_OP_READ)
			raid56_parity_recover(bio, bioc, mirror_num);
		else
			raid56_parity_write(bio, bioc);
	} else {
		/* Write to multiple mirrors. */
		int total_devs = bioc->num_stripes;

		bioc->orig_bio = bio;
		for (int dev_nr = 0; dev_nr < total_devs; dev_nr++)
			btrfs_submit_mirrored_bio(bioc, dev_nr);
	}
}
597 
btrfs_bio_csum(struct btrfs_bio * bbio)598 static int btrfs_bio_csum(struct btrfs_bio *bbio)
599 {
600 	if (bbio->bio.bi_opf & REQ_META)
601 		return btree_csum_one_bio(bbio);
602 #ifdef CONFIG_BTRFS_EXPERIMENTAL
603 	return btrfs_csum_one_bio(bbio, true);
604 #else
605 	return btrfs_csum_one_bio(bbio, false);
606 #endif
607 }
608 
/*
 * Async submit bios are used to offload expensive checksumming onto the worker
 * threads.
 */
struct async_submit_bio {
	/* Bio to checksum and then submit. */
	struct btrfs_bio *bbio;
	/* Saved mapping result; NULL for the single mirror fast path. */
	struct btrfs_io_context *bioc;
	/* Saved single-stripe mapping. */
	struct btrfs_io_stripe smap;
	int mirror_num;
	/* Runs run_one_async_start() then run_one_async_done(). */
	struct btrfs_work work;
};
620 
621 /*
622  * In order to insert checksums into the metadata in large chunks, we wait
623  * until bio submission time.   All the pages in the bio are checksummed and
624  * sums are attached onto the ordered extent record.
625  *
626  * At IO completion time the csums attached on the ordered extent record are
627  * inserted into the btree.
628  */
run_one_async_start(struct btrfs_work * work)629 static void run_one_async_start(struct btrfs_work *work)
630 {
631 	struct async_submit_bio *async =
632 		container_of(work, struct async_submit_bio, work);
633 	int ret;
634 
635 	ret = btrfs_bio_csum(async->bbio);
636 	if (ret)
637 		async->bbio->bio.bi_status = errno_to_blk_status(ret);
638 }
639 
640 /*
641  * In order to insert checksums into the metadata in large chunks, we wait
642  * until bio submission time.   All the pages in the bio are checksummed and
643  * sums are attached onto the ordered extent record.
644  *
645  * At IO completion time the csums attached on the ordered extent record are
646  * inserted into the tree.
647  *
648  * If called with @do_free == true, then it will free the work struct.
649  */
run_one_async_done(struct btrfs_work * work,bool do_free)650 static void run_one_async_done(struct btrfs_work *work, bool do_free)
651 {
652 	struct async_submit_bio *async =
653 		container_of(work, struct async_submit_bio, work);
654 	struct bio *bio = &async->bbio->bio;
655 
656 	if (do_free) {
657 		kfree(container_of(work, struct async_submit_bio, work));
658 		return;
659 	}
660 
661 	/* If an error occurred we just want to clean up the bio and move on. */
662 	if (bio->bi_status) {
663 		btrfs_bio_end_io(async->bbio, bio->bi_status);
664 		return;
665 	}
666 
667 	/*
668 	 * All of the bios that pass through here are from async helpers.
669 	 * Use REQ_BTRFS_CGROUP_PUNT to issue them from the owning cgroup's
670 	 * context.  This changes nothing when cgroups aren't in use.
671 	 */
672 	bio->bi_opf |= REQ_BTRFS_CGROUP_PUNT;
673 	btrfs_submit_bio(bio, async->bioc, &async->smap, async->mirror_num);
674 }
675 
/*
 * Decide whether a write bio should have its checksumming offloaded to a
 * workqueue instead of being calculated synchronously by the submitter.
 */
static bool should_async_write(struct btrfs_bio *bbio)
{
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
	bool auto_csum_mode = true;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/*
	 * Write bios will calculate checksum and submit bio at the same time.
	 * Unless explicitly required don't offload serial csum calculate and bio
	 * submit into a workqueue.
	 *
	 * NOTE(review): this early return makes the remainder of the function
	 * dead code in experimental builds.
	 */
	return false;
#endif

	/* Submit synchronously if the checksum implementation is fast. */
	if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
		return false;

	/*
	 * Try to defer the submission to a workqueue to parallelize the
	 * checksum calculation unless the I/O is issued synchronously.
	 */
	if (op_is_sync(bbio->bio.bi_opf))
		return false;

	/* Zoned devices require I/O to be submitted in order. */
	if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(fs_info))
		return false;

	return true;
}
707 
/*
 * Submit bio to an async queue.
 *
 * Return true if the work has been successfully submitted, else false.  On
 * false the caller falls back to synchronous checksumming and submission
 * (see btrfs_submit_chunk()).
 */
static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
				struct btrfs_io_context *bioc,
				struct btrfs_io_stripe *smap, int mirror_num)
{
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
	struct async_submit_bio *async;

	async = kmalloc_obj(*async, GFP_NOFS);
	if (!async)
		return false;

	/* Stash the mapping result for run_one_async_done(). */
	async->bbio = bbio;
	async->bioc = bioc;
	async->smap = *smap;
	async->mirror_num = mirror_num;

	btrfs_init_work(&async->work, run_one_async_start, run_one_async_done);
	btrfs_queue_work(fs_info->workers, &async->work);
	return true;
}
733 
/*
 * Limit @map_length for a zone append write to what can be issued as a single
 * zone append command, keeping the result aligned to the sectorsize.
 */
static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
	unsigned int nr_segs;
	int sector_offset;

	map_length = min(map_length, fs_info->max_zone_append_size);
	sector_offset = bio_split_rw_at(&bbio->bio, &fs_info->limits,
					&nr_segs, map_length);
	if (sector_offset) {
		/*
		 * bio_split_rw_at() could split at a size smaller than our
		 * sectorsize and thus cause unaligned I/Os.  Fix that by
		 * always rounding down to the nearest boundary.
		 */
		return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, fs_info->sectorsize);
	}
	return map_length;
}
753 
/*
 * Map and submit the leading chunk of @bbio that can be issued in one go.
 *
 * Returns true when the whole bio has been consumed (submitted, or ended with
 * an error); returns false when only a leading split was submitted and the
 * caller must call again for the remainder.
 */
static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct bio *bio = &bbio->bio;
	u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	u64 length = bio->bi_iter.bi_size;
	u64 map_length = length;
	struct btrfs_io_context *bioc = NULL;
	struct btrfs_io_stripe smap;
	blk_status_t status;
	int ret;

	if (bbio->is_scrub || btrfs_is_data_reloc_root(inode->root))
		smap.rst_search_commit_root = true;
	else
		smap.rst_search_commit_root = false;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
			      &bioc, &smap, &mirror_num);
	if (ret) {
		status = errno_to_blk_status(ret);
		btrfs_bio_counter_dec(fs_info);
		goto end_bbio;
	}

	/*
	 * For fscrypt writes we will get the encrypted bio after we've remapped
	 * our bio to the physical disk location, so we need to save the
	 * original bytenr so we know what we're checksumming.
	 */
	if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
		bbio->orig_logical = logical;

	bbio->can_use_append = btrfs_use_zone_append(bbio);

	map_length = min(map_length, length);
	if (bbio->can_use_append)
		map_length = btrfs_append_map_length(bbio, map_length);

	/* The mapping covers less than the bio, split off the leading part. */
	if (map_length < length) {
		struct btrfs_bio *split;

		split = btrfs_split_bio(fs_info, bbio, map_length);
		if (IS_ERR(split)) {
			status = errno_to_blk_status(PTR_ERR(split));
			btrfs_bio_counter_dec(fs_info);
			goto end_bbio;
		}
		bbio = split;
		bio = &bbio->bio;
	}

	/*
	 * Save the iter for the end_io handler and preload the checksums for
	 * data reads.
	 */
	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
		bbio->saved_iter = bio->bi_iter;
		ret = btrfs_lookup_bio_sums(bbio);
		status = errno_to_blk_status(ret);
		if (status)
			goto fail;
	}

	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
		if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
			/*
			 * No locking for the list update, as we only add to
			 * the list in the I/O submission path, and list
			 * iteration only happens in the completion path, which
			 * can't happen until after the last submission.
			 */
			btrfs_get_bioc(bioc);
			list_add_tail(&bioc->rst_ordered_entry, &bbio->ordered->bioc_list);
		}

		/*
		 * Csum items for reloc roots have already been cloned at this
		 * point, so they are handled as part of the no-checksum case.
		 */
		if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
		    !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
		    !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) {
			if (should_async_write(bbio) &&
			    btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
				goto done;

			/* Fall back to synchronous checksumming. */
			ret = btrfs_bio_csum(bbio);
			status = errno_to_blk_status(ret);
			if (status)
				goto fail;
		} else if (bbio->can_use_append ||
			   (btrfs_is_zoned(fs_info) && inode->flags & BTRFS_INODE_NODATASUM)) {
			/* Zone append / zoned nodatasum writes get a dummy sum. */
			ret = btrfs_alloc_dummy_sum(bbio);
			status = errno_to_blk_status(ret);
			if (status)
				goto fail;
		}
	}

	btrfs_submit_bio(bio, bioc, &smap, mirror_num);
done:
	return map_length == length;

fail:
	btrfs_bio_counter_dec(fs_info);
	/*
	 * We have split the original bbio, now we have to end both the current
	 * @bbio and remaining one, as the remaining one will never be submitted.
	 */
	if (map_length < length) {
		struct btrfs_bio *remaining = bbio->private;

		ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
		ASSERT(remaining);

		btrfs_bio_end_io(remaining, status);
	}
end_bbio:
	btrfs_bio_end_io(bbio, status);
	/* Do not submit another chunk */
	return true;
}
879 
/*
 * Under CONFIG_BTRFS_ASSERT, verify that the bio's logical start, length and
 * every bvec are aligned to the blocksize (bvec alignment capped at
 * PAGE_SIZE).  Compiles to nothing in non-assert builds.
 */
static void assert_bbio_alignment(struct btrfs_bio *bbio)
{
#ifdef CONFIG_BTRFS_ASSERT
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	const u32 blocksize = fs_info->sectorsize;
	const u32 alignment = min(blocksize, PAGE_SIZE);
	const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
	const u32 length = bbio->bio.bi_iter.bi_size;

	/* The logical and length should still be aligned to blocksize. */
	ASSERT(IS_ALIGNED(logical, blocksize) && IS_ALIGNED(length, blocksize) &&
	       length != 0, "root=%llu inode=%llu logical=%llu length=%u",
	       btrfs_root_id(bbio->inode->root),
	       btrfs_ino(bbio->inode), logical, length);

	bio_for_each_bvec(bvec, &bbio->bio, iter)
		ASSERT(IS_ALIGNED(bvec.bv_offset, alignment) &&
		       IS_ALIGNED(bvec.bv_len, alignment),
		"root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
		btrfs_root_id(bbio->inode->root),
		btrfs_ino(bbio->inode), logical, length, iter.bi_idx,
		bvec.bv_offset, bvec.bv_len);
#endif
}
906 
btrfs_submit_bbio(struct btrfs_bio * bbio,int mirror_num)907 void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
908 {
909 	/* If bbio->inode is not populated, its file_offset must be 0. */
910 	ASSERT(bbio->inode || bbio->file_offset == 0);
911 
912 	assert_bbio_alignment(bbio);
913 
914 	while (!btrfs_submit_chunk(bbio, mirror_num))
915 		;
916 }
917 
918 /*
919  * Submit a repair write.
920  *
921  * This bypasses btrfs_submit_bbio() deliberately, as that writes all copies in a
922  * RAID setup.  Here we only want to write the one bad copy, so we do the
923  * mapping ourselves and submit the bio directly.
924  *
925  * The I/O is issued synchronously to block the repair read completion from
926  * freeing the bio.
927  *
928  * @ino:	Offending inode number
929  * @fileoff:	File offset inside the inode
930  * @length:	Length of the repair write
931  * @logical:	Logical address of the range
932  * @paddrs:	Physical address array of the content
 * @step:	Length in bytes covered by each @paddrs entry
934  * @mirror_num: Mirror number to write to. Must not be zero
935  */
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
			    u32 length, u64 logical, const phys_addr_t paddrs[],
			    unsigned int step, int mirror_num)
{
	/*
	 * Number of bio segments needed, one per @paddrs entry; each entry
	 * covers @step bytes (@step is asserted to be a power of two below,
	 * hence DIV_ROUND_UP_POW2).
	 */
	const u32 nr_steps = DIV_ROUND_UP_POW2(length, step);
	struct btrfs_io_stripe smap = { 0 };
	struct bio *bio = NULL;
	int ret = 0;

	BUG_ON(!mirror_num);

	/* Basic alignment checks. */
	ASSERT(IS_ALIGNED(logical, fs_info->sectorsize));
	ASSERT(IS_ALIGNED(length, fs_info->sectorsize));
	ASSERT(IS_ALIGNED(fileoff, fs_info->sectorsize));
	/* Either it's a single data or metadata block. */
	ASSERT(length <= BTRFS_MAX_BLOCKSIZE);
	ASSERT(step <= length);
	ASSERT(is_power_of_2(step));

	/*
	 * The fs either mounted RO or hit critical errors, no need
	 * to continue repairing.
	 */
	if (unlikely(sb_rdonly(fs_info->sb)))
		return 0;

	/* Zoned mode has a dedicated repair path; done if it handled this range. */
	if (btrfs_repair_one_zone(fs_info, logical))
		return 0;

	/*
	 * Avoid races with device replace and make sure our bioc has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
	if (ret < 0)
		goto out_counter_dec;

	/* The target copy must live on a present, writeable device. */
	if (unlikely(!smap.dev->bdev ||
		     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state))) {
		ret = -EIO;
		goto out_counter_dec;
	}

	bio = bio_alloc(smap.dev->bdev, nr_steps, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
	bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
	for (int i = 0; i < nr_steps; i++) {
		ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, offset_in_page(paddrs[i]));
		/* We should have allocated enough slots to contain all the different pages. */
		ASSERT(ret == step);
	}
	/* Submit synchronously, see the function comment for why. */
	ret = submit_bio_wait(bio);
	bio_put(bio);
	if (ret) {
		/* try to remap that extent elsewhere? */
		btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
		goto out_counter_dec;
	}

	btrfs_info_rl(fs_info,
		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
			     ino, fileoff, btrfs_dev_name(smap.dev),
			     smap.physical >> SECTOR_SHIFT);
	ret = 0;

out_counter_dec:
	/* Pairs with btrfs_bio_counter_inc_blocked() above. */
	btrfs_bio_counter_dec(fs_info);
	return ret;
}
1007 
1008 /*
1009  * Submit a btrfs_bio based repair write.
1010  *
1011  * If @dev_replace is true, the write would be submitted to dev-replace target.
1012  */
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
{
	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
	const u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
	const u64 length = bbio->bio.bi_iter.bi_size;
	struct btrfs_io_stripe stripe = { 0 };
	int ret;

	/* Only scrub submits these, as metadata writes, to a specific mirror. */
	ASSERT(mirror_num > 0);
	ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
	ASSERT(!is_data_inode(bbio->inode));
	ASSERT(bbio->is_scrub);

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_repair_block(fs_info, &stripe, logical, length, mirror_num);
	if (ret < 0) {
		/* Mapping failed: drop the counter and end the bbio with the error. */
		btrfs_bio_counter_dec(fs_info);
		btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
		return;
	}

	/* Redirect the write to the dev-replace target device if requested. */
	if (dev_replace) {
		ASSERT(stripe.dev == fs_info->dev_replace.srcdev);
		stripe.dev = fs_info->dev_replace.tgtdev;
	}
	btrfs_submit_bio(&bbio->bio, NULL, &stripe, mirror_num);
}
1042 
btrfs_bioset_init(void)1043 int __init btrfs_bioset_init(void)
1044 {
1045 	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
1046 			offsetof(struct btrfs_bio, bio),
1047 			BIOSET_NEED_BVECS))
1048 		return -ENOMEM;
1049 	if (bioset_init(&btrfs_clone_bioset, BIO_POOL_SIZE,
1050 			offsetof(struct btrfs_bio, bio), 0))
1051 		goto out;
1052 	if (bioset_init(&btrfs_repair_bioset, BIO_POOL_SIZE,
1053 			offsetof(struct btrfs_bio, bio),
1054 			BIOSET_NEED_BVECS))
1055 		goto out;
1056 	if (mempool_init_kmalloc_pool(&btrfs_failed_bio_pool, BIO_POOL_SIZE,
1057 				      sizeof(struct btrfs_failed_bio)))
1058 		goto out;
1059 	return 0;
1060 
1061 out:
1062 	btrfs_bioset_exit();
1063 	return -ENOMEM;
1064 }
1065 
/*
 * Free the mempool and biosets set up by btrfs_bioset_init(), in reverse
 * order of initialization.  Also called from the btrfs_bioset_init() error
 * path when only some of the sets were initialized.
 */
void __cold btrfs_bioset_exit(void)
{
	mempool_exit(&btrfs_failed_bio_pool);
	bioset_exit(&btrfs_repair_bioset);
	bioset_exit(&btrfs_clone_bioset);
	bioset_exit(&btrfs_bioset);
}
1073