/*
 * Functions to sequence FLUSH and FUA writes.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/* FLUSH/FUA sequences */
enum {
	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
	QUEUE_FSEQ_DONE		= (1 << 4),
};

static struct request *queue_next_fseq(struct request_queue *q);

unsigned blk_flush_cur_seq(struct request_queue *q)
{
	if (!q->flush_seq)
		return 0;
	return 1 << ffz(q->flush_seq);
}

static struct request *blk_flush_complete_seq(struct request_queue *q,
					      unsigned seq, int error)
{
	struct request *next_rq = NULL;

	if (error && !q->flush_err)
		q->flush_err = error;

	BUG_ON(q->flush_seq & seq);
	q->flush_seq |= seq;

	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
		/* not complete yet, queue the next flush sequence */
		next_rq = queue_next_fseq(q);
	} else {
		/* complete this flush request */
		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
		q->orig_flush_rq = NULL;
		q->flush_seq = 0;

		/* dispatch the next flush if there's one */
		if (!list_empty(&q->pending_flushes)) {
			next_rq = list_entry_rq(q->pending_flushes.next);
			list_move(&next_rq->queuelist, &q->queue_head);
		}
	}
	return next_rq;
}

static void blk_flush_complete_seq_end_io(struct request_queue *q,
					  unsigned seq, int error)
{
	bool was_empty = elv_queue_empty(q);
	struct request *next_rq;

	next_rq = blk_flush_complete_seq(q, seq, error);

	/*
	 * Moving a request silently to empty queue_head may stall the
	 * queue.  Kick the queue in those cases.
	 */
	if (was_empty && next_rq)
		__blk_run_queue(q);
}

static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}

static void flush_data_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}

static void init_flush_request(struct request *rq, struct gendisk *disk)
{
	rq->cmd_type = REQ_TYPE_FS;
	rq->cmd_flags = WRITE_FLUSH;
	rq->rq_disk = disk;
}

static struct request *queue_next_fseq(struct request_queue *q)
{
	struct request *orig_rq = q->orig_flush_rq;
	struct request *rq = &q->flush_rq;

	blk_rq_init(q, rq);

	switch (blk_flush_cur_seq(q)) {
	case QUEUE_FSEQ_PREFLUSH:
		init_flush_request(rq, orig_rq->rq_disk);
		rq->end_io = pre_flush_end_io;
		break;
	case QUEUE_FSEQ_DATA:
		init_request_from_bio(rq, orig_rq->bio);
		/*
		 * orig_rq->rq_disk may be different from
		 * bio->bi_bdev->bd_disk if orig_rq got here through
		 * remapping drivers.  Make sure rq->rq_disk points
		 * to the same one as orig_rq.
		 */
		rq->rq_disk = orig_rq->rq_disk;
		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
		rq->end_io = flush_data_end_io;
		break;
	case QUEUE_FSEQ_POSTFLUSH:
		init_flush_request(rq, orig_rq->rq_disk);
		rq->end_io = post_flush_end_io;
		break;
	default:
		BUG();
	}

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
	return rq;
}

struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
	unsigned int fflags = q->flush_flags; /* may change, cache it */
	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
	unsigned skip = 0;

	/*
	 * Special case.  If there's data but flush is not necessary,
	 * the request can be issued directly.
	 *
	 * Flush w/o data should be able to be issued directly too but
	 * currently some drivers assume that rq->bio contains
	 * non-zero data if it isn't NULL and empty FLUSH requests
	 * getting here usually have bio's without data.
	 */
	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
		rq->cmd_flags &= ~REQ_FLUSH;
		if (!has_fua)
			rq->cmd_flags &= ~REQ_FUA;
		return rq;
	}

	/*
	 * Sequenced flushes can't be processed in parallel.  If
	 * another one is already in progress, queue for later
	 * processing.
	 */
	if (q->flush_seq) {
		list_move_tail(&rq->queuelist, &q->pending_flushes);
		return NULL;
	}

	/*
	 * Start a new flush sequence
	 */
	q->flush_err = 0;
	q->flush_seq |= QUEUE_FSEQ_STARTED;

	/* adjust FLUSH/FUA of the original request and stash it away */
	rq->cmd_flags &= ~REQ_FLUSH;
	if (!has_fua)
		rq->cmd_flags &= ~REQ_FUA;
	blk_dequeue_request(rq);
	q->orig_flush_rq = rq;

	/* skip unneeded sequences and return the first one */
	if (!do_preflush)
		skip |= QUEUE_FSEQ_PREFLUSH;
	if (!blk_rq_sectors(rq))
		skip |= QUEUE_FSEQ_DATA;
	if (!do_postflush)
		skip |= QUEUE_FSEQ_POSTFLUSH;
	return blk_flush_complete_seq(q, skip, 0);
}

static void bio_end_flush(struct bio *bio, int err)
{
	if (err)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	if (bio->bi_private)
		complete(bio->bi_private);
	bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question.  Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.  The flush is issued and waited upon before returning.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * Some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file) and so issuing a flush
	 * here will panic.  Ensure there is a request function before issuing
	 * the flush.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	bio->bi_end_io = bio_end_flush;
	bio->bi_bdev = bdev;
	bio->bi_private = &wait;

	bio_get(bio);
	submit_bio(WRITE_FLUSH, bio);
	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it.  For non-stacked drivers, this should be
	 * copied from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
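
/*
 * Illustrative usage sketch; the helper names below are hypothetical.
 * A driver whose device has a volatile write cache advertises the flush
 * semantics it supports via blk_queue_flush(); blk_do_flush() above then
 * consults q->flush_flags to decide which sequence steps are needed.
 * In-kernel callers such as filesystems force the cache out with
 * blkdev_issue_flush(), which waits for the flush to complete.
 */
static void example_driver_init_flush(struct request_queue *q)
{
	/* device has a write-back cache and honors FUA writes */
	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
}

static int example_sync_bdev(struct block_device *bdev)
{
	/* issue an empty flush and wait; no error sector wanted */
	return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
}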