/*
 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <scsi/sg.h>		/* for struct sg_iovec */

#include <trace/events/block.h>

/*
 * Test patch to inline a certain number of bi_io_vec's inside the bio
 * itself, to shrink a bio data allocation from two mempool calls to one
 */
#define BIO_INLINE_VECS		4

/*
 * if you change this list, also change bvec_alloc or things will
 * break badly! cannot be bigger than what you can fit into an
 * unsigned short
 */
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set *fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

/*
 * Our slab pool management
 */
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;

static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab, *new_bio_slabs;
	unsigned int new_bio_slab_max;
	unsigned int i, entry = -1;

	mutex_lock(&bio_slab_lock);

	i = 0;
	while (i < bio_slab_nr) {
		bslab = &bio_slabs[i];

		if (!bslab->slab && entry == -1)
			entry = i;
		else if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			break;
		}
		i++;
	}

	if (slab)
		goto out_unlock;

	if (bio_slab_nr == bio_slab_max && entry == -1) {
		new_bio_slab_max = bio_slab_max << 1;
		new_bio_slabs = krealloc(bio_slabs,
					 new_bio_slab_max * sizeof(struct bio_slab),
					 GFP_KERNEL);
		if (!new_bio_slabs)
			goto out_unlock;
		bio_slab_max = new_bio_slab_max;
		bio_slabs = new_bio_slabs;
	}
	if (entry == -1)
		entry = bio_slab_nr++;

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				 SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;
out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}
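/*
 * The extra_size here is a bioset's front_pad: drivers embed their per-bio
 * state immediately in front of the struct bio so that one allocation
 * serves both.  A minimal sketch of that pattern (illustrative only, not
 * part of this file; "my_bio_private" and "my_bioset" are made-up names):
 *
 *	struct my_bio_private {
 *		void *context;
 *		struct bio bio;		// must be the last member
 *	};
 *
 *	struct bio_set *my_bioset =
 *		bioset_create(BIO_POOL_SIZE, offsetof(struct my_bio_private, bio));
 *	struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, my_bioset);
 *	struct my_bio_private *priv =
 *		container_of(bio, struct my_bio_private, bio);
 */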
static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int i;

	mutex_lock(&bio_slab_lock);

	for (i = 0; i < bio_slab_nr; i++) {
		if (bs->bio_slab == bio_slabs[i].slab) {
			bslab = &bio_slabs[i];
			break;
		}
	}

	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	kmem_cache_destroy(bslab->slab);
	bslab->slab = NULL;

out:
	mutex_unlock(&bio_slab_lock);
}

unsigned int bvec_nr_vecs(unsigned short idx)
{
	return bvec_slabs[idx].nr_vecs;
}

void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
{
	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);

	if (idx == BIOVEC_MAX_IDX)
		mempool_free(bv, pool);
	else {
		struct biovec_slab *bvs = bvec_slabs + idx;

		kmem_cache_free(bvs->slab, bv);
	}
}
struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
			   mempool_t *pool)
{
	struct bio_vec *bvl;

	/*
	 * see comment near bvec_array define!
	 */
	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	/*
	 * idx now points to the pool we want to allocate from. only the
	 * 1-vec entry pool is mempool backed.
	 */
	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		/*
		 * Make this allocation restricted and don't dump info on
		 * allocation failures, since we'll fallback to the mempool
		 * in case of failure.
		 */
		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		/*
		 * Try a slab allocation. If this fails and __GFP_WAIT
		 * is set, retry with the 1-entry mempool
		 */
		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}

static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (bio_integrity(bio))
		bio_integrity_free(bio);
}

static void bio_free(struct bio *bio)
{
	struct bio_set *bs = bio->bi_pool;
	void *p;

	__bio_free(bio);

	if (bs) {
		if (bio_flagged(bio, BIO_OWNS_VEC))
			bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));

		/*
		 * If we have front padding, adjust the bio pointer before freeing
		 */
		p = bio;
		p -= bs->front_pad;

		mempool_free(p, bs->bio_pool);
	} else {
		/* Bio was allocated by bio_kmalloc() */
		kfree(bio);
	}
}

void bio_init(struct bio *bio)
{
	memset(bio, 0, sizeof(*bio));
	bio->bi_flags = 1 << BIO_UPTODATE;
	atomic_set(&bio->bi_remaining, 1);
	atomic_set(&bio->bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);
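/*
 * bio_init() is also what makes embedded and on-stack bios possible: a
 * driver that owns the storage can skip the mempools entirely.  A minimal
 * sketch (illustrative only, not taken from a real driver):
 *
 *	struct bio bio;
 *	struct bio_vec bvec;
 *
 *	bio_init(&bio);
 *	bio.bi_io_vec = &bvec;
 *	bio.bi_max_vecs = 1;
 *
 * The owner must ensure the final bio_put() never happens on such a bio -
 * bio_free() would try to kfree() memory it does not own, since bi_pool is
 * NULL.  Drivers instead complete and recycle embedded bios themselves
 * (see bio_reset() below).
 */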
/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly
 *   allocated bio returned by bio_alloc_bioset() - the only fields that are
 *   preserved are the ones that are initialized by bio_alloc_bioset(). See
 *   comment in struct bio.
 */
void bio_reset(struct bio *bio)
{
	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);

	__bio_free(bio);

	memset(bio, 0, BIO_RESET_BYTES);
	bio->bi_flags = flags|(1 << BIO_UPTODATE);
	atomic_set(&bio->bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

static void bio_chain_endio(struct bio *bio, int error)
{
	bio_endio(bio->bi_private, error);
	bio_put(bio);
}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the @bio's parent bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io won't be called until both @parent and @bio have
 * completed; the chained bio will also be freed when it completes.
 *
 * The caller must not set bi_private or bi_end_io in @bio.
 */
void bio_chain(struct bio *bio, struct bio *parent)
{
	BUG_ON(bio->bi_private || bio->bi_end_io);

	bio->bi_private = parent;
	bio->bi_end_io	= bio_chain_endio;
	atomic_inc(&parent->bi_remaining);
}
EXPORT_SYMBOL(bio_chain);
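/*
 * A sketch of how a splitting driver might use bio_chain() (illustrative
 * only; assumes "split" was cloned from "bio" and already trimmed to the
 * first part of the range):
 *
 *	struct bio *split = bio_clone_fast(bio, GFP_NOIO, fs_bio_set);
 *
 *	bio_chain(split, bio);
 *	generic_make_request(split);
 *	// submit the remainder of "bio"; its bi_end_io will only run
 *	// once both halves have completed.
 */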
static void bio_alloc_rescue(struct work_struct *work)
{
	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
	struct bio *bio;

	while (1) {
		spin_lock(&bs->rescue_lock);
		bio = bio_list_pop(&bs->rescue_list);
		spin_unlock(&bs->rescue_lock);

		if (!bio)
			break;

		generic_make_request(bio);
	}
}

static void punt_bios_to_rescuer(struct bio_set *bs)
{
	struct bio_list punt, nopunt;
	struct bio *bio;

	/*
	 * In order to guarantee forward progress we must punt only bios that
	 * were allocated from this bio_set; otherwise, if there was a bio on
	 * there for a stacking driver higher up in the stack, processing it
	 * could require allocating bios from this bio_set, and doing that from
	 * our own rescuer would be bad.
	 *
	 * Since bio lists are singly linked, pop them all instead of trying to
	 * remove from the middle of the list:
	 */

	bio_list_init(&punt);
	bio_list_init(&nopunt);

	while ((bio = bio_list_pop(current->bio_list)))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);

	*current->bio_list = nopunt;

	spin_lock(&bs->rescue_lock);
	bio_list_merge(&bs->rescue_list, &punt);
	spin_unlock(&bs->rescue_lock);

	queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Description:
 *   If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
 *   backed by the @bs's mempool.
 *
 *   When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
 *   able to allocate a bio. This is due to the mempool guarantees. To make this
 *   work, callers must never allocate more than 1 bio at a time from this pool.
 *   Callers that need to allocate more than 1 bio must always submit the
 *   previously allocated bio for IO before attempting to allocate a new one.
 *   Failure to do so can cause deadlocks under memory pressure.
 *
 *   Note that when running under generic_make_request() (i.e. any block
 *   driver), bios are not submitted until after you return - see the code in
 *   generic_make_request() that converts recursion into iteration, to prevent
 *   stack overflows.
 *
 *   This would normally mean allocating multiple bios under
 *   generic_make_request() would be susceptible to deadlocks, but we have
 *   deadlock avoidance code that resubmits any blocked bios from a rescuer
 *   thread.
 *
 *   However, we do not guarantee forward progress for allocations from other
 *   mempools. Doing multiple allocations from the same mempool under
 *   generic_make_request() should be avoided - instead, use bio_set's front_pad
 *   for per bio allocations.
 *
 * RETURNS:
 *   Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	gfp_t saved_gfp = gfp_mask;
	unsigned front_pad;
	unsigned inline_vecs;
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	if (!bs) {
		if (nr_iovecs > UIO_MAXIOV)
			return NULL;

		p = kmalloc(sizeof(struct bio) +
			    nr_iovecs * sizeof(struct bio_vec),
			    gfp_mask);
		front_pad = 0;
		inline_vecs = nr_iovecs;
	} else {
		/*
		 * generic_make_request() converts recursion to iteration; this
		 * means if we're running beneath it, any bios we allocate and
		 * submit will not be submitted (and thus freed) until after we
		 * return.
		 *
		 * This exposes us to a potential deadlock if we allocate
		 * multiple bios from the same bio_set() while running
		 * underneath generic_make_request(). If we were to allocate
		 * multiple bios (say a stacking block driver that was splitting
		 * bios), we would deadlock if we exhausted the mempool's
		 * reserve.
		 *
		 * We solve this, and guarantee forward progress, with a rescuer
		 * workqueue per bio_set. If we go to allocate and there are
		 * bios on current->bio_list, we first try the allocation
		 * without __GFP_WAIT; if that fails, we punt those bios we
		 * would be blocking to the rescuer workqueue before we retry
		 * with the original gfp_flags.
		 */
		if (current->bio_list && !bio_list_empty(current->bio_list))
			gfp_mask &= ~__GFP_WAIT;

		p = mempool_alloc(bs->bio_pool, gfp_mask);
		if (!p && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			p = mempool_alloc(bs->bio_pool, gfp_mask);
		}

		front_pad = bs->front_pad;
		inline_vecs = BIO_INLINE_VECS;
	}

	if (unlikely(!p))
		return NULL;

	bio = p + front_pad;
	bio_init(bio);

	if (nr_iovecs > inline_vecs) {
		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		if (!bvl && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		}

		if (unlikely(!bvl))
			goto err_free;

		bio->bi_flags |= 1 << BIO_OWNS_VEC;
	} else if (nr_iovecs) {
		bvl = bio->bi_inline_vecs;
	}

	bio->bi_pool = bs;
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
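/*
 * Typical use, as a sketch (illustrative only): filesystems allocate from
 * the shared fs_bio_set via the bio_alloc() wrapper and submit each bio
 * before allocating the next, which is what keeps the mempool guarantee
 * sound:
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, nr_pages);
 *
 *	if (!bio)
 *		return -ENOMEM;	// only possible without __GFP_WAIT
 *	bio->bi_bdev = bdev;
 *	bio->bi_iter.bi_sector = sector;
 *	// ... add pages, then submit before allocating another bio
 *	submit_bio(READ, bio);
 */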
void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_segment(bv, bio, iter) {
		char *data = bvec_kmap_irq(&bv, &flags);
		memset(data, 0, bv.bv_len);
		flush_dcache_page(bv.bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	/*
	 * last put frees it
	 */
	if (atomic_dec_and_test(&bio->bi_cnt))
		bio_free(bio);
}
EXPORT_SYMBOL(bio_put);

inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_phys_segments;
}
EXPORT_SYMBOL(bio_phys_segments);

/**
 * __bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: destination bio
 * @bio_src: bio to clone
 *
 * Clone a &bio. Caller will own the returned bio, but not
 * the actual data it points to. Reference count of returned
 * bio will be one.
 *
 * Caller must ensure that @bio_src is not freed before @bio.
 */
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);

	/*
	 * most users will be overriding ->bi_bdev with a new target,
	 * so we don't set nor calculate new physical/hw segment counts here
	 */
	bio->bi_bdev = bio_src->bi_bdev;
	bio->bi_flags |= 1 << BIO_CLONED;
	bio->bi_rw = bio_src->bi_rw;
	bio->bi_iter = bio_src->bi_iter;
	bio->bi_io_vec = bio_src->bi_io_vec;
}
EXPORT_SYMBOL(__bio_clone_fast);
/**
 * bio_clone_fast - clone a bio that shares the original bio's biovec
 * @bio: bio to clone
 * @gfp_mask: allocation priority
 * @bs: bio_set to allocate from
 *
 * Like __bio_clone_fast, only also allocates the returned bio
 */
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
	struct bio *b;

	b = bio_alloc_bioset(gfp_mask, 0, bs);
	if (!b)
		return NULL;

	__bio_clone_fast(b, bio);

	if (bio_integrity(bio)) {
		int ret;

		ret = bio_integrity_clone(b, bio, gfp_mask);

		if (ret < 0) {
			bio_put(b);
			return NULL;
		}
	}

	return b;
}
EXPORT_SYMBOL(bio_clone_fast);
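/*
 * Note the trade-off between the two clone flavours (summary, not from the
 * original file): bio_clone_fast() shares @bio's biovec, so the clone is
 * cheap but carries BIO_CLONED (it may not grow or edit the shared vec
 * list) and @bio must stay alive for as long as the clone;
 * bio_clone_bioset() below copies the biovec, giving the caller a vec list
 * it may modify, at the cost of a bvec allocation.
 */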
/**
 * bio_clone_bioset - clone a bio
 * @bio_src: bio to clone
 * @gfp_mask: allocation priority
 * @bs: bio_set to allocate from
 *
 * Clone bio. Caller will own the returned bio, but not the actual data it
 * points to. Reference count of returned bio will be one.
 */
struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
			     struct bio_set *bs)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	struct bio *bio;

	/*
	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
	 * bio_src->bi_io_vec to bio->bi_io_vec.
	 *
	 * We can't do that anymore, because:
	 *
	 *  - The point of cloning the biovec is to produce a bio with a biovec
	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
	 *
	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
	 *    But the clone should succeed as long as the number of biovecs we
	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
	 *
	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
	 *    that does not own the bio - reason being drivers don't use it for
	 *    iterating over the biovec anymore, so expecting it to be kept up
	 *    to date (i.e. for clones that share the parent biovec) is just
	 *    asking for trouble and would force extra work on
	 *    __bio_clone_fast() anyways.
	 */

	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
	if (!bio)
		return NULL;

	bio->bi_bdev		= bio_src->bi_bdev;
	bio->bi_rw		= bio_src->bi_rw;
	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;

	if (bio->bi_rw & REQ_DISCARD)
		goto integrity_clone;

	if (bio->bi_rw & REQ_WRITE_SAME) {
		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
		goto integrity_clone;
	}

	bio_for_each_segment(bv, bio_src, iter)
		bio->bi_io_vec[bio->bi_vcnt++] = bv;

integrity_clone:
	if (bio_integrity(bio_src)) {
		int ret;

		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
		if (ret < 0) {
			bio_put(bio);
			return NULL;
		}
	}

	return bio;
}
EXPORT_SYMBOL(bio_clone_bioset);

/**
 * bio_get_nr_vecs - return approx number of vecs
 * @bdev:  I/O target
 *
 * Return the approximate number of pages we can send to this target.
 * There's no guarantee that you will be able to fit this number of pages
 * into a bio; it does not account for dynamic restrictions that vary
 * on offset.
 */
int bio_get_nr_vecs(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);
	int nr_pages;

	nr_pages = min_t(unsigned,
		     queue_max_segments(q),
		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);

	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
}
EXPORT_SYMBOL(bio_get_nr_vecs);
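/*
 * A common allocation pattern pairs bio_get_nr_vecs() with bio_alloc()
 * (sketch only; "pages_remaining" is a made-up caller variable):
 *
 *	int nr = min_t(int, bio_get_nr_vecs(bdev), pages_remaining);
 *	struct bio *bio = bio_alloc(GFP_NOIO, nr);
 *
 * and then relies on bio_add_page() returning less than the requested
 * length once the bio can take no more.
 */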
static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned int max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	/*
	 * cloned bio must not modify vec list
	 */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
		return 0;

	/*
	 * For filesystems with a blocksize smaller than the pagesize
	 * we will often be called with the same page as last time and
	 * a consecutive offset.  Optimize this special case.
	 */
	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			unsigned int prev_bv_len = prev->bv_len;
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					/* prev_bvec is already charged in
					   bi_size, discharge it in order to
					   simulate merging updated prev_bvec
					   as new bvec. */
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_iter.bi_sector,
					.bi_size = bio->bi_iter.bi_size -
						prev_bv_len,
					.bi_rw = bio->bi_rw,
				};

				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			goto done;
		}

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
		    bvec_gap_to_prev(prev, offset))
			return 0;
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/*
	 * we might lose a segment or two here, but rather that than
	 * make this too complex.
	 */

	while (bio->bi_phys_segments >= queue_max_segments(q)) {

		if (retried_segments)
			return 0;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	/*
	 * setup the new entry, we might clear it again later if we
	 * cannot add the page
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;

	/*
	 * if queue has other restrictions (eg varying max sector size
	 * depending on offset), it can specify a merge_bvec_fn in the
	 * queue to get further control
	 */
	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_iter.bi_sector,
			.bi_size = bio->bi_iter.bi_size,
			.bi_rw = bio->bi_rw,
		};

		/*
		 * merge_bvec_fn() returns number of bytes it can accept
		 * at this offset
		 */
		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
			bvec->bv_page = NULL;
			bvec->bv_len = 0;
			bvec->bv_offset = 0;
			return 0;
		}
	}

	/* If we may be able to merge these biovecs, force a recount */
	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

	bio->bi_vcnt++;
	bio->bi_phys_segments++;
 done:
	bio->bi_iter.bi_size += len;
	return len;
}
/**
 * bio_add_pc_page - attempt to add page to bio
 * @q: the target queue
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist. This can fail for a
 * number of reasons, such as the bio being full or target block device
 * limitations. The target block device must allow bio's up to PAGE_SIZE,
 * so it is always possible to add a single page to an empty bio.
 *
 * This should only be used by REQ_PC bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	return __bio_add_page(q, bio, page, len, offset,
			      queue_max_hw_sectors(q));
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 * bio_add_page - attempt to add page to bio
 * @bio: destination bio
 * @page: page to add
 * @len: vec entry length
 * @offset: vec entry offset
 *
 * Attempt to add a page to the bio_vec maplist. This can fail for a
 * number of reasons, such as the bio being full or target block device
 * limitations. The target block device must allow bio's up to PAGE_SIZE,
 * so it is always possible to add a single page to an empty bio.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int max_sectors;

	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
		max_sectors = len >> 9;

	return __bio_add_page(q, bio, page, len, offset, max_sectors);
}
EXPORT_SYMBOL(bio_add_page);
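/*
 * Because bio_add_page() returns the number of bytes actually added (0 on
 * failure), callers submit the current bio and start a new one when it
 * comes up short.  A sketch (illustrative only):
 *
 *	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 *		submit_bio(WRITE, bio);
 *		bio = bio_alloc(GFP_NOIO, bio_get_nr_vecs(bdev));
 *		// ... re-add the page to the fresh bio
 *	}
 */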
struct submit_bio_ret {
	struct completion event;
	int error;
};

static void submit_bio_wait_endio(struct bio *bio, int error)
{
	struct submit_bio_ret *ret = bio->bi_private;

	ret->error = error;
	complete(&ret->event);
}

/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
 * bio_endio() on failure.
 */
int submit_bio_wait(int rw, struct bio *bio)
{
	struct submit_bio_ret ret;

	rw |= REQ_SYNC;
	init_completion(&ret.event);
	bio->bi_private = &ret;
	bio->bi_end_io = submit_bio_wait_endio;
	submit_bio(rw, bio);
	wait_for_completion(&ret.event);

	return ret.error;
}
EXPORT_SYMBOL(submit_bio_wait);
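/*
 * Sketch of a synchronous single-page read built on submit_bio_wait()
 * (illustrative only; must be called from process context):
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, 1);
 *	int err;
 *
 *	bio->bi_bdev = bdev;
 *	bio->bi_iter.bi_sector = sector;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	err = submit_bio_wait(READ, bio);
 *	bio_put(bio);
 */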
/**
 * bio_advance - increment/complete a bio by some number of bytes
 * @bio:	bio to advance
 * @bytes:	number of bytes to complete
 *
 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
 * be updated on the last bvec as well.
 *
 * @bio will then represent the remaining, uncompleted portion of the io.
 */
void bio_advance(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, bytes);

	bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);

/**
 * bio_alloc_pages - allocates a single page for each bvec in a bio
 * @bio: bio to allocate pages for
 * @gfp_mask: flags for allocation
 *
 * Allocates pages up to @bio->bi_vcnt.
 *
 * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages
 * are freed.
 */
int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
{
	int i;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, i) {
		bv->bv_page = alloc_page(gfp_mask);
		if (!bv->bv_page) {
			while (--bv >= bio->bi_io_vec)
				__free_page(bv->bv_page);
			return -ENOMEM;
		}
	}

	return 0;
}
EXPORT_SYMBOL(bio_alloc_pages);
/**
 * bio_copy_data - copy contents of data buffers from one chain of bios to
 * another
 * @src: source bio list
 * @dst: destination bio list
 *
 * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
 * @src and @dst as linked lists of bios.
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{
	struct bvec_iter src_iter, dst_iter;
	struct bio_vec src_bv, dst_bv;
	void *src_p, *dst_p;
	unsigned bytes;

	src_iter = src->bi_iter;
	dst_iter = dst->bi_iter;

	while (1) {
		if (!src_iter.bi_size) {
			src = src->bi_next;
			if (!src)
				break;

			src_iter = src->bi_iter;
		}

		if (!dst_iter.bi_size) {
			dst = dst->bi_next;
			if (!dst)
				break;

			dst_iter = dst->bi_iter;
		}

		src_bv = bio_iter_iovec(src, src_iter);
		dst_bv = bio_iter_iovec(dst, dst_iter);

		bytes = min(src_bv.bv_len, dst_bv.bv_len);

		src_p = kmap_atomic(src_bv.bv_page);
		dst_p = kmap_atomic(dst_bv.bv_page);

		memcpy(dst_p + dst_bv.bv_offset,
		       src_p + src_bv.bv_offset,
		       bytes);

		kunmap_atomic(dst_p);
		kunmap_atomic(src_p);

		bio_advance_iter(src, &src_iter, bytes);
		bio_advance_iter(dst, &dst_iter, bytes);
	}
}
EXPORT_SYMBOL(bio_copy_data);
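/*
 * bio_alloc_pages() and bio_copy_data() together give the classic bounce
 * pattern: clone the biovec, swap in freshly allocated pages, and snapshot
 * the caller's data.  A sketch (illustrative only; error handling and the
 * completion path that frees the bounce pages are omitted):
 *
 *	struct bio *bounce = bio_clone_bioset(orig, GFP_NOIO, bs);
 *
 *	if (bio_alloc_pages(bounce, GFP_NOIO))
 *		goto fail;
 *	bio_copy_data(bounce, orig);	// snapshot the caller's data
 *	generic_make_request(bounce);
 */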
struct bio_map_data {
	int nr_sgvecs;
	int is_our_pages;
	struct sg_iovec sgvecs[];
};

static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
			     const struct sg_iovec *iov, int iov_count,
			     int is_our_pages)
{
	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
	bmd->nr_sgvecs = iov_count;
	bmd->is_our_pages = is_our_pages;
	bio->bi_private = bmd;
}

static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
					       gfp_t gfp_mask)
{
	if (iov_count > UIO_MAXIOV)
		return NULL;

	return kmalloc(sizeof(struct bio_map_data) +
		       sizeof(struct sg_iovec) * iov_count, gfp_mask);
}

static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;
	unsigned int iov_off = 0;

	bio_for_each_segment_all(bvec, bio, i) {
		char *bv_addr = page_address(bvec->bv_page);
		unsigned int bv_len = bvec->bv_len;

		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			if (iov[iov_idx].iov_len == iov_off) {
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user() and write back data
 * to user space in case of a read.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	struct bio_vec *bvec;
	int ret = 0, i;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free.
		 */
		if (current->mm)
			ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
					     bio_data_dir(bio) == READ,
					     0, bmd->is_our_pages);
		else if (bmd->is_our_pages)
			bio_for_each_segment_all(bvec, bio, i)
				__free_page(bvec->bv_page);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(bio_uncopy_user);
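/*
 * The copy/uncopy pair is what SCSI-generic style ioctls use for indirect
 * user I/O.  Roughly (sketch only; error handling and request setup
 * omitted):
 *
 *	bio = bio_copy_user(q, NULL, uaddr, len, reading, GFP_KERNEL);
 *	// ... attach bio to a request and execute it ...
 *	ret = bio_uncopy_user(bio);	// copies back and frees bounce pages
 */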
map_data->offset & ~PAGE_MASK : 0; 11501da177e4SLinus Torvalds 1151c5dec1c3SFUJITA Tomonori for (i = 0; i < iov_count; i++) { 1152c5dec1c3SFUJITA Tomonori unsigned long uaddr; 1153c5dec1c3SFUJITA Tomonori unsigned long end; 1154c5dec1c3SFUJITA Tomonori unsigned long start; 1155c5dec1c3SFUJITA Tomonori 1156c5dec1c3SFUJITA Tomonori uaddr = (unsigned long)iov[i].iov_base; 1157c5dec1c3SFUJITA Tomonori end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1158c5dec1c3SFUJITA Tomonori start = uaddr >> PAGE_SHIFT; 1159c5dec1c3SFUJITA Tomonori 1160cb4644caSJens Axboe /* 1161cb4644caSJens Axboe * Overflow, abort 1162cb4644caSJens Axboe */ 1163cb4644caSJens Axboe if (end < start) 1164cb4644caSJens Axboe return ERR_PTR(-EINVAL); 1165cb4644caSJens Axboe 1166c5dec1c3SFUJITA Tomonori nr_pages += end - start; 1167c5dec1c3SFUJITA Tomonori len += iov[i].iov_len; 1168c5dec1c3SFUJITA Tomonori } 1169c5dec1c3SFUJITA Tomonori 117069838727SFUJITA Tomonori if (offset) 117169838727SFUJITA Tomonori nr_pages++; 117269838727SFUJITA Tomonori 11737410b3c6SFabian Frederick bmd = bio_alloc_map_data(iov_count, gfp_mask); 11741da177e4SLinus Torvalds if (!bmd) 11751da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 11761da177e4SLinus Torvalds 11771da177e4SLinus Torvalds ret = -ENOMEM; 1178a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 11791da177e4SLinus Torvalds if (!bio) 11801da177e4SLinus Torvalds goto out_bmd; 11811da177e4SLinus Torvalds 11827b6d91daSChristoph Hellwig if (!write_to_vm) 11837b6d91daSChristoph Hellwig bio->bi_rw |= REQ_WRITE; 11841da177e4SLinus Torvalds 11851da177e4SLinus Torvalds ret = 0; 118656c451f4SFUJITA Tomonori 118756c451f4SFUJITA Tomonori if (map_data) { 1188e623ddb4SFUJITA Tomonori nr_pages = 1 << map_data->page_order; 118956c451f4SFUJITA Tomonori i = map_data->offset / PAGE_SIZE; 119056c451f4SFUJITA Tomonori } 1191e623ddb4SFUJITA Tomonori while (len) { 1192e623ddb4SFUJITA Tomonori unsigned int bytes = PAGE_SIZE; 11931da177e4SLinus Torvalds 119456c451f4SFUJITA Tomonori bytes -= offset; 119556c451f4SFUJITA Tomonori 11961da177e4SLinus Torvalds if (bytes > len) 11971da177e4SLinus Torvalds bytes = len; 11981da177e4SLinus Torvalds 1199152e283fSFUJITA Tomonori if (map_data) { 1200e623ddb4SFUJITA Tomonori if (i == map_data->nr_entries * nr_pages) { 1201152e283fSFUJITA Tomonori ret = -ENOMEM; 1202152e283fSFUJITA Tomonori break; 1203152e283fSFUJITA Tomonori } 1204e623ddb4SFUJITA Tomonori 1205e623ddb4SFUJITA Tomonori page = map_data->pages[i / nr_pages]; 1206e623ddb4SFUJITA Tomonori page += (i % nr_pages); 1207e623ddb4SFUJITA Tomonori 1208e623ddb4SFUJITA Tomonori i++; 1209e623ddb4SFUJITA Tomonori } else { 1210a3bce90eSFUJITA Tomonori page = alloc_page(q->bounce_gfp | gfp_mask); 12111da177e4SLinus Torvalds if (!page) { 12121da177e4SLinus Torvalds ret = -ENOMEM; 12131da177e4SLinus Torvalds break; 12141da177e4SLinus Torvalds } 1215e623ddb4SFUJITA Tomonori } 12161da177e4SLinus Torvalds 121756c451f4SFUJITA Tomonori if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) 12181da177e4SLinus Torvalds break; 12191da177e4SLinus Torvalds 12201da177e4SLinus Torvalds len -= bytes; 122156c451f4SFUJITA Tomonori offset = 0; 12221da177e4SLinus Torvalds } 12231da177e4SLinus Torvalds 12241da177e4SLinus Torvalds if (ret) 12251da177e4SLinus Torvalds goto cleanup; 12261da177e4SLinus Torvalds 12271da177e4SLinus Torvalds /* 12281da177e4SLinus Torvalds * success 12291da177e4SLinus Torvalds */ 1230ecb554a8SFUJITA Tomonori if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || 1231ecb554a8SFUJITA Tomonori 
(map_data && map_data->from_user)) {
1232c8db4448SKent Overstreet ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
1233c5dec1c3SFUJITA Tomonori if (ret)
12341da177e4SLinus Torvalds goto cleanup;
12351da177e4SLinus Torvalds }
12361da177e4SLinus Torvalds
1237152e283fSFUJITA Tomonori bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
12381da177e4SLinus Torvalds return bio;
12391da177e4SLinus Torvalds cleanup:
1240152e283fSFUJITA Tomonori if (!map_data)
1241d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i)
12421da177e4SLinus Torvalds __free_page(bvec->bv_page);
12431da177e4SLinus Torvalds
12441da177e4SLinus Torvalds bio_put(bio);
12451da177e4SLinus Torvalds out_bmd:
1246c8db4448SKent Overstreet kfree(bmd);
12471da177e4SLinus Torvalds return ERR_PTR(ret);
12481da177e4SLinus Torvalds }
12491da177e4SLinus Torvalds
1250c5dec1c3SFUJITA Tomonori /**
1251c5dec1c3SFUJITA Tomonori * bio_copy_user - copy user data to bio
1252c5dec1c3SFUJITA Tomonori * @q: destination block queue
1253152e283fSFUJITA Tomonori * @map_data: pointer to the rq_map_data holding pages (if necessary)
1254c5dec1c3SFUJITA Tomonori * @uaddr: start of user address
1255c5dec1c3SFUJITA Tomonori * @len: length in bytes
1256c5dec1c3SFUJITA Tomonori * @write_to_vm: bool indicating writing to pages or not
1257a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags
1258c5dec1c3SFUJITA Tomonori *
1259c5dec1c3SFUJITA Tomonori * Prepares and returns a bio for indirect user io, bouncing data
1260c5dec1c3SFUJITA Tomonori * to/from kernel pages as necessary. Must be paired with
1261c5dec1c3SFUJITA Tomonori * a call to bio_uncopy_user() on io completion.
1262c5dec1c3SFUJITA Tomonori */
1263152e283fSFUJITA Tomonori struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
1264152e283fSFUJITA Tomonori unsigned long uaddr, unsigned int len,
1265152e283fSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask)
1266c5dec1c3SFUJITA Tomonori {
1267c5dec1c3SFUJITA Tomonori struct sg_iovec iov;
1268c5dec1c3SFUJITA Tomonori
1269c5dec1c3SFUJITA Tomonori iov.iov_base = (void __user *)uaddr;
1270c5dec1c3SFUJITA Tomonori iov.iov_len = len;
1271c5dec1c3SFUJITA Tomonori
1272152e283fSFUJITA Tomonori return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
1273c5dec1c3SFUJITA Tomonori }
1274a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_copy_user);
1275c5dec1c3SFUJITA Tomonori
1276165125e1SJens Axboe static struct bio *__bio_map_user_iov(struct request_queue *q,
1277f1970bafSJames Bottomley struct block_device *bdev,
127886d564c8SAl Viro const struct sg_iovec *iov, int iov_count,
1279a3bce90eSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask)
12801da177e4SLinus Torvalds {
1281f1970bafSJames Bottomley int i, j;
1282f1970bafSJames Bottomley int nr_pages = 0;
12831da177e4SLinus Torvalds struct page **pages;
12841da177e4SLinus Torvalds struct bio *bio;
1285f1970bafSJames Bottomley int cur_page = 0;
1286f1970bafSJames Bottomley int ret, offset;
12871da177e4SLinus Torvalds
1288f1970bafSJames Bottomley for (i = 0; i < iov_count; i++) {
1289f1970bafSJames Bottomley unsigned long uaddr = (unsigned long)iov[i].iov_base;
1290f1970bafSJames Bottomley unsigned long len = iov[i].iov_len;
1291f1970bafSJames Bottomley unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1292f1970bafSJames Bottomley unsigned long start = uaddr >> PAGE_SHIFT;
1293f1970bafSJames Bottomley
1294cb4644caSJens Axboe /*
1295cb4644caSJens Axboe * Overflow, abort
1296cb4644caSJens Axboe */
1297cb4644caSJens Axboe if (end < start)
1298cb4644caSJens Axboe return ERR_PTR(-EINVAL); 1299cb4644caSJens Axboe 1300f1970bafSJames Bottomley nr_pages += end - start; 13011da177e4SLinus Torvalds /* 1302ad2d7225SMike Christie * buffer must be aligned to at least hardsector size for now 13031da177e4SLinus Torvalds */ 1304ad2d7225SMike Christie if (uaddr & queue_dma_alignment(q)) 13051da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 1306f1970bafSJames Bottomley } 1307f1970bafSJames Bottomley 1308f1970bafSJames Bottomley if (!nr_pages) 1309f1970bafSJames Bottomley return ERR_PTR(-EINVAL); 13101da177e4SLinus Torvalds 1311a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 13121da177e4SLinus Torvalds if (!bio) 13131da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 13141da177e4SLinus Torvalds 13151da177e4SLinus Torvalds ret = -ENOMEM; 1316a3bce90eSFUJITA Tomonori pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); 13171da177e4SLinus Torvalds if (!pages) 13181da177e4SLinus Torvalds goto out; 13191da177e4SLinus Torvalds 1320f1970bafSJames Bottomley for (i = 0; i < iov_count; i++) { 1321f1970bafSJames Bottomley unsigned long uaddr = (unsigned long)iov[i].iov_base; 1322f1970bafSJames Bottomley unsigned long len = iov[i].iov_len; 1323f1970bafSJames Bottomley unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1324f1970bafSJames Bottomley unsigned long start = uaddr >> PAGE_SHIFT; 1325f1970bafSJames Bottomley const int local_nr_pages = end - start; 1326f1970bafSJames Bottomley const int page_limit = cur_page + local_nr_pages; 1327f1970bafSJames Bottomley 1328f5dd33c4SNick Piggin ret = get_user_pages_fast(uaddr, local_nr_pages, 1329f5dd33c4SNick Piggin write_to_vm, &pages[cur_page]); 133099172157SJens Axboe if (ret < local_nr_pages) { 133199172157SJens Axboe ret = -EFAULT; 1332f1970bafSJames Bottomley goto out_unmap; 133399172157SJens Axboe } 13341da177e4SLinus Torvalds 13351da177e4SLinus Torvalds offset = uaddr & ~PAGE_MASK; 1336f1970bafSJames Bottomley for (j = cur_page; j < page_limit; j++) { 13371da177e4SLinus Torvalds unsigned int bytes = PAGE_SIZE - offset; 13381da177e4SLinus Torvalds 13391da177e4SLinus Torvalds if (len <= 0) 13401da177e4SLinus Torvalds break; 13411da177e4SLinus Torvalds 13421da177e4SLinus Torvalds if (bytes > len) 13431da177e4SLinus Torvalds bytes = len; 13441da177e4SLinus Torvalds 13451da177e4SLinus Torvalds /* 13461da177e4SLinus Torvalds * sorry... 
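 * (added note: bio_add_pc_page() may refuse once the queue's segment
 * or size limits are hit; we then stop early, and the pages we could
 * not map are released just below)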
13471da177e4SLinus Torvalds */ 1348defd94b7SMike Christie if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < 1349defd94b7SMike Christie bytes) 13501da177e4SLinus Torvalds break; 13511da177e4SLinus Torvalds 13521da177e4SLinus Torvalds len -= bytes; 13531da177e4SLinus Torvalds offset = 0; 13541da177e4SLinus Torvalds } 13551da177e4SLinus Torvalds 1356f1970bafSJames Bottomley cur_page = j; 13571da177e4SLinus Torvalds /* 13581da177e4SLinus Torvalds * release the pages we didn't map into the bio, if any 13591da177e4SLinus Torvalds */ 1360f1970bafSJames Bottomley while (j < page_limit) 1361f1970bafSJames Bottomley page_cache_release(pages[j++]); 1362f1970bafSJames Bottomley } 13631da177e4SLinus Torvalds 13641da177e4SLinus Torvalds kfree(pages); 13651da177e4SLinus Torvalds 13661da177e4SLinus Torvalds /* 13671da177e4SLinus Torvalds * set data direction, and check if mapped pages need bouncing 13681da177e4SLinus Torvalds */ 13691da177e4SLinus Torvalds if (!write_to_vm) 13707b6d91daSChristoph Hellwig bio->bi_rw |= REQ_WRITE; 13711da177e4SLinus Torvalds 1372f1970bafSJames Bottomley bio->bi_bdev = bdev; 13731da177e4SLinus Torvalds bio->bi_flags |= (1 << BIO_USER_MAPPED); 13741da177e4SLinus Torvalds return bio; 1375f1970bafSJames Bottomley 1376f1970bafSJames Bottomley out_unmap: 1377f1970bafSJames Bottomley for (i = 0; i < nr_pages; i++) { 1378f1970bafSJames Bottomley if(!pages[i]) 1379f1970bafSJames Bottomley break; 1380f1970bafSJames Bottomley page_cache_release(pages[i]); 1381f1970bafSJames Bottomley } 13821da177e4SLinus Torvalds out: 13831da177e4SLinus Torvalds kfree(pages); 13841da177e4SLinus Torvalds bio_put(bio); 13851da177e4SLinus Torvalds return ERR_PTR(ret); 13861da177e4SLinus Torvalds } 13871da177e4SLinus Torvalds 13881da177e4SLinus Torvalds /** 13891da177e4SLinus Torvalds * bio_map_user - map user address into bio 1390165125e1SJens Axboe * @q: the struct request_queue for the bio 13911da177e4SLinus Torvalds * @bdev: destination block device 13921da177e4SLinus Torvalds * @uaddr: start of user address 13931da177e4SLinus Torvalds * @len: length in bytes 13941da177e4SLinus Torvalds * @write_to_vm: bool indicating writing to pages or not 1395a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags 13961da177e4SLinus Torvalds * 13971da177e4SLinus Torvalds * Map the user space address into a bio suitable for io to a block 13981da177e4SLinus Torvalds * device. Returns an error pointer in case of error. 13991da177e4SLinus Torvalds */ 1400165125e1SJens Axboe struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, 1401a3bce90eSFUJITA Tomonori unsigned long uaddr, unsigned int len, int write_to_vm, 1402a3bce90eSFUJITA Tomonori gfp_t gfp_mask) 14031da177e4SLinus Torvalds { 1404f1970bafSJames Bottomley struct sg_iovec iov; 14051da177e4SLinus Torvalds 14063f70353eSviro@ZenIV.linux.org.uk iov.iov_base = (void __user *)uaddr; 1407f1970bafSJames Bottomley iov.iov_len = len; 1408f1970bafSJames Bottomley 1409a3bce90eSFUJITA Tomonori return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); 1410f1970bafSJames Bottomley } 1411a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_map_user); 1412f1970bafSJames Bottomley 1413f1970bafSJames Bottomley /** 1414f1970bafSJames Bottomley * bio_map_user_iov - map user sg_iovec table into bio 1415165125e1SJens Axboe * @q: the struct request_queue for the bio 1416f1970bafSJames Bottomley * @bdev: destination block device 1417f1970bafSJames Bottomley * @iov: the iovec. 
1418f1970bafSJames Bottomley * @iov_count: number of elements in the iovec
1419f1970bafSJames Bottomley * @write_to_vm: bool indicating writing to pages or not
1420a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags
1421f1970bafSJames Bottomley *
1422f1970bafSJames Bottomley * Map the user space address into a bio suitable for io to a block
1423f1970bafSJames Bottomley * device. Returns an error pointer in case of error.
1424f1970bafSJames Bottomley */
1425165125e1SJens Axboe struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
142686d564c8SAl Viro const struct sg_iovec *iov, int iov_count,
1427a3bce90eSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask)
1428f1970bafSJames Bottomley {
1429f1970bafSJames Bottomley struct bio *bio;
1430f1970bafSJames Bottomley
1431a3bce90eSFUJITA Tomonori bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
1432a3bce90eSFUJITA Tomonori gfp_mask);
14331da177e4SLinus Torvalds if (IS_ERR(bio))
14341da177e4SLinus Torvalds return bio;
14351da177e4SLinus Torvalds
14361da177e4SLinus Torvalds /*
14371da177e4SLinus Torvalds * subtle -- if __bio_map_user() ended up bouncing a bio,
14381da177e4SLinus Torvalds * it would normally disappear when its bi_end_io is run.
14391da177e4SLinus Torvalds * however, we need it for the unmap, so grab an extra
14401da177e4SLinus Torvalds * reference to it
14411da177e4SLinus Torvalds */
14421da177e4SLinus Torvalds bio_get(bio);
14431da177e4SLinus Torvalds
14441da177e4SLinus Torvalds return bio;
14451da177e4SLinus Torvalds }
14461da177e4SLinus Torvalds
14471da177e4SLinus Torvalds static void __bio_unmap_user(struct bio *bio)
14481da177e4SLinus Torvalds {
14491da177e4SLinus Torvalds struct bio_vec *bvec;
14501da177e4SLinus Torvalds int i;
14511da177e4SLinus Torvalds
14521da177e4SLinus Torvalds /*
14531da177e4SLinus Torvalds * make sure we dirty pages we wrote to
14541da177e4SLinus Torvalds */
1455d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) {
14561da177e4SLinus Torvalds if (bio_data_dir(bio) == READ)
14571da177e4SLinus Torvalds set_page_dirty_lock(bvec->bv_page);
14581da177e4SLinus Torvalds
14591da177e4SLinus Torvalds page_cache_release(bvec->bv_page);
14601da177e4SLinus Torvalds }
14611da177e4SLinus Torvalds
14621da177e4SLinus Torvalds bio_put(bio);
14631da177e4SLinus Torvalds }
14641da177e4SLinus Torvalds
14651da177e4SLinus Torvalds /**
14661da177e4SLinus Torvalds * bio_unmap_user - unmap a bio
14671da177e4SLinus Torvalds * @bio: the bio being unmapped
14681da177e4SLinus Torvalds *
14691da177e4SLinus Torvalds * Unmap a bio previously mapped by bio_map_user(). Must be called from
14701da177e4SLinus Torvalds * process context.
14711da177e4SLinus Torvalds *
14721da177e4SLinus Torvalds * bio_unmap_user() may sleep.
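 *
 * A minimal pairing sketch (illustrative only; q, bdev, ubuf and len
 * are hypothetical caller-owned values, error handling trimmed, and
 * write_to_vm == 1 means the device fills the user buffer):
 *
 *	bio = bio_map_user(q, bdev, (unsigned long) ubuf, len, 1, GFP_KERNEL);
 *	if (IS_ERR(bio))
 *		return PTR_ERR(bio);
 *	... submit the bio and wait for completion ...
 *	bio_unmap_user(bio);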
14731da177e4SLinus Torvalds */ 14741da177e4SLinus Torvalds void bio_unmap_user(struct bio *bio) 14751da177e4SLinus Torvalds { 14761da177e4SLinus Torvalds __bio_unmap_user(bio); 14771da177e4SLinus Torvalds bio_put(bio); 14781da177e4SLinus Torvalds } 1479a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_unmap_user); 14801da177e4SLinus Torvalds 14816712ecf8SNeilBrown static void bio_map_kern_endio(struct bio *bio, int err) 1482b823825eSJens Axboe { 1483b823825eSJens Axboe bio_put(bio); 1484b823825eSJens Axboe } 1485b823825eSJens Axboe 1486165125e1SJens Axboe static struct bio *__bio_map_kern(struct request_queue *q, void *data, 148727496a8cSAl Viro unsigned int len, gfp_t gfp_mask) 1488df46b9a4SMike Christie { 1489df46b9a4SMike Christie unsigned long kaddr = (unsigned long)data; 1490df46b9a4SMike Christie unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1491df46b9a4SMike Christie unsigned long start = kaddr >> PAGE_SHIFT; 1492df46b9a4SMike Christie const int nr_pages = end - start; 1493df46b9a4SMike Christie int offset, i; 1494df46b9a4SMike Christie struct bio *bio; 1495df46b9a4SMike Christie 1496a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 1497df46b9a4SMike Christie if (!bio) 1498df46b9a4SMike Christie return ERR_PTR(-ENOMEM); 1499df46b9a4SMike Christie 1500df46b9a4SMike Christie offset = offset_in_page(kaddr); 1501df46b9a4SMike Christie for (i = 0; i < nr_pages; i++) { 1502df46b9a4SMike Christie unsigned int bytes = PAGE_SIZE - offset; 1503df46b9a4SMike Christie 1504df46b9a4SMike Christie if (len <= 0) 1505df46b9a4SMike Christie break; 1506df46b9a4SMike Christie 1507df46b9a4SMike Christie if (bytes > len) 1508df46b9a4SMike Christie bytes = len; 1509df46b9a4SMike Christie 1510defd94b7SMike Christie if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, 1511df46b9a4SMike Christie offset) < bytes) 1512df46b9a4SMike Christie break; 1513df46b9a4SMike Christie 1514df46b9a4SMike Christie data += bytes; 1515df46b9a4SMike Christie len -= bytes; 1516df46b9a4SMike Christie offset = 0; 1517df46b9a4SMike Christie } 1518df46b9a4SMike Christie 1519b823825eSJens Axboe bio->bi_end_io = bio_map_kern_endio; 1520df46b9a4SMike Christie return bio; 1521df46b9a4SMike Christie } 1522df46b9a4SMike Christie 1523df46b9a4SMike Christie /** 1524df46b9a4SMike Christie * bio_map_kern - map kernel address into bio 1525165125e1SJens Axboe * @q: the struct request_queue for the bio 1526df46b9a4SMike Christie * @data: pointer to buffer to map 1527df46b9a4SMike Christie * @len: length in bytes 1528df46b9a4SMike Christie * @gfp_mask: allocation flags for bio allocation 1529df46b9a4SMike Christie * 1530df46b9a4SMike Christie * Map the kernel address into a bio suitable for io to a block 1531df46b9a4SMike Christie * device. Returns an error pointer in case of error. 1532df46b9a4SMike Christie */ 1533165125e1SJens Axboe struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, 153427496a8cSAl Viro gfp_t gfp_mask) 1535df46b9a4SMike Christie { 1536df46b9a4SMike Christie struct bio *bio; 1537df46b9a4SMike Christie 1538df46b9a4SMike Christie bio = __bio_map_kern(q, data, len, gfp_mask); 1539df46b9a4SMike Christie if (IS_ERR(bio)) 1540df46b9a4SMike Christie return bio; 1541df46b9a4SMike Christie 15424f024f37SKent Overstreet if (bio->bi_iter.bi_size == len) 1543df46b9a4SMike Christie return bio; 1544df46b9a4SMike Christie 1545df46b9a4SMike Christie /* 1546df46b9a4SMike Christie * Don't support partial mappings. 
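 * (added note: if bi_size came up short, bio_add_pc_page() hit a
 * queue limit and only part of the buffer was mapped; failing with
 * -EINVAL is safer than silently doing short I/O)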
1547df46b9a4SMike Christie */ 1548df46b9a4SMike Christie bio_put(bio); 1549df46b9a4SMike Christie return ERR_PTR(-EINVAL); 1550df46b9a4SMike Christie } 1551a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_map_kern); 1552df46b9a4SMike Christie 155368154e90SFUJITA Tomonori static void bio_copy_kern_endio(struct bio *bio, int err) 155468154e90SFUJITA Tomonori { 155568154e90SFUJITA Tomonori struct bio_vec *bvec; 155668154e90SFUJITA Tomonori const int read = bio_data_dir(bio) == READ; 155776029ff3SFUJITA Tomonori struct bio_map_data *bmd = bio->bi_private; 155868154e90SFUJITA Tomonori int i; 155976029ff3SFUJITA Tomonori char *p = bmd->sgvecs[0].iov_base; 156068154e90SFUJITA Tomonori 1561d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 156268154e90SFUJITA Tomonori char *addr = page_address(bvec->bv_page); 156368154e90SFUJITA Tomonori 15644fc981efSTejun Heo if (read) 1565c8db4448SKent Overstreet memcpy(p, addr, bvec->bv_len); 156668154e90SFUJITA Tomonori 156768154e90SFUJITA Tomonori __free_page(bvec->bv_page); 1568c8db4448SKent Overstreet p += bvec->bv_len; 156968154e90SFUJITA Tomonori } 157068154e90SFUJITA Tomonori 1571c8db4448SKent Overstreet kfree(bmd); 157268154e90SFUJITA Tomonori bio_put(bio); 157368154e90SFUJITA Tomonori } 157468154e90SFUJITA Tomonori 157568154e90SFUJITA Tomonori /** 157668154e90SFUJITA Tomonori * bio_copy_kern - copy kernel address into bio 157768154e90SFUJITA Tomonori * @q: the struct request_queue for the bio 157868154e90SFUJITA Tomonori * @data: pointer to buffer to copy 157968154e90SFUJITA Tomonori * @len: length in bytes 158068154e90SFUJITA Tomonori * @gfp_mask: allocation flags for bio and page allocation 1581ffee0259SRandy Dunlap * @reading: data direction is READ 158268154e90SFUJITA Tomonori * 158368154e90SFUJITA Tomonori * copy the kernel address into a bio suitable for io to a block 158468154e90SFUJITA Tomonori * device. Returns an error pointer in case of error. 158568154e90SFUJITA Tomonori */ 158668154e90SFUJITA Tomonori struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 158768154e90SFUJITA Tomonori gfp_t gfp_mask, int reading) 158868154e90SFUJITA Tomonori { 158968154e90SFUJITA Tomonori struct bio *bio; 159068154e90SFUJITA Tomonori struct bio_vec *bvec; 15914d8ab62eSFUJITA Tomonori int i; 159268154e90SFUJITA Tomonori 15934d8ab62eSFUJITA Tomonori bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); 15944d8ab62eSFUJITA Tomonori if (IS_ERR(bio)) 15954d8ab62eSFUJITA Tomonori return bio; 159668154e90SFUJITA Tomonori 159768154e90SFUJITA Tomonori if (!reading) { 159868154e90SFUJITA Tomonori void *p = data; 159968154e90SFUJITA Tomonori 1600d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 160168154e90SFUJITA Tomonori char *addr = page_address(bvec->bv_page); 160268154e90SFUJITA Tomonori 160368154e90SFUJITA Tomonori memcpy(addr, p, bvec->bv_len); 160468154e90SFUJITA Tomonori p += bvec->bv_len; 160568154e90SFUJITA Tomonori } 160668154e90SFUJITA Tomonori } 160768154e90SFUJITA Tomonori 160868154e90SFUJITA Tomonori bio->bi_end_io = bio_copy_kern_endio; 160976029ff3SFUJITA Tomonori 161068154e90SFUJITA Tomonori return bio; 161168154e90SFUJITA Tomonori } 1612a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_copy_kern); 161368154e90SFUJITA Tomonori 16141da177e4SLinus Torvalds /* 16151da177e4SLinus Torvalds * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 16161da177e4SLinus Torvalds * for performing direct-IO in BIOs. 
16171da177e4SLinus Torvalds * 16181da177e4SLinus Torvalds * The problem is that we cannot run set_page_dirty() from interrupt context 16191da177e4SLinus Torvalds * because the required locks are not interrupt-safe. So what we can do is to 16201da177e4SLinus Torvalds * mark the pages dirty _before_ performing IO. And in interrupt context, 16211da177e4SLinus Torvalds * check that the pages are still dirty. If so, fine. If not, redirty them 16221da177e4SLinus Torvalds * in process context. 16231da177e4SLinus Torvalds * 16241da177e4SLinus Torvalds * We special-case compound pages here: normally this means reads into hugetlb 16251da177e4SLinus Torvalds * pages. The logic in here doesn't really work right for compound pages 16261da177e4SLinus Torvalds * because the VM does not uniformly chase down the head page in all cases. 16271da177e4SLinus Torvalds * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't 16281da177e4SLinus Torvalds * handle them at all. So we skip compound pages here at an early stage. 16291da177e4SLinus Torvalds * 16301da177e4SLinus Torvalds * Note that this code is very hard to test under normal circumstances because 16311da177e4SLinus Torvalds * direct-io pins the pages with get_user_pages(). This makes 16321da177e4SLinus Torvalds * is_page_cache_freeable return false, and the VM will not clean the pages. 16330d5c3ebaSArtem Bityutskiy * But other code (eg, flusher threads) could clean the pages if they are mapped 16341da177e4SLinus Torvalds * pagecache. 16351da177e4SLinus Torvalds * 16361da177e4SLinus Torvalds * Simply disabling the call to bio_set_pages_dirty() is a good way to test the 16371da177e4SLinus Torvalds * deferred bio dirtying paths. 16381da177e4SLinus Torvalds */ 16391da177e4SLinus Torvalds 16401da177e4SLinus Torvalds /* 16411da177e4SLinus Torvalds * bio_set_pages_dirty() will mark all the bio's pages as dirty. 16421da177e4SLinus Torvalds */ 16431da177e4SLinus Torvalds void bio_set_pages_dirty(struct bio *bio) 16441da177e4SLinus Torvalds { 1645cb34e057SKent Overstreet struct bio_vec *bvec; 16461da177e4SLinus Torvalds int i; 16471da177e4SLinus Torvalds 1648cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1649cb34e057SKent Overstreet struct page *page = bvec->bv_page; 16501da177e4SLinus Torvalds 16511da177e4SLinus Torvalds if (page && !PageCompound(page)) 16521da177e4SLinus Torvalds set_page_dirty_lock(page); 16531da177e4SLinus Torvalds } 16541da177e4SLinus Torvalds } 16551da177e4SLinus Torvalds 165686b6c7a7SAdrian Bunk static void bio_release_pages(struct bio *bio) 16571da177e4SLinus Torvalds { 1658cb34e057SKent Overstreet struct bio_vec *bvec; 16591da177e4SLinus Torvalds int i; 16601da177e4SLinus Torvalds 1661cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1662cb34e057SKent Overstreet struct page *page = bvec->bv_page; 16631da177e4SLinus Torvalds 16641da177e4SLinus Torvalds if (page) 16651da177e4SLinus Torvalds put_page(page); 16661da177e4SLinus Torvalds } 16671da177e4SLinus Torvalds } 16681da177e4SLinus Torvalds 16691da177e4SLinus Torvalds /* 16701da177e4SLinus Torvalds * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. 16711da177e4SLinus Torvalds * If they are, then fine. If, however, some pages are clean then they must 16721da177e4SLinus Torvalds * have been written out during the direct-IO read. So we take another ref on 16731da177e4SLinus Torvalds * the BIO and the offending pages and re-dirty the pages in process context. 
16741da177e4SLinus Torvalds * 16751da177e4SLinus Torvalds * It is expected that bio_check_pages_dirty() will wholly own the BIO from 16761da177e4SLinus Torvalds * here on. It will run one page_cache_release() against each page and will 16771da177e4SLinus Torvalds * run one bio_put() against the BIO. 16781da177e4SLinus Torvalds */ 16791da177e4SLinus Torvalds 168065f27f38SDavid Howells static void bio_dirty_fn(struct work_struct *work); 16811da177e4SLinus Torvalds 168265f27f38SDavid Howells static DECLARE_WORK(bio_dirty_work, bio_dirty_fn); 16831da177e4SLinus Torvalds static DEFINE_SPINLOCK(bio_dirty_lock); 16841da177e4SLinus Torvalds static struct bio *bio_dirty_list; 16851da177e4SLinus Torvalds 16861da177e4SLinus Torvalds /* 16871da177e4SLinus Torvalds * This runs in process context 16881da177e4SLinus Torvalds */ 168965f27f38SDavid Howells static void bio_dirty_fn(struct work_struct *work) 16901da177e4SLinus Torvalds { 16911da177e4SLinus Torvalds unsigned long flags; 16921da177e4SLinus Torvalds struct bio *bio; 16931da177e4SLinus Torvalds 16941da177e4SLinus Torvalds spin_lock_irqsave(&bio_dirty_lock, flags); 16951da177e4SLinus Torvalds bio = bio_dirty_list; 16961da177e4SLinus Torvalds bio_dirty_list = NULL; 16971da177e4SLinus Torvalds spin_unlock_irqrestore(&bio_dirty_lock, flags); 16981da177e4SLinus Torvalds 16991da177e4SLinus Torvalds while (bio) { 17001da177e4SLinus Torvalds struct bio *next = bio->bi_private; 17011da177e4SLinus Torvalds 17021da177e4SLinus Torvalds bio_set_pages_dirty(bio); 17031da177e4SLinus Torvalds bio_release_pages(bio); 17041da177e4SLinus Torvalds bio_put(bio); 17051da177e4SLinus Torvalds bio = next; 17061da177e4SLinus Torvalds } 17071da177e4SLinus Torvalds } 17081da177e4SLinus Torvalds 17091da177e4SLinus Torvalds void bio_check_pages_dirty(struct bio *bio) 17101da177e4SLinus Torvalds { 1711cb34e057SKent Overstreet struct bio_vec *bvec; 17121da177e4SLinus Torvalds int nr_clean_pages = 0; 17131da177e4SLinus Torvalds int i; 17141da177e4SLinus Torvalds 1715cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1716cb34e057SKent Overstreet struct page *page = bvec->bv_page; 17171da177e4SLinus Torvalds 17181da177e4SLinus Torvalds if (PageDirty(page) || PageCompound(page)) { 17191da177e4SLinus Torvalds page_cache_release(page); 1720cb34e057SKent Overstreet bvec->bv_page = NULL; 17211da177e4SLinus Torvalds } else { 17221da177e4SLinus Torvalds nr_clean_pages++; 17231da177e4SLinus Torvalds } 17241da177e4SLinus Torvalds } 17251da177e4SLinus Torvalds 17261da177e4SLinus Torvalds if (nr_clean_pages) { 17271da177e4SLinus Torvalds unsigned long flags; 17281da177e4SLinus Torvalds 17291da177e4SLinus Torvalds spin_lock_irqsave(&bio_dirty_lock, flags); 17301da177e4SLinus Torvalds bio->bi_private = bio_dirty_list; 17311da177e4SLinus Torvalds bio_dirty_list = bio; 17321da177e4SLinus Torvalds spin_unlock_irqrestore(&bio_dirty_lock, flags); 17331da177e4SLinus Torvalds schedule_work(&bio_dirty_work); 17341da177e4SLinus Torvalds } else { 17351da177e4SLinus Torvalds bio_put(bio); 17361da177e4SLinus Torvalds } 17371da177e4SLinus Torvalds } 17381da177e4SLinus Torvalds 17392d4dc890SIlya Loginov #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 17402d4dc890SIlya Loginov void bio_flush_dcache_pages(struct bio *bi) 17412d4dc890SIlya Loginov { 17427988613bSKent Overstreet struct bio_vec bvec; 17437988613bSKent Overstreet struct bvec_iter iter; 17442d4dc890SIlya Loginov 17457988613bSKent Overstreet bio_for_each_segment(bvec, bi, iter) 17467988613bSKent Overstreet flush_dcache_page(bvec.bv_page); 
17472d4dc890SIlya Loginov }
17482d4dc890SIlya Loginov EXPORT_SYMBOL(bio_flush_dcache_pages);
17492d4dc890SIlya Loginov #endif
17502d4dc890SIlya Loginov
17511da177e4SLinus Torvalds /**
17521da177e4SLinus Torvalds * bio_endio - end I/O on a bio
17531da177e4SLinus Torvalds * @bio: bio
17541da177e4SLinus Torvalds * @error: error, if any
17551da177e4SLinus Torvalds *
17561da177e4SLinus Torvalds * Description:
17576712ecf8SNeilBrown * bio_endio() will end I/O on the whole bio. bio_endio() is the
17585bb23a68SNeilBrown * preferred way to end I/O on a bio; it takes care of clearing
17595bb23a68SNeilBrown * BIO_UPTODATE on error. @error is 0 on success, and one of the
17605bb23a68SNeilBrown * established -Exxxx (-EIO, for instance) error values in case
17615bb23a68SNeilBrown * something went wrong. No one should call bi_end_io() directly on a
17625bb23a68SNeilBrown * bio unless they own it and thus know that it has an end_io
17635bb23a68SNeilBrown * function.
17641da177e4SLinus Torvalds **/
17656712ecf8SNeilBrown void bio_endio(struct bio *bio, int error)
17661da177e4SLinus Torvalds {
1767196d38bcSKent Overstreet while (bio) {
1768196d38bcSKent Overstreet BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
1769196d38bcSKent Overstreet
17701da177e4SLinus Torvalds if (error)
17711da177e4SLinus Torvalds clear_bit(BIO_UPTODATE, &bio->bi_flags);
17729cc54d40SNeilBrown else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
17739cc54d40SNeilBrown error = -EIO;
17741da177e4SLinus Torvalds
1775196d38bcSKent Overstreet if (!atomic_dec_and_test(&bio->bi_remaining))
1776196d38bcSKent Overstreet return;
1777196d38bcSKent Overstreet
1778196d38bcSKent Overstreet /*
1779196d38bcSKent Overstreet * Need to have a real endio function for chained bios,
1780196d38bcSKent Overstreet * otherwise various corner cases will break (like stacking
1781196d38bcSKent Overstreet * block devices that save/restore bi_end_io) - however, we want
1782196d38bcSKent Overstreet * to avoid unbounded recursion and blowing the stack. Tail call
1783196d38bcSKent Overstreet * optimization would handle this, but compiling with frame
1784196d38bcSKent Overstreet * pointers also disables gcc's sibling call optimization.
1785196d38bcSKent Overstreet */
1786196d38bcSKent Overstreet if (bio->bi_end_io == bio_chain_endio) {
1787196d38bcSKent Overstreet struct bio *parent = bio->bi_private;
1788196d38bcSKent Overstreet bio_put(bio);
1789196d38bcSKent Overstreet bio = parent;
1790196d38bcSKent Overstreet } else {
17915bb23a68SNeilBrown if (bio->bi_end_io)
17926712ecf8SNeilBrown bio->bi_end_io(bio, error);
1793196d38bcSKent Overstreet bio = NULL;
1794196d38bcSKent Overstreet }
1795196d38bcSKent Overstreet }
17961da177e4SLinus Torvalds }
1797a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_endio);
17981da177e4SLinus Torvalds
1799196d38bcSKent Overstreet /**
1800196d38bcSKent Overstreet * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
1801196d38bcSKent Overstreet * @bio: bio
1802196d38bcSKent Overstreet * @error: error, if any
1803196d38bcSKent Overstreet *
1804196d38bcSKent Overstreet * For code that has saved and restored bi_end_io; think hard before using this
1805196d38bcSKent Overstreet * function; probably you should've cloned the entire bio.
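 *
 * A hedged sketch of the save/restore pattern this exists for
 * (illustrative only; my_end_io and saved_end_io are hypothetical
 * per-driver names):
 *
 *	static void my_end_io(struct bio *bio, int error)
 *	{
 *		bio->bi_end_io = saved_end_io;
 *		bio_endio_nodec(bio, error);
 *	}
 *
 * bio_endio_nodec() takes an extra bi_remaining reference before
 * calling bio_endio(), since this completion already consumed one.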
1806196d38bcSKent Overstreet **/ 1807196d38bcSKent Overstreet void bio_endio_nodec(struct bio *bio, int error) 1808196d38bcSKent Overstreet { 1809196d38bcSKent Overstreet atomic_inc(&bio->bi_remaining); 1810196d38bcSKent Overstreet bio_endio(bio, error); 1811196d38bcSKent Overstreet } 1812196d38bcSKent Overstreet EXPORT_SYMBOL(bio_endio_nodec); 1813196d38bcSKent Overstreet 181420d0189bSKent Overstreet /** 181520d0189bSKent Overstreet * bio_split - split a bio 181620d0189bSKent Overstreet * @bio: bio to split 181720d0189bSKent Overstreet * @sectors: number of sectors to split from the front of @bio 181820d0189bSKent Overstreet * @gfp: gfp mask 181920d0189bSKent Overstreet * @bs: bio set to allocate from 182020d0189bSKent Overstreet * 182120d0189bSKent Overstreet * Allocates and returns a new bio which represents @sectors from the start of 182220d0189bSKent Overstreet * @bio, and updates @bio to represent the remaining sectors. 182320d0189bSKent Overstreet * 182420d0189bSKent Overstreet * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's 182520d0189bSKent Overstreet * responsibility to ensure that @bio is not freed before the split. 182620d0189bSKent Overstreet */ 182720d0189bSKent Overstreet struct bio *bio_split(struct bio *bio, int sectors, 182820d0189bSKent Overstreet gfp_t gfp, struct bio_set *bs) 182920d0189bSKent Overstreet { 183020d0189bSKent Overstreet struct bio *split = NULL; 183120d0189bSKent Overstreet 183220d0189bSKent Overstreet BUG_ON(sectors <= 0); 183320d0189bSKent Overstreet BUG_ON(sectors >= bio_sectors(bio)); 183420d0189bSKent Overstreet 183520d0189bSKent Overstreet split = bio_clone_fast(bio, gfp, bs); 183620d0189bSKent Overstreet if (!split) 183720d0189bSKent Overstreet return NULL; 183820d0189bSKent Overstreet 183920d0189bSKent Overstreet split->bi_iter.bi_size = sectors << 9; 184020d0189bSKent Overstreet 184120d0189bSKent Overstreet if (bio_integrity(split)) 184220d0189bSKent Overstreet bio_integrity_trim(split, 0, sectors); 184320d0189bSKent Overstreet 184420d0189bSKent Overstreet bio_advance(bio, split->bi_iter.bi_size); 184520d0189bSKent Overstreet 184620d0189bSKent Overstreet return split; 184720d0189bSKent Overstreet } 184820d0189bSKent Overstreet EXPORT_SYMBOL(bio_split); 184920d0189bSKent Overstreet 1850ad3316bfSMartin K. Petersen /** 18516678d83fSKent Overstreet * bio_trim - trim a bio 18526678d83fSKent Overstreet * @bio: bio to trim 18536678d83fSKent Overstreet * @offset: number of sectors to trim from the front of @bio 18546678d83fSKent Overstreet * @size: size we want to trim @bio to, in sectors 18556678d83fSKent Overstreet */ 18566678d83fSKent Overstreet void bio_trim(struct bio *bio, int offset, int size) 18576678d83fSKent Overstreet { 18586678d83fSKent Overstreet /* 'bio' is a cloned bio which we need to trim to match 18596678d83fSKent Overstreet * the given offset and size. 
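 * E.g. bio_trim(clone, 4, 8) advances the clone past its first 4
 * sectors and caps bi_size at 8 << 9 = 4096 bytes.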
18606678d83fSKent Overstreet */
18616678d83fSKent Overstreet
18626678d83fSKent Overstreet size <<= 9;
18634f024f37SKent Overstreet if (offset == 0 && size == bio->bi_iter.bi_size)
18646678d83fSKent Overstreet return;
18656678d83fSKent Overstreet
18666678d83fSKent Overstreet clear_bit(BIO_SEG_VALID, &bio->bi_flags);
18676678d83fSKent Overstreet
18686678d83fSKent Overstreet bio_advance(bio, offset << 9);
18696678d83fSKent Overstreet
18704f024f37SKent Overstreet bio->bi_iter.bi_size = size;
18716678d83fSKent Overstreet }
18726678d83fSKent Overstreet EXPORT_SYMBOL_GPL(bio_trim);
18736678d83fSKent Overstreet
18741da177e4SLinus Torvalds /*
18751da177e4SLinus Torvalds * create memory pools for biovecs in a bio_set.
18761da177e4SLinus Torvalds * use the global biovec slabs created for general use.
18771da177e4SLinus Torvalds */
1878a6c39cb4SFabian Frederick mempool_t *biovec_create_pool(int pool_entries)
18791da177e4SLinus Torvalds {
18807ff9345fSJens Axboe struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
18811da177e4SLinus Torvalds
18829f060e22SKent Overstreet return mempool_create_slab_pool(pool_entries, bp->slab);
18831da177e4SLinus Torvalds }
18841da177e4SLinus Torvalds
18851da177e4SLinus Torvalds void bioset_free(struct bio_set *bs)
18861da177e4SLinus Torvalds {
1887df2cb6daSKent Overstreet if (bs->rescue_workqueue)
1888df2cb6daSKent Overstreet destroy_workqueue(bs->rescue_workqueue);
1889df2cb6daSKent Overstreet
18901da177e4SLinus Torvalds if (bs->bio_pool)
18911da177e4SLinus Torvalds mempool_destroy(bs->bio_pool);
18921da177e4SLinus Torvalds
18939f060e22SKent Overstreet if (bs->bvec_pool)
18949f060e22SKent Overstreet mempool_destroy(bs->bvec_pool);
18959f060e22SKent Overstreet
18967878cba9SMartin K. Petersen bioset_integrity_free(bs);
1897bb799ca0SJens Axboe bio_put_slab(bs);
18981da177e4SLinus Torvalds
18991da177e4SLinus Torvalds kfree(bs);
19001da177e4SLinus Torvalds }
1901a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bioset_free);
19021da177e4SLinus Torvalds
1903bb799ca0SJens Axboe /**
1904bb799ca0SJens Axboe * bioset_create - Create a bio_set
1905bb799ca0SJens Axboe * @pool_size: Number of bios and bio_vecs to cache in the mempool
1906bb799ca0SJens Axboe * @front_pad: Number of bytes to allocate in front of the returned bio
1907bb799ca0SJens Axboe *
1908bb799ca0SJens Axboe * Description:
1909bb799ca0SJens Axboe * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1910bb799ca0SJens Axboe * to ask for a number of bytes to be allocated in front of the bio.
1911bb799ca0SJens Axboe * Front pad allocation is useful for embedding the bio inside
1912bb799ca0SJens Axboe * another structure, to avoid allocating extra data to go with the bio.
1913bb799ca0SJens Axboe * Note that the bio must be embedded at the END of that structure always,
1914bb799ca0SJens Axboe * or things will break badly.
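 *
 * A hedged embedding sketch (struct my_tio and my_bs are hypothetical;
 * the bio member sits last so front_pad covers everything before it):
 *
 *	struct my_tio {
 *		void *ctx;
 *		struct bio bio;		(must stay last)
 *	};
 *
 *	my_bs = bioset_create(BIO_POOL_SIZE, offsetof(struct my_tio, bio));
 *	bio = bio_alloc_bioset(GFP_NOIO, 0, my_bs);
 *	tio = container_of(bio, struct my_tio, bio);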
1915bb799ca0SJens Axboe */
1916bb799ca0SJens Axboe struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
19171da177e4SLinus Torvalds {
1918392ddc32SJens Axboe unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
19191b434498SJens Axboe struct bio_set *bs;
19201da177e4SLinus Torvalds
19211b434498SJens Axboe bs = kzalloc(sizeof(*bs), GFP_KERNEL);
19221da177e4SLinus Torvalds if (!bs)
19231da177e4SLinus Torvalds return NULL;
19241da177e4SLinus Torvalds
1925bb799ca0SJens Axboe bs->front_pad = front_pad;
19261b434498SJens Axboe
1927df2cb6daSKent Overstreet spin_lock_init(&bs->rescue_lock);
1928df2cb6daSKent Overstreet bio_list_init(&bs->rescue_list);
1929df2cb6daSKent Overstreet INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
1930df2cb6daSKent Overstreet
1931392ddc32SJens Axboe bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1932bb799ca0SJens Axboe if (!bs->bio_slab) {
1933bb799ca0SJens Axboe kfree(bs);
1934bb799ca0SJens Axboe return NULL;
1935bb799ca0SJens Axboe }
1936bb799ca0SJens Axboe
1937bb799ca0SJens Axboe bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
19381da177e4SLinus Torvalds if (!bs->bio_pool)
19391da177e4SLinus Torvalds goto bad;
19401da177e4SLinus Torvalds
1941a6c39cb4SFabian Frederick bs->bvec_pool = biovec_create_pool(pool_size);
19429f060e22SKent Overstreet if (!bs->bvec_pool)
1943df2cb6daSKent Overstreet goto bad;
19441da177e4SLinus Torvalds
1945df2cb6daSKent Overstreet bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1946df2cb6daSKent Overstreet if (!bs->rescue_workqueue)
1947df2cb6daSKent Overstreet goto bad;
1948df2cb6daSKent Overstreet
1949df2cb6daSKent Overstreet return bs;
19501da177e4SLinus Torvalds bad:
19511da177e4SLinus Torvalds bioset_free(bs);
19521da177e4SLinus Torvalds return NULL;
19531da177e4SLinus Torvalds }
1954a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bioset_create);
19551da177e4SLinus Torvalds
1956852c788fSTejun Heo #ifdef CONFIG_BLK_CGROUP
1957852c788fSTejun Heo /**
1958852c788fSTejun Heo * bio_associate_current - associate a bio with %current
1959852c788fSTejun Heo * @bio: target bio
1960852c788fSTejun Heo *
1961852c788fSTejun Heo * Associate @bio with %current if it hasn't been associated yet. Block
1962852c788fSTejun Heo * layer will treat @bio as if it were issued by %current no matter which
1963852c788fSTejun Heo * task actually issues it.
1964852c788fSTejun Heo *
1965852c788fSTejun Heo * This function takes an extra reference of %current's io_context and blkcg
1966852c788fSTejun Heo * which will be put when @bio is released. The caller must own @bio,
1967852c788fSTejun Heo * ensure %current->io_context exists, and is responsible for synchronizing
1968852c788fSTejun Heo * calls to this function.
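 *
 * A minimal call-site sketch (illustrative; rw and bio are the
 * caller's, and %current->io_context must already exist):
 *
 *	ret = bio_associate_current(bio);
 *	if (!ret || ret == -EBUSY)
 *		submit_bio(rw, bio);
 *
 * The io_context and blkcg references taken here are dropped when the
 * bio is released (see bio_disassociate_task() below).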
1969852c788fSTejun Heo */ 1970852c788fSTejun Heo int bio_associate_current(struct bio *bio) 1971852c788fSTejun Heo { 1972852c788fSTejun Heo struct io_context *ioc; 1973852c788fSTejun Heo struct cgroup_subsys_state *css; 1974852c788fSTejun Heo 1975852c788fSTejun Heo if (bio->bi_ioc) 1976852c788fSTejun Heo return -EBUSY; 1977852c788fSTejun Heo 1978852c788fSTejun Heo ioc = current->io_context; 1979852c788fSTejun Heo if (!ioc) 1980852c788fSTejun Heo return -ENOENT; 1981852c788fSTejun Heo 1982852c788fSTejun Heo /* acquire active ref on @ioc and associate */ 1983852c788fSTejun Heo get_io_context_active(ioc); 1984852c788fSTejun Heo bio->bi_ioc = ioc; 1985852c788fSTejun Heo 1986852c788fSTejun Heo /* associate blkcg if exists */ 1987852c788fSTejun Heo rcu_read_lock(); 1988073219e9STejun Heo css = task_css(current, blkio_cgrp_id); 1989ec903c0cSTejun Heo if (css && css_tryget_online(css)) 1990852c788fSTejun Heo bio->bi_css = css; 1991852c788fSTejun Heo rcu_read_unlock(); 1992852c788fSTejun Heo 1993852c788fSTejun Heo return 0; 1994852c788fSTejun Heo } 1995852c788fSTejun Heo 1996852c788fSTejun Heo /** 1997852c788fSTejun Heo * bio_disassociate_task - undo bio_associate_current() 1998852c788fSTejun Heo * @bio: target bio 1999852c788fSTejun Heo */ 2000852c788fSTejun Heo void bio_disassociate_task(struct bio *bio) 2001852c788fSTejun Heo { 2002852c788fSTejun Heo if (bio->bi_ioc) { 2003852c788fSTejun Heo put_io_context(bio->bi_ioc); 2004852c788fSTejun Heo bio->bi_ioc = NULL; 2005852c788fSTejun Heo } 2006852c788fSTejun Heo if (bio->bi_css) { 2007852c788fSTejun Heo css_put(bio->bi_css); 2008852c788fSTejun Heo bio->bi_css = NULL; 2009852c788fSTejun Heo } 2010852c788fSTejun Heo } 2011852c788fSTejun Heo 2012852c788fSTejun Heo #endif /* CONFIG_BLK_CGROUP */ 2013852c788fSTejun Heo 20141da177e4SLinus Torvalds static void __init biovec_init_slabs(void) 20151da177e4SLinus Torvalds { 20161da177e4SLinus Torvalds int i; 20171da177e4SLinus Torvalds 20181da177e4SLinus Torvalds for (i = 0; i < BIOVEC_NR_POOLS; i++) { 20191da177e4SLinus Torvalds int size; 20201da177e4SLinus Torvalds struct biovec_slab *bvs = bvec_slabs + i; 20211da177e4SLinus Torvalds 2022a7fcd37cSJens Axboe if (bvs->nr_vecs <= BIO_INLINE_VECS) { 2023a7fcd37cSJens Axboe bvs->slab = NULL; 2024a7fcd37cSJens Axboe continue; 2025a7fcd37cSJens Axboe } 2026a7fcd37cSJens Axboe 20271da177e4SLinus Torvalds size = bvs->nr_vecs * sizeof(struct bio_vec); 20281da177e4SLinus Torvalds bvs->slab = kmem_cache_create(bvs->name, size, 0, 202920c2df83SPaul Mundt SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 20301da177e4SLinus Torvalds } 20311da177e4SLinus Torvalds } 20321da177e4SLinus Torvalds 20331da177e4SLinus Torvalds static int __init init_bio(void) 20341da177e4SLinus Torvalds { 2035bb799ca0SJens Axboe bio_slab_max = 2; 2036bb799ca0SJens Axboe bio_slab_nr = 0; 2037bb799ca0SJens Axboe bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); 2038bb799ca0SJens Axboe if (!bio_slabs) 2039bb799ca0SJens Axboe panic("bio: can't allocate bios\n"); 20401da177e4SLinus Torvalds 20417878cba9SMartin K. Petersen bio_integrity_init(); 20421da177e4SLinus Torvalds biovec_init_slabs(); 20431da177e4SLinus Torvalds 2044bb799ca0SJens Axboe fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 20451da177e4SLinus Torvalds if (!fs_bio_set) 20461da177e4SLinus Torvalds panic("bio: can't allocate bios\n"); 20471da177e4SLinus Torvalds 2048a91a2785SMartin K. Petersen if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) 2049a91a2785SMartin K. 
Petersen panic("bio: can't create integrity pool\n"); 2050a91a2785SMartin K. Petersen 20511da177e4SLinus Torvalds return 0; 20521da177e4SLinus Torvalds } 20531da177e4SLinus Torvalds subsys_initcall(init_bio); 2054
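/*
 * A hedged allocation sketch (not part of the original file): once
 * init_bio() has run, bio_alloc() hands out bios backed by fs_bio_set.
 * bdev, sector and page below are hypothetical caller-owned values and
 * error handling is trimmed:
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, 1);
 *
 *	bio->bi_bdev = bdev;
 *	bio->bi_iter.bi_sector = sector;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	submit_bio(READ, bio);
 */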