/*
 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
 *
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <scsi/sg.h>		/* for struct sg_iovec */

#include <trace/events/block.h>

/*
 * Test patch to inline a certain number of bi_io_vec's inside the bio
 * itself, to shrink a bio data allocation from two mempool calls to one
 */
#define BIO_INLINE_VECS		4

/*
 * if you change this list, also change bvec_alloc or things will
 * break badly! cannot be bigger than what you can fit into an
 * unsigned short
 */
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set *fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

/*
 * Our slab pool management
 */
struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
};
static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;

static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab, *new_bio_slabs;
	unsigned int new_bio_slab_max;
	unsigned int i, entry = -1;

	mutex_lock(&bio_slab_lock);

	i = 0;
	while (i < bio_slab_nr) {
		bslab = &bio_slabs[i];

		if (!bslab->slab && entry == -1)
			entry = i;
		else if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			break;
		}
		i++;
	}

	if (slab)
		goto out_unlock;

	if (bio_slab_nr == bio_slab_max && entry == -1) {
		new_bio_slab_max = bio_slab_max << 1;
		new_bio_slabs = krealloc(bio_slabs,
					 new_bio_slab_max * sizeof(struct bio_slab),
					 GFP_KERNEL);
		if (!new_bio_slabs)
			goto out_unlock;
		bio_slab_max = new_bio_slab_max;
		bio_slabs = new_bio_slabs;
	}
	if (entry == -1)
		entry = bio_slab_nr++;

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				 SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;
out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}

static void bio_put_slab(struct bio_set *bs)
{
	struct bio_slab *bslab = NULL;
	unsigned int i;

	mutex_lock(&bio_slab_lock);

	for (i = 0; i < bio_slab_nr; i++) {
		if (bs->bio_slab == bio_slabs[i].slab) {
			bslab = &bio_slabs[i];
			break;
		}
	}

	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
		goto out;

	WARN_ON(!bslab->slab_ref);

	if (--bslab->slab_ref)
		goto out;

	kmem_cache_destroy(bslab->slab);
	bslab->slab = NULL;

out:
	mutex_unlock(&bio_slab_lock);
}

unsigned int bvec_nr_vecs(unsigned short idx)
{
	return bvec_slabs[idx].nr_vecs;
}

void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
{
	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);

	if (idx == BIOVEC_MAX_IDX)
		mempool_free(bv, pool);
	else {
		struct biovec_slab *bvs = bvec_slabs + idx;

		kmem_cache_free(bvs->slab, bv);
	}
}

struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
			   mempool_t *pool)
{
	struct bio_vec *bvl;

	/*
	 * see comment near bvec_array define!
	 */
	switch (nr) {
	case 1:
		*idx = 0;
		break;
	case 2 ... 4:
		*idx = 1;
		break;
	case 5 ... 16:
		*idx = 2;
		break;
	case 17 ... 64:
		*idx = 3;
		break;
	case 65 ... 128:
		*idx = 4;
		break;
	case 129 ... BIO_MAX_PAGES:
		*idx = 5;
		break;
	default:
		return NULL;
	}

	/*
	 * idx now points to the pool we want to allocate from. only the
	 * 1-vec entry pool is mempool backed.
	 */
	if (*idx == BIOVEC_MAX_IDX) {
fallback:
		bvl = mempool_alloc(pool, gfp_mask);
	} else {
		struct biovec_slab *bvs = bvec_slabs + *idx;
		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);

		/*
		 * Make this allocation restricted and don't dump info on
		 * allocation failures, since we'll fallback to the mempool
		 * in case of failure.
		 */
		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

		/*
		 * Try a slab allocation. If this fails and __GFP_WAIT
		 * is set, retry with the 1-entry mempool
		 */
		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
			*idx = BIOVEC_MAX_IDX;
			goto fallback;
		}
	}

	return bvl;
}

static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (bio_integrity(bio))
		bio_integrity_free(bio);
}

static void bio_free(struct bio *bio)
{
	struct bio_set *bs = bio->bi_pool;
	void *p;

	__bio_free(bio);

	if (bs) {
		if (bio_flagged(bio, BIO_OWNS_VEC))
			bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));

		/*
		 * If we have front padding, adjust the bio pointer before freeing
		 */
		p = bio;
		p -= bs->front_pad;

		mempool_free(p, bs->bio_pool);
	} else {
		/* Bio was allocated by bio_kmalloc() */
		kfree(bio);
	}
}

void bio_init(struct bio *bio)
{
	memset(bio, 0, sizeof(*bio));
	bio->bi_flags = 1 << BIO_UPTODATE;
	atomic_set(&bio->bi_remaining, 1);
	atomic_set(&bio->bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);
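
/*
 * Illustrative sketch (not part of this file): bio_init() is enough for a
 * short-lived, caller-managed bio, as long as the caller also wires up a
 * biovec by hand; my_bdev and my_page are hypothetical.
 *
 *	struct bio bio;
 *	struct bio_vec bvec;
 *
 *	bio_init(&bio);
 *	bio.bi_io_vec = &bvec;
 *	bio.bi_max_vecs = 1;
 *	bio.bi_bdev = my_bdev;
 *	bio_add_page(&bio, my_page, PAGE_SIZE, 0);
 */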

/**
 * bio_reset - reinitialize a bio
 * @bio:	bio to reset
 *
 * Description:
 *   After calling bio_reset(), @bio will be in the same state as a freshly
 *   allocated bio returned by bio_alloc_bioset() - the only fields that are
 *   preserved are the ones that are initialized by bio_alloc_bioset(). See
 *   comment in struct bio.
 */
void bio_reset(struct bio *bio)
{
	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);

	__bio_free(bio);

	memset(bio, 0, BIO_RESET_BYTES);
	bio->bi_flags = flags|(1 << BIO_UPTODATE);
	atomic_set(&bio->bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

static void bio_chain_endio(struct bio *bio, int error)
{
	bio_endio(bio->bi_private, error);
	bio_put(bio);
}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
 * @parent: the @bio's parent bio
 *
 * The caller won't have a bi_end_io called when @bio completes - instead,
 * @parent's bi_end_io won't be called until both @parent and @bio have
 * completed; the chained bio will also be freed when it completes.
 *
 * The caller must not set bi_private or bi_end_io in @bio.
 */
void bio_chain(struct bio *bio, struct bio *parent)
{
	BUG_ON(bio->bi_private || bio->bi_end_io);

	bio->bi_private = parent;
	bio->bi_end_io	= bio_chain_endio;
	atomic_inc(&parent->bi_remaining);
}
EXPORT_SYMBOL(bio_chain);
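
/*
 * Illustrative sketch (not part of this file): a driver splitting a bio can
 * tie the fragment's completion to the original with bio_chain(); my_bio_set
 * and my_split_bytes are hypothetical.
 *
 *	struct bio *split = bio_clone_fast(parent, GFP_NOIO, my_bio_set);
 *
 *	split->bi_iter.bi_size = my_split_bytes;
 *	bio_advance(parent, my_split_bytes);
 *	bio_chain(split, parent);
 *	generic_make_request(split);
 *	// parent's bi_end_io now runs only once both bios have completed
 */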

static void bio_alloc_rescue(struct work_struct *work)
{
	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
	struct bio *bio;

	while (1) {
		spin_lock(&bs->rescue_lock);
		bio = bio_list_pop(&bs->rescue_list);
		spin_unlock(&bs->rescue_lock);

		if (!bio)
			break;

		generic_make_request(bio);
	}
}

static void punt_bios_to_rescuer(struct bio_set *bs)
{
	struct bio_list punt, nopunt;
	struct bio *bio;

	/*
	 * In order to guarantee forward progress we must punt only bios that
	 * were allocated from this bio_set; otherwise, if there was a bio on
	 * there for a stacking driver higher up in the stack, processing it
	 * could require allocating bios from this bio_set, and doing that from
	 * our own rescuer would be bad.
	 *
	 * Since bio lists are singly linked, pop them all instead of trying to
	 * remove from the middle of the list:
	 */

	bio_list_init(&punt);
	bio_list_init(&nopunt);

	while ((bio = bio_list_pop(current->bio_list)))
		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);

	*current->bio_list = nopunt;

	spin_lock(&bs->rescue_lock);
	bio_list_merge(&bs->rescue_list, &punt);
	spin_unlock(&bs->rescue_lock);

	queue_work(bs->rescue_workqueue, &bs->rescue_work);
}

/**
 * bio_alloc_bioset - allocate a bio for I/O
 * @gfp_mask:   the GFP_ mask given to the slab allocator
 * @nr_iovecs:	number of iovecs to pre-allocate
 * @bs:		the bio_set to allocate from.
 *
 * Description:
 *   If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
 *   backed by the @bs's mempool.
 *
 *   When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
 *   able to allocate a bio. This is due to the mempool guarantees. To make this
 *   work, callers must never allocate more than 1 bio at a time from this pool.
 *   Callers that need to allocate more than 1 bio must always submit the
 *   previously allocated bio for IO before attempting to allocate a new one.
 *   Failure to do so can cause deadlocks under memory pressure.
 *
 *   Note that when running under generic_make_request() (i.e. any block
 *   driver), bios are not submitted until after you return - see the code in
 *   generic_make_request() that converts recursion into iteration, to prevent
 *   stack overflows.
 *
 *   This would normally mean allocating multiple bios under
 *   generic_make_request() would be susceptible to deadlocks, but we have
 *   deadlock avoidance code that resubmits any blocked bios from a rescuer
 *   thread.
 *
 *   However, we do not guarantee forward progress for allocations from other
 *   mempools. Doing multiple allocations from the same mempool under
 *   generic_make_request() should be avoided - instead, use bio_set's front_pad
 *   for per bio allocations.
 *
 *   RETURNS:
 *   Pointer to new bio on success, NULL on failure.
 */
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
	gfp_t saved_gfp = gfp_mask;
	unsigned front_pad;
	unsigned inline_vecs;
	unsigned long idx = BIO_POOL_NONE;
	struct bio_vec *bvl = NULL;
	struct bio *bio;
	void *p;

	if (!bs) {
		if (nr_iovecs > UIO_MAXIOV)
			return NULL;

		p = kmalloc(sizeof(struct bio) +
			    nr_iovecs * sizeof(struct bio_vec),
			    gfp_mask);
		front_pad = 0;
		inline_vecs = nr_iovecs;
	} else {
		/* should not use nobvec bioset for nr_iovecs > 0 */
		if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
			return NULL;
		/*
		 * generic_make_request() converts recursion to iteration; this
		 * means if we're running beneath it, any bios we allocate and
		 * submit will not be submitted (and thus freed) until after we
		 * return.
		 *
		 * This exposes us to a potential deadlock if we allocate
		 * multiple bios from the same bio_set() while running
		 * underneath generic_make_request(). If we were to allocate
		 * multiple bios (say a stacking block driver that was splitting
		 * bios), we would deadlock if we exhausted the mempool's
		 * reserve.
		 *
		 * We solve this, and guarantee forward progress, with a rescuer
		 * workqueue per bio_set. If we go to allocate and there are
		 * bios on current->bio_list, we first try the allocation
		 * without __GFP_WAIT; if that fails, we punt those bios we
		 * would be blocking to the rescuer workqueue before we retry
		 * with the original gfp_flags.
		 */

		if (current->bio_list && !bio_list_empty(current->bio_list))
			gfp_mask &= ~__GFP_WAIT;

		p = mempool_alloc(bs->bio_pool, gfp_mask);
		if (!p && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			p = mempool_alloc(bs->bio_pool, gfp_mask);
		}

		front_pad = bs->front_pad;
		inline_vecs = BIO_INLINE_VECS;
	}

	if (unlikely(!p))
		return NULL;

	bio = p + front_pad;
	bio_init(bio);

	if (nr_iovecs > inline_vecs) {
		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		if (!bvl && gfp_mask != saved_gfp) {
			punt_bios_to_rescuer(bs);
			gfp_mask = saved_gfp;
			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
		}

		if (unlikely(!bvl))
			goto err_free;

		bio->bi_flags |= 1 << BIO_OWNS_VEC;
	} else if (nr_iovecs) {
		bvl = bio->bi_inline_vecs;
	}

	bio->bi_pool = bs;
	bio->bi_flags |= idx << BIO_POOL_OFFSET;
	bio->bi_max_vecs = nr_iovecs;
	bio->bi_io_vec = bvl;
	return bio;

err_free:
	mempool_free(p, bs->bio_pool);
	return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
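
/*
 * Illustrative sketch (not part of this file): honouring the "one bio at a
 * time per bio_set" rule from the comment above - submit each bio before
 * allocating the next, so the mempool reserve can guarantee forward
 * progress; my_bio_set and my_fill_bio() are hypothetical.
 *
 *	while (work_left) {
 *		struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs,
 *						   my_bio_set);
 *
 *		my_fill_bio(bio);
 *		submit_bio(WRITE, bio);	// submit before the next allocation
 *	}
 */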

void zero_fill_bio(struct bio *bio)
{
	unsigned long flags;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_segment(bv, bio, iter) {
		char *data = bvec_kmap_irq(&bv, &flags);
		memset(data, 0, bv.bv_len);
		flush_dcache_page(bv.bv_page);
		bvec_kunmap_irq(data, &flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);

/**
 * bio_put - release a reference to a bio
 * @bio:   bio to release reference to
 *
 * Description:
 *   Put a reference to a &struct bio, either one you have gotten with
 *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));

	/*
	 * last put frees it
	 */
	if (atomic_dec_and_test(&bio->bi_cnt))
		bio_free(bio);
}
EXPORT_SYMBOL(bio_put);

inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
{
	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
		blk_recount_segments(q, bio);

	return bio->bi_phys_segments;
}
EXPORT_SYMBOL(bio_phys_segments);

/**
 * 	__bio_clone_fast - clone a bio that shares the original bio's biovec
 * 	@bio: destination bio
 * 	@bio_src: bio to clone
 *
 *	Clone a &bio. Caller will own the returned bio, but not
 *	the actual data it points to. Reference count of returned
 * 	bio will be one.
 *
 * 	Caller must ensure that @bio_src is not freed before @bio.
 */
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);

	/*
	 * most users will be overriding ->bi_bdev with a new target,
	 * so we don't set nor calculate new physical/hw segment counts here
	 */
	bio->bi_bdev = bio_src->bi_bdev;
	bio->bi_flags |= 1 << BIO_CLONED;
	bio->bi_rw = bio_src->bi_rw;
	bio->bi_iter = bio_src->bi_iter;
	bio->bi_io_vec = bio_src->bi_io_vec;
}
EXPORT_SYMBOL(__bio_clone_fast);

/**
 *	bio_clone_fast - clone a bio that shares the original bio's biovec
 *	@bio: bio to clone
 *	@gfp_mask: allocation priority
 *	@bs: bio_set to allocate from
 *
 * 	Like __bio_clone_fast, only also allocates the returned bio
 */
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
	struct bio *b;

	b = bio_alloc_bioset(gfp_mask, 0, bs);
	if (!b)
		return NULL;

	__bio_clone_fast(b, bio);

	if (bio_integrity(bio)) {
		int ret;

		ret = bio_integrity_clone(b, bio, gfp_mask);

		if (ret < 0) {
			bio_put(b);
			return NULL;
		}
	}

	return b;
}
EXPORT_SYMBOL(bio_clone_fast);
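
/*
 * Illustrative sketch (not part of this file): a stacking driver redirecting
 * I/O clones the bio, retargets it, and hooks its completion; the names
 * prefixed my_ are hypothetical.
 *
 *	struct bio *clone = bio_clone_fast(bio, GFP_NOIO, my_bio_set);
 *
 *	clone->bi_bdev = my_lower_bdev;
 *	clone->bi_iter.bi_sector = my_remap_sector(bio->bi_iter.bi_sector);
 *	clone->bi_end_io = my_clone_endio;
 *	clone->bi_private = bio;
 *	generic_make_request(clone);
 */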

/**
 * 	bio_clone_bioset - clone a bio
 * 	@bio_src: bio to clone
 *	@gfp_mask: allocation priority
 *	@bs: bio_set to allocate from
 *
 *	Clone bio. Caller will own the returned bio, but not the actual data it
 *	points to. Reference count of returned bio will be one.
 */
struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
			     struct bio_set *bs)
{
	struct bvec_iter iter;
	struct bio_vec bv;
	struct bio *bio;

	/*
	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
	 * bio_src->bi_io_vec to bio->bi_io_vec.
	 *
	 * We can't do that anymore, because:
	 *
	 *  - The point of cloning the biovec is to produce a bio with a biovec
	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
	 *
	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
	 *    But the clone should succeed as long as the number of biovecs we
	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
	 *
	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
	 *    that does not own the bio - reason being drivers don't use it for
	 *    iterating over the biovec anymore, so expecting it to be kept up
	 *    to date (i.e. for clones that share the parent biovec) is just
	 *    asking for trouble and would force extra work on
	 *    __bio_clone_fast() anyways.
	 */

	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
	if (!bio)
		return NULL;

	bio->bi_bdev		= bio_src->bi_bdev;
	bio->bi_rw		= bio_src->bi_rw;
	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;

	if (bio->bi_rw & REQ_DISCARD)
		goto integrity_clone;

	if (bio->bi_rw & REQ_WRITE_SAME) {
		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
		goto integrity_clone;
	}

	bio_for_each_segment(bv, bio_src, iter)
		bio->bi_io_vec[bio->bi_vcnt++] = bv;

integrity_clone:
	if (bio_integrity(bio_src)) {
		int ret;

		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
		if (ret < 0) {
			bio_put(bio);
			return NULL;
		}
	}

	return bio;
}
EXPORT_SYMBOL(bio_clone_bioset);

/**
 *	bio_get_nr_vecs		- return approx number of vecs
 *	@bdev:  I/O target
 *
 *	Return the approximate number of pages we can send to this target.
 *	There's no guarantee that you will be able to fit this number of pages
 *	into a bio, it does not account for dynamic restrictions that vary
 *	on offset.
 */
int bio_get_nr_vecs(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);
	int nr_pages;

	nr_pages = min_t(unsigned,
		     queue_max_segments(q),
		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);

	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
}
EXPORT_SYMBOL(bio_get_nr_vecs);

static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
			  *page, unsigned int len, unsigned int offset,
			  unsigned int max_sectors)
{
	int retried_segments = 0;
	struct bio_vec *bvec;

	/*
	 * cloned bio must not modify vec list
	 */
	if (unlikely(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
		return 0;

	/*
	 * For filesystems with a blocksize smaller than the pagesize
	 * we will often be called with the same page as last time and
	 * a consecutive offset.  Optimize this special case.
	 */
	if (bio->bi_vcnt > 0) {
		struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == prev->bv_page &&
		    offset == prev->bv_offset + prev->bv_len) {
			unsigned int prev_bv_len = prev->bv_len;
			prev->bv_len += len;

			if (q->merge_bvec_fn) {
				struct bvec_merge_data bvm = {
					/* prev_bvec is already charged in
					   bi_size, discharge it in order to
					   simulate merging updated prev_bvec
					   as new bvec. */
					.bi_bdev = bio->bi_bdev,
					.bi_sector = bio->bi_iter.bi_sector,
					.bi_size = bio->bi_iter.bi_size -
						prev_bv_len,
					.bi_rw = bio->bi_rw,
				};

				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
					prev->bv_len -= len;
					return 0;
				}
			}

			bio->bi_iter.bi_size += len;
			goto done;
		}

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
		    bvec_gap_to_prev(prev, offset))
			return 0;
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	/*
	 * setup the new entry, we might clear it again later if we
	 * cannot add the page
	 */
	bvec = &bio->bi_io_vec[bio->bi_vcnt];
	bvec->bv_page = page;
	bvec->bv_len = len;
	bvec->bv_offset = offset;
	bio->bi_vcnt++;
	bio->bi_phys_segments++;
	bio->bi_iter.bi_size += len;

	/*
	 * Perform a recount if the number of segments is greater
	 * than queue_max_segments(q).
	 */

	while (bio->bi_phys_segments > queue_max_segments(q)) {

		if (retried_segments)
			goto failed;

		retried_segments = 1;
		blk_recount_segments(q, bio);
	}

	/*
	 * if queue has other restrictions (eg varying max sector size
	 * depending on offset), it can specify a merge_bvec_fn in the
	 * queue to get further control
	 */
	if (q->merge_bvec_fn) {
		struct bvec_merge_data bvm = {
			.bi_bdev = bio->bi_bdev,
			.bi_sector = bio->bi_iter.bi_sector,
			.bi_size = bio->bi_iter.bi_size - len,
			.bi_rw = bio->bi_rw,
		};

		/*
		 * merge_bvec_fn() returns number of bytes it can accept
		 * at this offset
		 */
		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
			goto failed;
	}

	/* If we may be able to merge these biovecs, force a recount */
	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
		bio->bi_flags &= ~(1 << BIO_SEG_VALID);

 done:
	return len;

 failed:
	bvec->bv_page = NULL;
	bvec->bv_len = 0;
	bvec->bv_offset = 0;
	bio->bi_vcnt--;
	bio->bi_iter.bi_size -= len;
	blk_recount_segments(q, bio);
	return 0;
}

/**
 *	bio_add_pc_page	-	attempt to add page to bio
 *	@q: the target queue
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block device
 *	limitations. The target block device must allow bio's up to PAGE_SIZE,
 *	so it is always possible to add a single page to an empty bio.
 *
 *	This should only be used by REQ_PC bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	return __bio_add_page(q, bio, page, len, offset,
			      queue_max_hw_sectors(q));
}
EXPORT_SYMBOL(bio_add_pc_page);

/**
 *	bio_add_page	-	attempt to add page to bio
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist. This can fail for a
 *	number of reasons, such as the bio being full or target block device
 *	limitations. The target block device must allow bio's up to PAGE_SIZE,
 *	so it is always possible to add a single page to an empty bio.
 */
int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
		 unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int max_sectors;

	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
		max_sectors = len >> 9;

	return __bio_add_page(q, bio, page, len, offset, max_sectors);
}
EXPORT_SYMBOL(bio_add_page);
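
/*
 * Illustrative sketch (not part of this file): building a bio one page at a
 * time and checking the return value, since bio_add_page() returns fewer
 * than @len bytes when the bio is full or the queue refuses the merge;
 * my_pages, my_npages, my_bdev and my_sector are hypothetical.
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, my_npages);
 *	int i;
 *
 *	bio->bi_bdev = my_bdev;
 *	bio->bi_iter.bi_sector = my_sector;
 *
 *	for (i = 0; i < my_npages; i++)
 *		if (bio_add_page(bio, my_pages[i], PAGE_SIZE, 0) < PAGE_SIZE)
 *			break;	// submit this bio, continue with a new one
 */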

struct submit_bio_ret {
	struct completion event;
	int error;
};

static void submit_bio_wait_endio(struct bio *bio, int error)
{
	struct submit_bio_ret *ret = bio->bi_private;

	ret->error = error;
	complete(&ret->event);
}

/**
 * submit_bio_wait - submit a bio, and wait until it completes
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
 * bio_endio() on failure.
 */
int submit_bio_wait(int rw, struct bio *bio)
{
	struct submit_bio_ret ret;

	rw |= REQ_SYNC;
	init_completion(&ret.event);
	bio->bi_private = &ret;
	bio->bi_end_io = submit_bio_wait_endio;
	submit_bio(rw, bio);
	wait_for_completion(&ret.event);

	return ret.error;
}
EXPORT_SYMBOL(submit_bio_wait);
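
/*
 * Illustrative sketch (not part of this file): synchronously reading one
 * page in process context; my_bdev, my_sector and my_page are hypothetical.
 *
 *	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
 *	int err;
 *
 *	bio->bi_bdev = my_bdev;
 *	bio->bi_iter.bi_sector = my_sector;
 *	bio_add_page(bio, my_page, PAGE_SIZE, 0);
 *	err = submit_bio_wait(READ, bio);	// sleeps until completion
 *	bio_put(bio);
 */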

/**
 * bio_advance - increment/complete a bio by some number of bytes
 * @bio:	bio to advance
 * @bytes:	number of bytes to complete
 *
 * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
 * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
 * be updated on the last bvec as well.
 *
 * @bio will then represent the remaining, uncompleted portion of the io.
 */
void bio_advance(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))
		bio_integrity_advance(bio, bytes);

	bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);

/**
 * bio_alloc_pages - allocates a single page for each bvec in a bio
 * @bio: bio to allocate pages for
 * @gfp_mask: flags for allocation
 *
 * Allocates pages up to @bio->bi_vcnt.
 *
 * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages
 * are freed.
 */
int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
{
	int i;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, i) {
		bv->bv_page = alloc_page(gfp_mask);
		if (!bv->bv_page) {
			while (--bv >= bio->bi_io_vec)
				__free_page(bv->bv_page);
			return -ENOMEM;
		}
	}

	return 0;
}
EXPORT_SYMBOL(bio_alloc_pages);
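
/*
 * Illustrative sketch (not part of this file): backing a fresh bio with
 * newly allocated pages, e.g. as a bounce buffer for bio_copy_data() below.
 * bi_vcnt and the bvec lengths are set by hand here because the pages come
 * from bio_alloc_pages() rather than bio_add_page(); my_bio_set is
 * hypothetical.
 *
 *	struct bio *bounce = bio_alloc_bioset(GFP_NOIO, nr_vecs, my_bio_set);
 *	struct bio_vec *bv;
 *	int i;
 *
 *	bounce->bi_vcnt = nr_vecs;
 *	bio_for_each_segment_all(bv, bounce, i)
 *		bv->bv_len = PAGE_SIZE;
 *	bounce->bi_iter.bi_size = nr_vecs * PAGE_SIZE;
 *	if (bio_alloc_pages(bounce, GFP_NOIO))
 *		goto err;
 */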

/**
 * bio_copy_data - copy contents of data buffers from one chain of bios to
 * another
 * @src: source bio list
 * @dst: destination bio list
 *
 * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
 * @src and @dst as linked lists of bios.
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{
	struct bvec_iter src_iter, dst_iter;
	struct bio_vec src_bv, dst_bv;
	void *src_p, *dst_p;
	unsigned bytes;

	src_iter = src->bi_iter;
	dst_iter = dst->bi_iter;

	while (1) {
		if (!src_iter.bi_size) {
			src = src->bi_next;
			if (!src)
				break;

			src_iter = src->bi_iter;
		}

		if (!dst_iter.bi_size) {
			dst = dst->bi_next;
			if (!dst)
				break;

			dst_iter = dst->bi_iter;
		}

		src_bv = bio_iter_iovec(src, src_iter);
		dst_bv = bio_iter_iovec(dst, dst_iter);

		bytes = min(src_bv.bv_len, dst_bv.bv_len);

		src_p = kmap_atomic(src_bv.bv_page);
		dst_p = kmap_atomic(dst_bv.bv_page);

		memcpy(dst_p + dst_bv.bv_offset,
		       src_p + src_bv.bv_offset,
		       bytes);

		kunmap_atomic(dst_p);
		kunmap_atomic(src_p);

		bio_advance_iter(src, &src_iter, bytes);
		bio_advance_iter(dst, &dst_iter, bytes);
	}
}
EXPORT_SYMBOL(bio_copy_data);
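
/*
 * Illustrative sketch (not part of this file): completing a read served from
 * the bounce bio built above by copying its payload back into the original
 * bio before ending it.
 *
 *	bio_copy_data(orig_bio, bounce);	// copies min of the two sizes
 *	bio_endio(orig_bio, 0);
 */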

struct bio_map_data {
	int nr_sgvecs;
	int is_our_pages;
	struct sg_iovec sgvecs[];
};

static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
			     const struct sg_iovec *iov, int iov_count,
			     int is_our_pages)
{
	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
	bmd->nr_sgvecs = iov_count;
	bmd->is_our_pages = is_our_pages;
	bio->bi_private = bmd;
}

static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
					       gfp_t gfp_mask)
{
	if (iov_count > UIO_MAXIOV)
		return NULL;

	return kmalloc(sizeof(struct bio_map_data) +
		       sizeof(struct sg_iovec) * iov_count, gfp_mask);
}

static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
			  int to_user, int from_user, int do_free_page)
{
	int ret = 0, i;
	struct bio_vec *bvec;
	int iov_idx = 0;
	unsigned int iov_off = 0;

	bio_for_each_segment_all(bvec, bio, i) {
		char *bv_addr = page_address(bvec->bv_page);
		unsigned int bv_len = bvec->bv_len;

		while (bv_len && iov_idx < iov_count) {
			unsigned int bytes;
			char __user *iov_addr;

			bytes = min_t(unsigned int,
				      iov[iov_idx].iov_len - iov_off, bv_len);
			iov_addr = iov[iov_idx].iov_base + iov_off;

			if (!ret) {
				if (to_user)
					ret = copy_to_user(iov_addr, bv_addr,
							   bytes);

				if (from_user)
					ret = copy_from_user(bv_addr, iov_addr,
							     bytes);

				if (ret)
					ret = -EFAULT;
			}

			bv_len -= bytes;
			bv_addr += bytes;
			iov_addr += bytes;
			iov_off += bytes;

			if (iov[iov_idx].iov_len == iov_off) {
				iov_idx++;
				iov_off = 0;
			}
		}

		if (do_free_page)
			__free_page(bvec->bv_page);
	}

	return ret;
}

/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user() and write back data
 *	to user space in case of a read.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	struct bio_vec *bvec;
	int ret = 0, i;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free.
		 */
		if (current->mm)
			ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
					     bio_data_dir(bio) == READ,
					     0, bmd->is_our_pages);
		else if (bmd->is_our_pages)
			bio_for_each_segment_all(bvec, bio, i)
				__free_page(bvec->bv_page);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(bio_uncopy_user);

/**
 *	bio_copy_user_iov	-	copy user data to bio
 *	@q:		destination block queue
 *	@map_data:	pointer to the rq_map_data holding pages (if necessary)
 *	@iov:		the iovec.
 *	@iov_count:	number of elements in the iovec
 *	@write_to_vm:	bool indicating writing to pages or not
 *	@gfp_mask:	memory allocation flags
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with a call to
 *	bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
			      const struct sg_iovec *iov, int iov_count,
			      int write_to_vm, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;
	int nr_pages = 0;
	unsigned int len = 0;
map_data->offset & ~PAGE_MASK : 0; 11591da177e4SLinus Torvalds 1160c5dec1c3SFUJITA Tomonori for (i = 0; i < iov_count; i++) { 1161c5dec1c3SFUJITA Tomonori unsigned long uaddr; 1162c5dec1c3SFUJITA Tomonori unsigned long end; 1163c5dec1c3SFUJITA Tomonori unsigned long start; 1164c5dec1c3SFUJITA Tomonori 1165c5dec1c3SFUJITA Tomonori uaddr = (unsigned long)iov[i].iov_base; 1166c5dec1c3SFUJITA Tomonori end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1167c5dec1c3SFUJITA Tomonori start = uaddr >> PAGE_SHIFT; 1168c5dec1c3SFUJITA Tomonori 1169cb4644caSJens Axboe /* 1170cb4644caSJens Axboe * Overflow, abort 1171cb4644caSJens Axboe */ 1172cb4644caSJens Axboe if (end < start) 1173cb4644caSJens Axboe return ERR_PTR(-EINVAL); 1174cb4644caSJens Axboe 1175c5dec1c3SFUJITA Tomonori nr_pages += end - start; 1176c5dec1c3SFUJITA Tomonori len += iov[i].iov_len; 1177c5dec1c3SFUJITA Tomonori } 1178c5dec1c3SFUJITA Tomonori 117969838727SFUJITA Tomonori if (offset) 118069838727SFUJITA Tomonori nr_pages++; 118169838727SFUJITA Tomonori 11827410b3c6SFabian Frederick bmd = bio_alloc_map_data(iov_count, gfp_mask); 11831da177e4SLinus Torvalds if (!bmd) 11841da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 11851da177e4SLinus Torvalds 11861da177e4SLinus Torvalds ret = -ENOMEM; 1187a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 11881da177e4SLinus Torvalds if (!bio) 11891da177e4SLinus Torvalds goto out_bmd; 11901da177e4SLinus Torvalds 11917b6d91daSChristoph Hellwig if (!write_to_vm) 11927b6d91daSChristoph Hellwig bio->bi_rw |= REQ_WRITE; 11931da177e4SLinus Torvalds 11941da177e4SLinus Torvalds ret = 0; 119556c451f4SFUJITA Tomonori 119656c451f4SFUJITA Tomonori if (map_data) { 1197e623ddb4SFUJITA Tomonori nr_pages = 1 << map_data->page_order; 119856c451f4SFUJITA Tomonori i = map_data->offset / PAGE_SIZE; 119956c451f4SFUJITA Tomonori } 1200e623ddb4SFUJITA Tomonori while (len) { 1201e623ddb4SFUJITA Tomonori unsigned int bytes = PAGE_SIZE; 12021da177e4SLinus Torvalds 120356c451f4SFUJITA Tomonori bytes -= offset; 120456c451f4SFUJITA Tomonori 12051da177e4SLinus Torvalds if (bytes > len) 12061da177e4SLinus Torvalds bytes = len; 12071da177e4SLinus Torvalds 1208152e283fSFUJITA Tomonori if (map_data) { 1209e623ddb4SFUJITA Tomonori if (i == map_data->nr_entries * nr_pages) { 1210152e283fSFUJITA Tomonori ret = -ENOMEM; 1211152e283fSFUJITA Tomonori break; 1212152e283fSFUJITA Tomonori } 1213e623ddb4SFUJITA Tomonori 1214e623ddb4SFUJITA Tomonori page = map_data->pages[i / nr_pages]; 1215e623ddb4SFUJITA Tomonori page += (i % nr_pages); 1216e623ddb4SFUJITA Tomonori 1217e623ddb4SFUJITA Tomonori i++; 1218e623ddb4SFUJITA Tomonori } else { 1219a3bce90eSFUJITA Tomonori page = alloc_page(q->bounce_gfp | gfp_mask); 12201da177e4SLinus Torvalds if (!page) { 12211da177e4SLinus Torvalds ret = -ENOMEM; 12221da177e4SLinus Torvalds break; 12231da177e4SLinus Torvalds } 1224e623ddb4SFUJITA Tomonori } 12251da177e4SLinus Torvalds 122656c451f4SFUJITA Tomonori if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) 12271da177e4SLinus Torvalds break; 12281da177e4SLinus Torvalds 12291da177e4SLinus Torvalds len -= bytes; 123056c451f4SFUJITA Tomonori offset = 0; 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds if (ret) 12341da177e4SLinus Torvalds goto cleanup; 12351da177e4SLinus Torvalds 12361da177e4SLinus Torvalds /* 12371da177e4SLinus Torvalds * success 12381da177e4SLinus Torvalds */ 1239ecb554a8SFUJITA Tomonori if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || 1240ecb554a8SFUJITA Tomonori 
(map_data && map_data->from_user)) { 1241c8db4448SKent Overstreet ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0); 1242c5dec1c3SFUJITA Tomonori if (ret) 12431da177e4SLinus Torvalds goto cleanup; 12441da177e4SLinus Torvalds } 12451da177e4SLinus Torvalds 1246152e283fSFUJITA Tomonori bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); 12471da177e4SLinus Torvalds return bio; 12481da177e4SLinus Torvalds cleanup: 1249152e283fSFUJITA Tomonori if (!map_data) 1250d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) 12511da177e4SLinus Torvalds __free_page(bvec->bv_page); 12521da177e4SLinus Torvalds 12531da177e4SLinus Torvalds bio_put(bio); 12541da177e4SLinus Torvalds out_bmd: 1255c8db4448SKent Overstreet kfree(bmd); 12561da177e4SLinus Torvalds return ERR_PTR(ret); 12571da177e4SLinus Torvalds } 12581da177e4SLinus Torvalds 1259c5dec1c3SFUJITA Tomonori /** 1260c5dec1c3SFUJITA Tomonori * bio_copy_user - copy user data to bio 1261c5dec1c3SFUJITA Tomonori * @q: destination block queue 1262152e283fSFUJITA Tomonori * @map_data: pointer to the rq_map_data holding pages (if necessary) 1263c5dec1c3SFUJITA Tomonori * @uaddr: start of user address 1264c5dec1c3SFUJITA Tomonori * @len: length in bytes 1265c5dec1c3SFUJITA Tomonori * @write_to_vm: bool indicating writing to pages or not 1266a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags 1267c5dec1c3SFUJITA Tomonori * 1268c5dec1c3SFUJITA Tomonori * Prepares and returns a bio for indirect user io, bouncing data 1269c5dec1c3SFUJITA Tomonori * to/from kernel pages as necessary. Must be paired with 1270c5dec1c3SFUJITA Tomonori * a call to bio_uncopy_user() on io completion. 1271c5dec1c3SFUJITA Tomonori */ 1272152e283fSFUJITA Tomonori struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, 1273152e283fSFUJITA Tomonori unsigned long uaddr, unsigned int len, 1274152e283fSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask) 1275c5dec1c3SFUJITA Tomonori { 1276c5dec1c3SFUJITA Tomonori struct sg_iovec iov; 1277c5dec1c3SFUJITA Tomonori 1278c5dec1c3SFUJITA Tomonori iov.iov_base = (void __user *)uaddr; 1279c5dec1c3SFUJITA Tomonori iov.iov_len = len; 1280c5dec1c3SFUJITA Tomonori 1281152e283fSFUJITA Tomonori return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); 1282c5dec1c3SFUJITA Tomonori } 1283a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_copy_user); 1284c5dec1c3SFUJITA Tomonori 1285165125e1SJens Axboe static struct bio *__bio_map_user_iov(struct request_queue *q, 1286f1970bafSJames Bottomley struct block_device *bdev, 128786d564c8SAl Viro const struct sg_iovec *iov, int iov_count, 1288a3bce90eSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask) 12891da177e4SLinus Torvalds { 1290f1970bafSJames Bottomley int i, j; 1291f1970bafSJames Bottomley int nr_pages = 0; 12921da177e4SLinus Torvalds struct page **pages; 12931da177e4SLinus Torvalds struct bio *bio; 1294f1970bafSJames Bottomley int cur_page = 0; 1295f1970bafSJames Bottomley int ret, offset; 12961da177e4SLinus Torvalds 1297f1970bafSJames Bottomley for (i = 0; i < iov_count; i++) { 1298f1970bafSJames Bottomley unsigned long uaddr = (unsigned long)iov[i].iov_base; 1299f1970bafSJames Bottomley unsigned long len = iov[i].iov_len; 1300f1970bafSJames Bottomley unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1301f1970bafSJames Bottomley unsigned long start = uaddr >> PAGE_SHIFT; 1302f1970bafSJames Bottomley 1303cb4644caSJens Axboe /* 1304cb4644caSJens Axboe * Overflow, abort 1305cb4644caSJens Axboe */ 1306cb4644caSJens Axboe if (end < start)
1307cb4644caSJens Axboe return ERR_PTR(-EINVAL); 1308cb4644caSJens Axboe 1309f1970bafSJames Bottomley nr_pages += end - start; 13101da177e4SLinus Torvalds /* 1311ad2d7225SMike Christie * buffer must be aligned to at least hardsector size for now 13121da177e4SLinus Torvalds */ 1313ad2d7225SMike Christie if (uaddr & queue_dma_alignment(q)) 13141da177e4SLinus Torvalds return ERR_PTR(-EINVAL); 1315f1970bafSJames Bottomley } 1316f1970bafSJames Bottomley 1317f1970bafSJames Bottomley if (!nr_pages) 1318f1970bafSJames Bottomley return ERR_PTR(-EINVAL); 13191da177e4SLinus Torvalds 1320a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 13211da177e4SLinus Torvalds if (!bio) 13221da177e4SLinus Torvalds return ERR_PTR(-ENOMEM); 13231da177e4SLinus Torvalds 13241da177e4SLinus Torvalds ret = -ENOMEM; 1325a3bce90eSFUJITA Tomonori pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); 13261da177e4SLinus Torvalds if (!pages) 13271da177e4SLinus Torvalds goto out; 13281da177e4SLinus Torvalds 1329f1970bafSJames Bottomley for (i = 0; i < iov_count; i++) { 1330f1970bafSJames Bottomley unsigned long uaddr = (unsigned long)iov[i].iov_base; 1331f1970bafSJames Bottomley unsigned long len = iov[i].iov_len; 1332f1970bafSJames Bottomley unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1333f1970bafSJames Bottomley unsigned long start = uaddr >> PAGE_SHIFT; 1334f1970bafSJames Bottomley const int local_nr_pages = end - start; 1335f1970bafSJames Bottomley const int page_limit = cur_page + local_nr_pages; 1336f1970bafSJames Bottomley 1337f5dd33c4SNick Piggin ret = get_user_pages_fast(uaddr, local_nr_pages, 1338f5dd33c4SNick Piggin write_to_vm, &pages[cur_page]); 133999172157SJens Axboe if (ret < local_nr_pages) { 134099172157SJens Axboe ret = -EFAULT; 1341f1970bafSJames Bottomley goto out_unmap; 134299172157SJens Axboe } 13431da177e4SLinus Torvalds 13441da177e4SLinus Torvalds offset = uaddr & ~PAGE_MASK; 1345f1970bafSJames Bottomley for (j = cur_page; j < page_limit; j++) { 13461da177e4SLinus Torvalds unsigned int bytes = PAGE_SIZE - offset; 13471da177e4SLinus Torvalds 13481da177e4SLinus Torvalds if (len <= 0) 13491da177e4SLinus Torvalds break; 13501da177e4SLinus Torvalds 13511da177e4SLinus Torvalds if (bytes > len) 13521da177e4SLinus Torvalds bytes = len; 13531da177e4SLinus Torvalds 13541da177e4SLinus Torvalds /* 13551da177e4SLinus Torvalds * sorry... 
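 * (a descriptive note on what we are sorry about: bio_add_pc_page()
 * may refuse the page once a queue limit is hit; in that case we
 * stop with a partial mapping and release the pages we never
 * added, just below)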
13561da177e4SLinus Torvalds */ 1357defd94b7SMike Christie if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < 1358defd94b7SMike Christie bytes) 13591da177e4SLinus Torvalds break; 13601da177e4SLinus Torvalds 13611da177e4SLinus Torvalds len -= bytes; 13621da177e4SLinus Torvalds offset = 0; 13631da177e4SLinus Torvalds } 13641da177e4SLinus Torvalds 1365f1970bafSJames Bottomley cur_page = j; 13661da177e4SLinus Torvalds /* 13671da177e4SLinus Torvalds * release the pages we didn't map into the bio, if any 13681da177e4SLinus Torvalds */ 1369f1970bafSJames Bottomley while (j < page_limit) 1370f1970bafSJames Bottomley page_cache_release(pages[j++]); 1371f1970bafSJames Bottomley } 13721da177e4SLinus Torvalds 13731da177e4SLinus Torvalds kfree(pages); 13741da177e4SLinus Torvalds 13751da177e4SLinus Torvalds /* 13761da177e4SLinus Torvalds * set data direction, and check if mapped pages need bouncing 13771da177e4SLinus Torvalds */ 13781da177e4SLinus Torvalds if (!write_to_vm) 13797b6d91daSChristoph Hellwig bio->bi_rw |= REQ_WRITE; 13801da177e4SLinus Torvalds 1381f1970bafSJames Bottomley bio->bi_bdev = bdev; 13821da177e4SLinus Torvalds bio->bi_flags |= (1 << BIO_USER_MAPPED); 13831da177e4SLinus Torvalds return bio; 1384f1970bafSJames Bottomley 1385f1970bafSJames Bottomley out_unmap: 1386f1970bafSJames Bottomley for (i = 0; i < nr_pages; i++) { 1387f1970bafSJames Bottomley if (!pages[i]) 1388f1970bafSJames Bottomley break; 1389f1970bafSJames Bottomley page_cache_release(pages[i]); 1390f1970bafSJames Bottomley } 13911da177e4SLinus Torvalds out: 13921da177e4SLinus Torvalds kfree(pages); 13931da177e4SLinus Torvalds bio_put(bio); 13941da177e4SLinus Torvalds return ERR_PTR(ret); 13951da177e4SLinus Torvalds } 13961da177e4SLinus Torvalds 13971da177e4SLinus Torvalds /** 13981da177e4SLinus Torvalds * bio_map_user - map user address into bio 1399165125e1SJens Axboe * @q: the struct request_queue for the bio 14001da177e4SLinus Torvalds * @bdev: destination block device 14011da177e4SLinus Torvalds * @uaddr: start of user address 14021da177e4SLinus Torvalds * @len: length in bytes 14031da177e4SLinus Torvalds * @write_to_vm: bool indicating writing to pages or not 1404a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags 14051da177e4SLinus Torvalds * 14061da177e4SLinus Torvalds * Map the user space address into a bio suitable for io to a block 14071da177e4SLinus Torvalds * device. Returns an error pointer in case of error. 14081da177e4SLinus Torvalds */ 1409165125e1SJens Axboe struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, 1410a3bce90eSFUJITA Tomonori unsigned long uaddr, unsigned int len, int write_to_vm, 1411a3bce90eSFUJITA Tomonori gfp_t gfp_mask) 14121da177e4SLinus Torvalds { 1413f1970bafSJames Bottomley struct sg_iovec iov; 14141da177e4SLinus Torvalds 14153f70353eSviro@ZenIV.linux.org.uk iov.iov_base = (void __user *)uaddr; 1416f1970bafSJames Bottomley iov.iov_len = len; 1417f1970bafSJames Bottomley 1418a3bce90eSFUJITA Tomonori return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); 1419f1970bafSJames Bottomley } 1420a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_map_user); 1421f1970bafSJames Bottomley 1422f1970bafSJames Bottomley /** 1423f1970bafSJames Bottomley * bio_map_user_iov - map user sg_iovec table into bio 1424165125e1SJens Axboe * @q: the struct request_queue for the bio 1425f1970bafSJames Bottomley * @bdev: destination block device 1426f1970bafSJames Bottomley * @iov: the iovec.
1427f1970bafSJames Bottomley * @iov_count: number of elements in the iovec 1428f1970bafSJames Bottomley * @write_to_vm: bool indicating writing to pages or not 1429a3bce90eSFUJITA Tomonori * @gfp_mask: memory allocation flags 1430f1970bafSJames Bottomley * 1431f1970bafSJames Bottomley * Map the user space address into a bio suitable for io to a block 1432f1970bafSJames Bottomley * device. Returns an error pointer in case of error. 1433f1970bafSJames Bottomley */ 1434165125e1SJens Axboe struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, 143586d564c8SAl Viro const struct sg_iovec *iov, int iov_count, 1436a3bce90eSFUJITA Tomonori int write_to_vm, gfp_t gfp_mask) 1437f1970bafSJames Bottomley { 1438f1970bafSJames Bottomley struct bio *bio; 1439f1970bafSJames Bottomley 1440a3bce90eSFUJITA Tomonori bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, 1441a3bce90eSFUJITA Tomonori gfp_mask); 14421da177e4SLinus Torvalds if (IS_ERR(bio)) 14431da177e4SLinus Torvalds return bio; 14441da177e4SLinus Torvalds 14451da177e4SLinus Torvalds /* 14461da177e4SLinus Torvalds * subtle -- if __bio_map_user() ended up bouncing a bio, 14471da177e4SLinus Torvalds * it would normally disappear when its bi_end_io is run. 14481da177e4SLinus Torvalds * however, we need it for the unmap, so grab an extra 14491da177e4SLinus Torvalds * reference to it 14501da177e4SLinus Torvalds */ 14511da177e4SLinus Torvalds bio_get(bio); 14521da177e4SLinus Torvalds 14531da177e4SLinus Torvalds return bio; 14541da177e4SLinus Torvalds } 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds static void __bio_unmap_user(struct bio *bio) 14571da177e4SLinus Torvalds { 14581da177e4SLinus Torvalds struct bio_vec *bvec; 14591da177e4SLinus Torvalds int i; 14601da177e4SLinus Torvalds 14611da177e4SLinus Torvalds /* 14621da177e4SLinus Torvalds * make sure we dirty pages we wrote to 14631da177e4SLinus Torvalds */ 1464d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 14651da177e4SLinus Torvalds if (bio_data_dir(bio) == READ) 14661da177e4SLinus Torvalds set_page_dirty_lock(bvec->bv_page); 14671da177e4SLinus Torvalds 14681da177e4SLinus Torvalds page_cache_release(bvec->bv_page); 14691da177e4SLinus Torvalds } 14701da177e4SLinus Torvalds 14711da177e4SLinus Torvalds bio_put(bio); 14721da177e4SLinus Torvalds } 14731da177e4SLinus Torvalds 14741da177e4SLinus Torvalds /** 14751da177e4SLinus Torvalds * bio_unmap_user - unmap a bio 14761da177e4SLinus Torvalds * @bio: the bio being unmapped 14771da177e4SLinus Torvalds * 14781da177e4SLinus Torvalds * Unmap a bio previously mapped by bio_map_user(). Must be called from 14791da177e4SLinus Torvalds * process context. 14801da177e4SLinus Torvalds * 14811da177e4SLinus Torvalds * bio_unmap_user() may sleep.
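 *
 * A rough map/unmap pairing sketch (illustrative only; @q, @bdev,
 * @uaddr and @len are assumed to come from the caller):
 *
 *	bio = bio_map_user(q, bdev, uaddr, len, 1, GFP_KERNEL);
 *	if (IS_ERR(bio))
 *		return PTR_ERR(bio);
 *	... submit the bio and wait for completion ...
 *	bio_unmap_user(bio);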
14821da177e4SLinus Torvalds */ 14831da177e4SLinus Torvalds void bio_unmap_user(struct bio *bio) 14841da177e4SLinus Torvalds { 14851da177e4SLinus Torvalds __bio_unmap_user(bio); 14861da177e4SLinus Torvalds bio_put(bio); 14871da177e4SLinus Torvalds } 1488a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_unmap_user); 14891da177e4SLinus Torvalds 14906712ecf8SNeilBrown static void bio_map_kern_endio(struct bio *bio, int err) 1491b823825eSJens Axboe { 1492b823825eSJens Axboe bio_put(bio); 1493b823825eSJens Axboe } 1494b823825eSJens Axboe 1495165125e1SJens Axboe static struct bio *__bio_map_kern(struct request_queue *q, void *data, 149627496a8cSAl Viro unsigned int len, gfp_t gfp_mask) 1497df46b9a4SMike Christie { 1498df46b9a4SMike Christie unsigned long kaddr = (unsigned long)data; 1499df46b9a4SMike Christie unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 1500df46b9a4SMike Christie unsigned long start = kaddr >> PAGE_SHIFT; 1501df46b9a4SMike Christie const int nr_pages = end - start; 1502df46b9a4SMike Christie int offset, i; 1503df46b9a4SMike Christie struct bio *bio; 1504df46b9a4SMike Christie 1505a9e9dc24STejun Heo bio = bio_kmalloc(gfp_mask, nr_pages); 1506df46b9a4SMike Christie if (!bio) 1507df46b9a4SMike Christie return ERR_PTR(-ENOMEM); 1508df46b9a4SMike Christie 1509df46b9a4SMike Christie offset = offset_in_page(kaddr); 1510df46b9a4SMike Christie for (i = 0; i < nr_pages; i++) { 1511df46b9a4SMike Christie unsigned int bytes = PAGE_SIZE - offset; 1512df46b9a4SMike Christie 1513df46b9a4SMike Christie if (len <= 0) 1514df46b9a4SMike Christie break; 1515df46b9a4SMike Christie 1516df46b9a4SMike Christie if (bytes > len) 1517df46b9a4SMike Christie bytes = len; 1518df46b9a4SMike Christie 1519defd94b7SMike Christie if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, 1520df46b9a4SMike Christie offset) < bytes) 1521df46b9a4SMike Christie break; 1522df46b9a4SMike Christie 1523df46b9a4SMike Christie data += bytes; 1524df46b9a4SMike Christie len -= bytes; 1525df46b9a4SMike Christie offset = 0; 1526df46b9a4SMike Christie } 1527df46b9a4SMike Christie 1528b823825eSJens Axboe bio->bi_end_io = bio_map_kern_endio; 1529df46b9a4SMike Christie return bio; 1530df46b9a4SMike Christie } 1531df46b9a4SMike Christie 1532df46b9a4SMike Christie /** 1533df46b9a4SMike Christie * bio_map_kern - map kernel address into bio 1534165125e1SJens Axboe * @q: the struct request_queue for the bio 1535df46b9a4SMike Christie * @data: pointer to buffer to map 1536df46b9a4SMike Christie * @len: length in bytes 1537df46b9a4SMike Christie * @gfp_mask: allocation flags for bio allocation 1538df46b9a4SMike Christie * 1539df46b9a4SMike Christie * Map the kernel address into a bio suitable for io to a block 1540df46b9a4SMike Christie * device. Returns an error pointer in case of error. 1541df46b9a4SMike Christie */ 1542165125e1SJens Axboe struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, 154327496a8cSAl Viro gfp_t gfp_mask) 1544df46b9a4SMike Christie { 1545df46b9a4SMike Christie struct bio *bio; 1546df46b9a4SMike Christie 1547df46b9a4SMike Christie bio = __bio_map_kern(q, data, len, gfp_mask); 1548df46b9a4SMike Christie if (IS_ERR(bio)) 1549df46b9a4SMike Christie return bio; 1550df46b9a4SMike Christie 15514f024f37SKent Overstreet if (bio->bi_iter.bi_size == len) 1552df46b9a4SMike Christie return bio; 1553df46b9a4SMike Christie 1554df46b9a4SMike Christie /* 1555df46b9a4SMike Christie * Don't support partial mappings. 
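 * (a short mapping would be undetectable by the caller: if
 * bio_add_pc_page() stopped early on a queue limit, the bio would
 * cover only a prefix of the buffer, so reject the whole mapping)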
1556df46b9a4SMike Christie */ 1557df46b9a4SMike Christie bio_put(bio); 1558df46b9a4SMike Christie return ERR_PTR(-EINVAL); 1559df46b9a4SMike Christie } 1560a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_map_kern); 1561df46b9a4SMike Christie 156268154e90SFUJITA Tomonori static void bio_copy_kern_endio(struct bio *bio, int err) 156368154e90SFUJITA Tomonori { 156468154e90SFUJITA Tomonori struct bio_vec *bvec; 156568154e90SFUJITA Tomonori const int read = bio_data_dir(bio) == READ; 156676029ff3SFUJITA Tomonori struct bio_map_data *bmd = bio->bi_private; 156768154e90SFUJITA Tomonori int i; 156876029ff3SFUJITA Tomonori char *p = bmd->sgvecs[0].iov_base; 156968154e90SFUJITA Tomonori 1570d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 157168154e90SFUJITA Tomonori char *addr = page_address(bvec->bv_page); 157268154e90SFUJITA Tomonori 15734fc981efSTejun Heo if (read) 1574c8db4448SKent Overstreet memcpy(p, addr, bvec->bv_len); 157568154e90SFUJITA Tomonori 157668154e90SFUJITA Tomonori __free_page(bvec->bv_page); 1577c8db4448SKent Overstreet p += bvec->bv_len; 157868154e90SFUJITA Tomonori } 157968154e90SFUJITA Tomonori 1580c8db4448SKent Overstreet kfree(bmd); 158168154e90SFUJITA Tomonori bio_put(bio); 158268154e90SFUJITA Tomonori } 158368154e90SFUJITA Tomonori 158468154e90SFUJITA Tomonori /** 158568154e90SFUJITA Tomonori * bio_copy_kern - copy kernel address into bio 158668154e90SFUJITA Tomonori * @q: the struct request_queue for the bio 158768154e90SFUJITA Tomonori * @data: pointer to buffer to copy 158868154e90SFUJITA Tomonori * @len: length in bytes 158968154e90SFUJITA Tomonori * @gfp_mask: allocation flags for bio and page allocation 1590ffee0259SRandy Dunlap * @reading: data direction is READ 159168154e90SFUJITA Tomonori * 159268154e90SFUJITA Tomonori * copy the kernel address into a bio suitable for io to a block 159368154e90SFUJITA Tomonori * device. Returns an error pointer in case of error. 159468154e90SFUJITA Tomonori */ 159568154e90SFUJITA Tomonori struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 159668154e90SFUJITA Tomonori gfp_t gfp_mask, int reading) 159768154e90SFUJITA Tomonori { 159868154e90SFUJITA Tomonori struct bio *bio; 159968154e90SFUJITA Tomonori struct bio_vec *bvec; 16004d8ab62eSFUJITA Tomonori int i; 160168154e90SFUJITA Tomonori 16024d8ab62eSFUJITA Tomonori bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); 16034d8ab62eSFUJITA Tomonori if (IS_ERR(bio)) 16044d8ab62eSFUJITA Tomonori return bio; 160568154e90SFUJITA Tomonori 160668154e90SFUJITA Tomonori if (!reading) { 160768154e90SFUJITA Tomonori void *p = data; 160868154e90SFUJITA Tomonori 1609d74c6d51SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 161068154e90SFUJITA Tomonori char *addr = page_address(bvec->bv_page); 161168154e90SFUJITA Tomonori 161268154e90SFUJITA Tomonori memcpy(addr, p, bvec->bv_len); 161368154e90SFUJITA Tomonori p += bvec->bv_len; 161468154e90SFUJITA Tomonori } 161568154e90SFUJITA Tomonori } 161668154e90SFUJITA Tomonori 161768154e90SFUJITA Tomonori bio->bi_end_io = bio_copy_kern_endio; 161876029ff3SFUJITA Tomonori 161968154e90SFUJITA Tomonori return bio; 162068154e90SFUJITA Tomonori } 1621a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_copy_kern); 162268154e90SFUJITA Tomonori 16231da177e4SLinus Torvalds /* 16241da177e4SLinus Torvalds * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 16251da177e4SLinus Torvalds * for performing direct-IO in BIOs. 
16261da177e4SLinus Torvalds * 16271da177e4SLinus Torvalds * The problem is that we cannot run set_page_dirty() from interrupt context 16281da177e4SLinus Torvalds * because the required locks are not interrupt-safe. So what we can do is to 16291da177e4SLinus Torvalds * mark the pages dirty _before_ performing IO. And in interrupt context, 16301da177e4SLinus Torvalds * check that the pages are still dirty. If so, fine. If not, redirty them 16311da177e4SLinus Torvalds * in process context. 16321da177e4SLinus Torvalds * 16331da177e4SLinus Torvalds * We special-case compound pages here: normally this means reads into hugetlb 16341da177e4SLinus Torvalds * pages. The logic in here doesn't really work right for compound pages 16351da177e4SLinus Torvalds * because the VM does not uniformly chase down the head page in all cases. 16361da177e4SLinus Torvalds * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't 16371da177e4SLinus Torvalds * handle them at all. So we skip compound pages here at an early stage. 16381da177e4SLinus Torvalds * 16391da177e4SLinus Torvalds * Note that this code is very hard to test under normal circumstances because 16401da177e4SLinus Torvalds * direct-io pins the pages with get_user_pages(). This makes 16411da177e4SLinus Torvalds * is_page_cache_freeable return false, and the VM will not clean the pages. 16420d5c3ebaSArtem Bityutskiy * But other code (eg, flusher threads) could clean the pages if they are mapped 16431da177e4SLinus Torvalds * pagecache. 16441da177e4SLinus Torvalds * 16451da177e4SLinus Torvalds * Simply disabling the call to bio_set_pages_dirty() is a good way to test the 16461da177e4SLinus Torvalds * deferred bio dirtying paths. 16471da177e4SLinus Torvalds */ 16481da177e4SLinus Torvalds 16491da177e4SLinus Torvalds /* 16501da177e4SLinus Torvalds * bio_set_pages_dirty() will mark all the bio's pages as dirty. 16511da177e4SLinus Torvalds */ 16521da177e4SLinus Torvalds void bio_set_pages_dirty(struct bio *bio) 16531da177e4SLinus Torvalds { 1654cb34e057SKent Overstreet struct bio_vec *bvec; 16551da177e4SLinus Torvalds int i; 16561da177e4SLinus Torvalds 1657cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1658cb34e057SKent Overstreet struct page *page = bvec->bv_page; 16591da177e4SLinus Torvalds 16601da177e4SLinus Torvalds if (page && !PageCompound(page)) 16611da177e4SLinus Torvalds set_page_dirty_lock(page); 16621da177e4SLinus Torvalds } 16631da177e4SLinus Torvalds } 16641da177e4SLinus Torvalds 166586b6c7a7SAdrian Bunk static void bio_release_pages(struct bio *bio) 16661da177e4SLinus Torvalds { 1667cb34e057SKent Overstreet struct bio_vec *bvec; 16681da177e4SLinus Torvalds int i; 16691da177e4SLinus Torvalds 1670cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1671cb34e057SKent Overstreet struct page *page = bvec->bv_page; 16721da177e4SLinus Torvalds 16731da177e4SLinus Torvalds if (page) 16741da177e4SLinus Torvalds put_page(page); 16751da177e4SLinus Torvalds } 16761da177e4SLinus Torvalds } 16771da177e4SLinus Torvalds 16781da177e4SLinus Torvalds /* 16791da177e4SLinus Torvalds * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. 16801da177e4SLinus Torvalds * If they are, then fine. If, however, some pages are clean then they must 16811da177e4SLinus Torvalds * have been written out during the direct-IO read. So we take another ref on 16821da177e4SLinus Torvalds * the BIO and the offending pages and re-dirty the pages in process context. 
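 *
 * A sketch of the intended calling pattern for a direct-IO read
 * (illustrative only; submission and completion glue is omitted):
 *
 *	bio_set_pages_dirty(bio);	before submission
 *	submit_bio(READ, bio);
 *	... then, from the bio's completion path:
 *	bio_check_pages_dirty(bio);	re-dirties any cleaned pages and
 *					releases the pages and the bio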
16831da177e4SLinus Torvalds * 16841da177e4SLinus Torvalds * It is expected that bio_check_pages_dirty() will wholly own the BIO from 16851da177e4SLinus Torvalds * here on. It will run one page_cache_release() against each page and will 16861da177e4SLinus Torvalds * run one bio_put() against the BIO. 16871da177e4SLinus Torvalds */ 16881da177e4SLinus Torvalds 168965f27f38SDavid Howells static void bio_dirty_fn(struct work_struct *work); 16901da177e4SLinus Torvalds 169165f27f38SDavid Howells static DECLARE_WORK(bio_dirty_work, bio_dirty_fn); 16921da177e4SLinus Torvalds static DEFINE_SPINLOCK(bio_dirty_lock); 16931da177e4SLinus Torvalds static struct bio *bio_dirty_list; 16941da177e4SLinus Torvalds 16951da177e4SLinus Torvalds /* 16961da177e4SLinus Torvalds * This runs in process context 16971da177e4SLinus Torvalds */ 169865f27f38SDavid Howells static void bio_dirty_fn(struct work_struct *work) 16991da177e4SLinus Torvalds { 17001da177e4SLinus Torvalds unsigned long flags; 17011da177e4SLinus Torvalds struct bio *bio; 17021da177e4SLinus Torvalds 17031da177e4SLinus Torvalds spin_lock_irqsave(&bio_dirty_lock, flags); 17041da177e4SLinus Torvalds bio = bio_dirty_list; 17051da177e4SLinus Torvalds bio_dirty_list = NULL; 17061da177e4SLinus Torvalds spin_unlock_irqrestore(&bio_dirty_lock, flags); 17071da177e4SLinus Torvalds 17081da177e4SLinus Torvalds while (bio) { 17091da177e4SLinus Torvalds struct bio *next = bio->bi_private; 17101da177e4SLinus Torvalds 17111da177e4SLinus Torvalds bio_set_pages_dirty(bio); 17121da177e4SLinus Torvalds bio_release_pages(bio); 17131da177e4SLinus Torvalds bio_put(bio); 17141da177e4SLinus Torvalds bio = next; 17151da177e4SLinus Torvalds } 17161da177e4SLinus Torvalds } 17171da177e4SLinus Torvalds 17181da177e4SLinus Torvalds void bio_check_pages_dirty(struct bio *bio) 17191da177e4SLinus Torvalds { 1720cb34e057SKent Overstreet struct bio_vec *bvec; 17211da177e4SLinus Torvalds int nr_clean_pages = 0; 17221da177e4SLinus Torvalds int i; 17231da177e4SLinus Torvalds 1724cb34e057SKent Overstreet bio_for_each_segment_all(bvec, bio, i) { 1725cb34e057SKent Overstreet struct page *page = bvec->bv_page; 17261da177e4SLinus Torvalds 17271da177e4SLinus Torvalds if (PageDirty(page) || PageCompound(page)) { 17281da177e4SLinus Torvalds page_cache_release(page); 1729cb34e057SKent Overstreet bvec->bv_page = NULL; 17301da177e4SLinus Torvalds } else { 17311da177e4SLinus Torvalds nr_clean_pages++; 17321da177e4SLinus Torvalds } 17331da177e4SLinus Torvalds } 17341da177e4SLinus Torvalds 17351da177e4SLinus Torvalds if (nr_clean_pages) { 17361da177e4SLinus Torvalds unsigned long flags; 17371da177e4SLinus Torvalds 17381da177e4SLinus Torvalds spin_lock_irqsave(&bio_dirty_lock, flags); 17391da177e4SLinus Torvalds bio->bi_private = bio_dirty_list; 17401da177e4SLinus Torvalds bio_dirty_list = bio; 17411da177e4SLinus Torvalds spin_unlock_irqrestore(&bio_dirty_lock, flags); 17421da177e4SLinus Torvalds schedule_work(&bio_dirty_work); 17431da177e4SLinus Torvalds } else { 17441da177e4SLinus Torvalds bio_put(bio); 17451da177e4SLinus Torvalds } 17461da177e4SLinus Torvalds } 17471da177e4SLinus Torvalds 1748394ffa50SGu Zheng void generic_start_io_acct(int rw, unsigned long sectors, 1749394ffa50SGu Zheng struct hd_struct *part) 1750394ffa50SGu Zheng { 1751394ffa50SGu Zheng int cpu = part_stat_lock(); 1752394ffa50SGu Zheng 1753394ffa50SGu Zheng part_round_stats(cpu, part); 1754394ffa50SGu Zheng part_stat_inc(cpu, part, ios[rw]); 1755394ffa50SGu Zheng part_stat_add(cpu, part, sectors[rw], sectors); 1756394ffa50SGu Zheng 
part_inc_in_flight(part, rw); 1757394ffa50SGu Zheng 1758394ffa50SGu Zheng part_stat_unlock(); 1759394ffa50SGu Zheng } 1760394ffa50SGu Zheng EXPORT_SYMBOL(generic_start_io_acct); 1761394ffa50SGu Zheng 1762394ffa50SGu Zheng void generic_end_io_acct(int rw, struct hd_struct *part, 1763394ffa50SGu Zheng unsigned long start_time) 1764394ffa50SGu Zheng { 1765394ffa50SGu Zheng unsigned long duration = jiffies - start_time; 1766394ffa50SGu Zheng int cpu = part_stat_lock(); 1767394ffa50SGu Zheng 1768394ffa50SGu Zheng part_stat_add(cpu, part, ticks[rw], duration); 1769394ffa50SGu Zheng part_round_stats(cpu, part); 1770394ffa50SGu Zheng part_dec_in_flight(part, rw); 1771394ffa50SGu Zheng 1772394ffa50SGu Zheng part_stat_unlock(); 1773394ffa50SGu Zheng } 1774394ffa50SGu Zheng EXPORT_SYMBOL(generic_end_io_acct); 1775394ffa50SGu Zheng 17762d4dc890SIlya Loginov #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 17772d4dc890SIlya Loginov void bio_flush_dcache_pages(struct bio *bi) 17782d4dc890SIlya Loginov { 17797988613bSKent Overstreet struct bio_vec bvec; 17807988613bSKent Overstreet struct bvec_iter iter; 17812d4dc890SIlya Loginov 17827988613bSKent Overstreet bio_for_each_segment(bvec, bi, iter) 17837988613bSKent Overstreet flush_dcache_page(bvec.bv_page); 17842d4dc890SIlya Loginov } 17852d4dc890SIlya Loginov EXPORT_SYMBOL(bio_flush_dcache_pages); 17862d4dc890SIlya Loginov #endif 17872d4dc890SIlya Loginov 17881da177e4SLinus Torvalds /** 17891da177e4SLinus Torvalds * bio_endio - end I/O on a bio 17901da177e4SLinus Torvalds * @bio: bio 17911da177e4SLinus Torvalds * @error: error, if any 17921da177e4SLinus Torvalds * 17931da177e4SLinus Torvalds * Description: 17946712ecf8SNeilBrown * bio_endio() will end I/O on the whole bio. bio_endio() is the 17955bb23a68SNeilBrown * preferred way to end I/O on a bio, it takes care of clearing 17965bb23a68SNeilBrown * BIO_UPTODATE on error. @error is 0 on success, and one of the 17975bb23a68SNeilBrown * established -Exxxx (-EIO, for instance) error values in case 17985bb23a68SNeilBrown * something went wrong. No one should call bi_end_io() directly on a 17995bb23a68SNeilBrown * bio unless they own it and thus know that it has an end_io 18005bb23a68SNeilBrown * function. 18011da177e4SLinus Torvalds **/ 18026712ecf8SNeilBrown void bio_endio(struct bio *bio, int error) 18031da177e4SLinus Torvalds { 1804196d38bcSKent Overstreet while (bio) { 1805196d38bcSKent Overstreet BUG_ON(atomic_read(&bio->bi_remaining) <= 0); 1806196d38bcSKent Overstreet 18071da177e4SLinus Torvalds if (error) 18081da177e4SLinus Torvalds clear_bit(BIO_UPTODATE, &bio->bi_flags); 18099cc54d40SNeilBrown else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 18109cc54d40SNeilBrown error = -EIO; 18111da177e4SLinus Torvalds 1812196d38bcSKent Overstreet if (!atomic_dec_and_test(&bio->bi_remaining)) 1813196d38bcSKent Overstreet return; 1814196d38bcSKent Overstreet 1815196d38bcSKent Overstreet /* 1816196d38bcSKent Overstreet * Need to have a real endio function for chained bios, 1817196d38bcSKent Overstreet * otherwise various corner cases will break (like stacking 1818196d38bcSKent Overstreet * block devices that save/restore bi_end_io) - however, we want 1819196d38bcSKent Overstreet * to avoid unbounded recursion and blowing the stack. Tail call 1820196d38bcSKent Overstreet * optimization would handle this, but compiling with frame 1821196d38bcSKent Overstreet * pointers also disables gcc's sibling call optimization.
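 *
 * (Illustration: after bio_chain(split, parent), split->bi_end_io is
 * bio_chain_endio, so completing 'split' continues this loop with
 * 'parent' instead of recursing into bio_endio() again.)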
1822196d38bcSKent Overstreet */ 1823196d38bcSKent Overstreet if (bio->bi_end_io == bio_chain_endio) { 1824196d38bcSKent Overstreet struct bio *parent = bio->bi_private; 1825196d38bcSKent Overstreet bio_put(bio); 1826196d38bcSKent Overstreet bio = parent; 1827196d38bcSKent Overstreet } else { 18285bb23a68SNeilBrown if (bio->bi_end_io) 18296712ecf8SNeilBrown bio->bi_end_io(bio, error); 1830196d38bcSKent Overstreet bio = NULL; 1831196d38bcSKent Overstreet } 1832196d38bcSKent Overstreet } 18331da177e4SLinus Torvalds } 1834a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bio_endio); 18351da177e4SLinus Torvalds 1836196d38bcSKent Overstreet /** 1837196d38bcSKent Overstreet * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining 1838196d38bcSKent Overstreet * @bio: bio 1839196d38bcSKent Overstreet * @error: error, if any 1840196d38bcSKent Overstreet * 1841196d38bcSKent Overstreet * For code that has saved and restored bi_end_io; think hard before using this 1842196d38bcSKent Overstreet * function, probably you should've cloned the entire bio. 1843196d38bcSKent Overstreet **/ 1844196d38bcSKent Overstreet void bio_endio_nodec(struct bio *bio, int error) 1845196d38bcSKent Overstreet { 1846196d38bcSKent Overstreet atomic_inc(&bio->bi_remaining); 1847196d38bcSKent Overstreet bio_endio(bio, error); 1848196d38bcSKent Overstreet } 1849196d38bcSKent Overstreet EXPORT_SYMBOL(bio_endio_nodec); 1850196d38bcSKent Overstreet 185120d0189bSKent Overstreet /** 185220d0189bSKent Overstreet * bio_split - split a bio 185320d0189bSKent Overstreet * @bio: bio to split 185420d0189bSKent Overstreet * @sectors: number of sectors to split from the front of @bio 185520d0189bSKent Overstreet * @gfp: gfp mask 185620d0189bSKent Overstreet * @bs: bio set to allocate from 185720d0189bSKent Overstreet * 185820d0189bSKent Overstreet * Allocates and returns a new bio which represents @sectors from the start of 185920d0189bSKent Overstreet * @bio, and updates @bio to represent the remaining sectors. 186020d0189bSKent Overstreet * 186120d0189bSKent Overstreet * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's 186220d0189bSKent Overstreet * responsibility to ensure that @bio is not freed before the split. 186320d0189bSKent Overstreet */ 186420d0189bSKent Overstreet struct bio *bio_split(struct bio *bio, int sectors, 186520d0189bSKent Overstreet gfp_t gfp, struct bio_set *bs) 186620d0189bSKent Overstreet { 186720d0189bSKent Overstreet struct bio *split = NULL; 186820d0189bSKent Overstreet 186920d0189bSKent Overstreet BUG_ON(sectors <= 0); 187020d0189bSKent Overstreet BUG_ON(sectors >= bio_sectors(bio)); 187120d0189bSKent Overstreet 187220d0189bSKent Overstreet split = bio_clone_fast(bio, gfp, bs); 187320d0189bSKent Overstreet if (!split) 187420d0189bSKent Overstreet return NULL; 187520d0189bSKent Overstreet 187620d0189bSKent Overstreet split->bi_iter.bi_size = sectors << 9; 187720d0189bSKent Overstreet 187820d0189bSKent Overstreet if (bio_integrity(split)) 187920d0189bSKent Overstreet bio_integrity_trim(split, 0, sectors); 188020d0189bSKent Overstreet 188120d0189bSKent Overstreet bio_advance(bio, split->bi_iter.bi_size); 188220d0189bSKent Overstreet 188320d0189bSKent Overstreet return split; 188420d0189bSKent Overstreet } 188520d0189bSKent Overstreet EXPORT_SYMBOL(bio_split); 188620d0189bSKent Overstreet 1887ad3316bfSMartin K.
Petersen /** 18886678d83fSKent Overstreet * bio_trim - trim a bio 18896678d83fSKent Overstreet * @bio: bio to trim 18906678d83fSKent Overstreet * @offset: number of sectors to trim from the front of @bio 18916678d83fSKent Overstreet * @size: size we want to trim @bio to, in sectors 18926678d83fSKent Overstreet */ 18936678d83fSKent Overstreet void bio_trim(struct bio *bio, int offset, int size) 18946678d83fSKent Overstreet { 18956678d83fSKent Overstreet /* 'bio' is a cloned bio which we need to trim to match 18966678d83fSKent Overstreet * the given offset and size. 18976678d83fSKent Overstreet */ 18986678d83fSKent Overstreet 18996678d83fSKent Overstreet size <<= 9; 19004f024f37SKent Overstreet if (offset == 0 && size == bio->bi_iter.bi_size) 19016678d83fSKent Overstreet return; 19026678d83fSKent Overstreet 19036678d83fSKent Overstreet clear_bit(BIO_SEG_VALID, &bio->bi_flags); 19046678d83fSKent Overstreet 19056678d83fSKent Overstreet bio_advance(bio, offset << 9); 19066678d83fSKent Overstreet 19074f024f37SKent Overstreet bio->bi_iter.bi_size = size; 19086678d83fSKent Overstreet } 19096678d83fSKent Overstreet EXPORT_SYMBOL_GPL(bio_trim); 19106678d83fSKent Overstreet 19111da177e4SLinus Torvalds /* 19121da177e4SLinus Torvalds * create memory pools for biovec's in a bio_set. 19131da177e4SLinus Torvalds * use the global biovec slabs created for general use. 19141da177e4SLinus Torvalds */ 1915a6c39cb4SFabian Frederick mempool_t *biovec_create_pool(int pool_entries) 19161da177e4SLinus Torvalds { 19177ff9345fSJens Axboe struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; 19181da177e4SLinus Torvalds 19199f060e22SKent Overstreet return mempool_create_slab_pool(pool_entries, bp->slab); 19201da177e4SLinus Torvalds } 19211da177e4SLinus Torvalds 19221da177e4SLinus Torvalds void bioset_free(struct bio_set *bs) 19231da177e4SLinus Torvalds { 1924df2cb6daSKent Overstreet if (bs->rescue_workqueue) 1925df2cb6daSKent Overstreet destroy_workqueue(bs->rescue_workqueue); 1926df2cb6daSKent Overstreet 19271da177e4SLinus Torvalds if (bs->bio_pool) 19281da177e4SLinus Torvalds mempool_destroy(bs->bio_pool); 19291da177e4SLinus Torvalds 19309f060e22SKent Overstreet if (bs->bvec_pool) 19319f060e22SKent Overstreet mempool_destroy(bs->bvec_pool); 19329f060e22SKent Overstreet 19337878cba9SMartin K. 
Petersen bioset_integrity_free(bs); 1934bb799ca0SJens Axboe bio_put_slab(bs); 19351da177e4SLinus Torvalds 19361da177e4SLinus Torvalds kfree(bs); 19371da177e4SLinus Torvalds } 1938a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bioset_free); 19391da177e4SLinus Torvalds 1940d8f429e1SJunichi Nomura static struct bio_set *__bioset_create(unsigned int pool_size, 1941d8f429e1SJunichi Nomura unsigned int front_pad, 1942d8f429e1SJunichi Nomura bool create_bvec_pool) 19431da177e4SLinus Torvalds { 1944392ddc32SJens Axboe unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); 19451b434498SJens Axboe struct bio_set *bs; 19461da177e4SLinus Torvalds 19471b434498SJens Axboe bs = kzalloc(sizeof(*bs), GFP_KERNEL); 19481da177e4SLinus Torvalds if (!bs) 19491da177e4SLinus Torvalds return NULL; 19501da177e4SLinus Torvalds 1951bb799ca0SJens Axboe bs->front_pad = front_pad; 19521b434498SJens Axboe 1953df2cb6daSKent Overstreet spin_lock_init(&bs->rescue_lock); 1954df2cb6daSKent Overstreet bio_list_init(&bs->rescue_list); 1955df2cb6daSKent Overstreet INIT_WORK(&bs->rescue_work, bio_alloc_rescue); 1956df2cb6daSKent Overstreet 1957392ddc32SJens Axboe bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); 1958bb799ca0SJens Axboe if (!bs->bio_slab) { 1959bb799ca0SJens Axboe kfree(bs); 1960bb799ca0SJens Axboe return NULL; 1961bb799ca0SJens Axboe } 1962bb799ca0SJens Axboe 1963bb799ca0SJens Axboe bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab); 19641da177e4SLinus Torvalds if (!bs->bio_pool) 19651da177e4SLinus Torvalds goto bad; 19661da177e4SLinus Torvalds 1967d8f429e1SJunichi Nomura if (create_bvec_pool) { 1968a6c39cb4SFabian Frederick bs->bvec_pool = biovec_create_pool(pool_size); 19699f060e22SKent Overstreet if (!bs->bvec_pool) 1970df2cb6daSKent Overstreet goto bad; 1971d8f429e1SJunichi Nomura } 19721da177e4SLinus Torvalds 1973df2cb6daSKent Overstreet bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); 1974df2cb6daSKent Overstreet if (!bs->rescue_workqueue) 1975df2cb6daSKent Overstreet goto bad; 1976df2cb6daSKent Overstreet 1977df2cb6daSKent Overstreet return bs; 19781da177e4SLinus Torvalds bad: 19791da177e4SLinus Torvalds bioset_free(bs); 19801da177e4SLinus Torvalds return NULL; 19811da177e4SLinus Torvalds } 1982d8f429e1SJunichi Nomura 1983d8f429e1SJunichi Nomura /** 1984d8f429e1SJunichi Nomura * bioset_create - Create a bio_set 1985d8f429e1SJunichi Nomura * @pool_size: Number of bio and bio_vecs to cache in the mempool 1986d8f429e1SJunichi Nomura * @front_pad: Number of bytes to allocate in front of the returned bio 1987d8f429e1SJunichi Nomura * 1988d8f429e1SJunichi Nomura * Description: 1989d8f429e1SJunichi Nomura * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller 1990d8f429e1SJunichi Nomura * to ask for a number of bytes to be allocated in front of the bio. 1991d8f429e1SJunichi Nomura * Front pad allocation is useful for embedding the bio inside 1992d8f429e1SJunichi Nomura * another structure, to avoid allocating extra data to go with the bio. 1993d8f429e1SJunichi Nomura * Note that the bio must be embedded at the END of that structure always, 1994d8f429e1SJunichi Nomura * or things will break badly. 
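 *
 * A minimal front-pad embedding sketch (illustrative only; struct
 * my_io and its fields are hypothetical, not part of this file):
 *
 *	struct my_io {
 *		void *private_state;
 *		struct bio bio;		(must be the last member)
 *	};
 *
 *	bs = bioset_create(pool_size, offsetof(struct my_io, bio));
 *	bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, bs);
 *	io = container_of(bio, struct my_io, bio);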
1995d8f429e1SJunichi Nomura */ 1996d8f429e1SJunichi Nomura struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) 1997d8f429e1SJunichi Nomura { 1998d8f429e1SJunichi Nomura return __bioset_create(pool_size, front_pad, true); 1999d8f429e1SJunichi Nomura } 2000a112a71dSH Hartley Sweeten EXPORT_SYMBOL(bioset_create); 20011da177e4SLinus Torvalds 2002d8f429e1SJunichi Nomura /** 2003d8f429e1SJunichi Nomura * bioset_create_nobvec - Create a bio_set without bio_vec mempool 2004d8f429e1SJunichi Nomura * @pool_size: Number of bio to cache in the mempool 2005d8f429e1SJunichi Nomura * @front_pad: Number of bytes to allocate in front of the returned bio 2006d8f429e1SJunichi Nomura * 2007d8f429e1SJunichi Nomura * Description: 2008d8f429e1SJunichi Nomura * Same functionality as bioset_create() except that mempool is not 2009d8f429e1SJunichi Nomura * created for bio_vecs, saving some memory for bio_clone_fast() users. 2010d8f429e1SJunichi Nomura */ 2011d8f429e1SJunichi Nomura struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad) 2012d8f429e1SJunichi Nomura { 2013d8f429e1SJunichi Nomura return __bioset_create(pool_size, front_pad, false); 2014d8f429e1SJunichi Nomura } 2015d8f429e1SJunichi Nomura EXPORT_SYMBOL(bioset_create_nobvec); 2016d8f429e1SJunichi Nomura 2017852c788fSTejun Heo #ifdef CONFIG_BLK_CGROUP 2018852c788fSTejun Heo /** 2019852c788fSTejun Heo * bio_associate_current - associate a bio with %current 2020852c788fSTejun Heo * @bio: target bio 2021852c788fSTejun Heo * 2022852c788fSTejun Heo * Associate @bio with %current if it hasn't been associated yet. Block 2023852c788fSTejun Heo * layer will treat @bio as if it were issued by %current no matter which 2024852c788fSTejun Heo * task actually issues it. 2025852c788fSTejun Heo * 2026852c788fSTejun Heo * This function takes an extra reference of %current's io_context and blkcg 2027852c788fSTejun Heo * which will be put when @bio is released. The caller must own @bio, 2028852c788fSTejun Heo * ensure %current->io_context exists, and is responsible for synchronizing 2029852c788fSTejun Heo * calls to this function.
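 *
 * A rough caller sketch (illustrative only; assumes %current already
 * has an io_context, otherwise -ENOENT is returned):
 *
 *	ret = bio_associate_current(bio);
 *	if (ret == -EBUSY)
 *		the bio was already associated; nothing to do
 *	the matching bio_disassociate_task() runs when the bio is freed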
2030852c788fSTejun Heo */ 2031852c788fSTejun Heo int bio_associate_current(struct bio *bio) 2032852c788fSTejun Heo { 2033852c788fSTejun Heo struct io_context *ioc; 2034852c788fSTejun Heo struct cgroup_subsys_state *css; 2035852c788fSTejun Heo 2036852c788fSTejun Heo if (bio->bi_ioc) 2037852c788fSTejun Heo return -EBUSY; 2038852c788fSTejun Heo 2039852c788fSTejun Heo ioc = current->io_context; 2040852c788fSTejun Heo if (!ioc) 2041852c788fSTejun Heo return -ENOENT; 2042852c788fSTejun Heo 2043852c788fSTejun Heo /* acquire active ref on @ioc and associate */ 2044852c788fSTejun Heo get_io_context_active(ioc); 2045852c788fSTejun Heo bio->bi_ioc = ioc; 2046852c788fSTejun Heo 2047852c788fSTejun Heo /* associate blkcg if exists */ 2048852c788fSTejun Heo rcu_read_lock(); 2049073219e9STejun Heo css = task_css(current, blkio_cgrp_id); 2050ec903c0cSTejun Heo if (css && css_tryget_online(css)) 2051852c788fSTejun Heo bio->bi_css = css; 2052852c788fSTejun Heo rcu_read_unlock(); 2053852c788fSTejun Heo 2054852c788fSTejun Heo return 0; 2055852c788fSTejun Heo } 2056852c788fSTejun Heo 2057852c788fSTejun Heo /** 2058852c788fSTejun Heo * bio_disassociate_task - undo bio_associate_current() 2059852c788fSTejun Heo * @bio: target bio 2060852c788fSTejun Heo */ 2061852c788fSTejun Heo void bio_disassociate_task(struct bio *bio) 2062852c788fSTejun Heo { 2063852c788fSTejun Heo if (bio->bi_ioc) { 2064852c788fSTejun Heo put_io_context(bio->bi_ioc); 2065852c788fSTejun Heo bio->bi_ioc = NULL; 2066852c788fSTejun Heo } 2067852c788fSTejun Heo if (bio->bi_css) { 2068852c788fSTejun Heo css_put(bio->bi_css); 2069852c788fSTejun Heo bio->bi_css = NULL; 2070852c788fSTejun Heo } 2071852c788fSTejun Heo } 2072852c788fSTejun Heo 2073852c788fSTejun Heo #endif /* CONFIG_BLK_CGROUP */ 2074852c788fSTejun Heo 20751da177e4SLinus Torvalds static void __init biovec_init_slabs(void) 20761da177e4SLinus Torvalds { 20771da177e4SLinus Torvalds int i; 20781da177e4SLinus Torvalds 20791da177e4SLinus Torvalds for (i = 0; i < BIOVEC_NR_POOLS; i++) { 20801da177e4SLinus Torvalds int size; 20811da177e4SLinus Torvalds struct biovec_slab *bvs = bvec_slabs + i; 20821da177e4SLinus Torvalds 2083a7fcd37cSJens Axboe if (bvs->nr_vecs <= BIO_INLINE_VECS) { 2084a7fcd37cSJens Axboe bvs->slab = NULL; 2085a7fcd37cSJens Axboe continue; 2086a7fcd37cSJens Axboe } 2087a7fcd37cSJens Axboe 20881da177e4SLinus Torvalds size = bvs->nr_vecs * sizeof(struct bio_vec); 20891da177e4SLinus Torvalds bvs->slab = kmem_cache_create(bvs->name, size, 0, 209020c2df83SPaul Mundt SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 20911da177e4SLinus Torvalds } 20921da177e4SLinus Torvalds } 20931da177e4SLinus Torvalds 20941da177e4SLinus Torvalds static int __init init_bio(void) 20951da177e4SLinus Torvalds { 2096bb799ca0SJens Axboe bio_slab_max = 2; 2097bb799ca0SJens Axboe bio_slab_nr = 0; 2098bb799ca0SJens Axboe bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); 2099bb799ca0SJens Axboe if (!bio_slabs) 2100bb799ca0SJens Axboe panic("bio: can't allocate bios\n"); 21011da177e4SLinus Torvalds 21027878cba9SMartin K. Petersen bio_integrity_init(); 21031da177e4SLinus Torvalds biovec_init_slabs(); 21041da177e4SLinus Torvalds 2105bb799ca0SJens Axboe fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 21061da177e4SLinus Torvalds if (!fs_bio_set) 21071da177e4SLinus Torvalds panic("bio: can't allocate bios\n"); 21081da177e4SLinus Torvalds 2109a91a2785SMartin K. Petersen if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE)) 2110a91a2785SMartin K. 
Petersen panic("bio: can't create integrity pool\n"); 2111a91a2785SMartin K. Petersen 21121da177e4SLinus Torvalds return 0; 21131da177e4SLinus Torvalds } 21141da177e4SLinus Torvalds subsys_initcall(init_bio); 2115