1 /* 2 * DMA helper functions 3 * 4 * Copyright (c) 2009,2020 Red Hat 5 * 6 * This work is licensed under the terms of the GNU General Public License 7 * (GNU GPL), version 2 or later. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "sysemu/block-backend.h" 12 #include "sysemu/dma.h" 13 #include "trace/trace-root.h" 14 #include "qemu/thread.h" 15 #include "qemu/main-loop.h" 16 #include "sysemu/cpu-timers.h" 17 #include "qemu/range.h" 18 19 /* #define DEBUG_IOMMU */ 20 21 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr, 22 uint8_t c, dma_addr_t len, MemTxAttrs attrs) 23 { 24 dma_barrier(as, DMA_DIRECTION_FROM_DEVICE); 25 26 return address_space_set(as, addr, c, len, attrs); 27 } 28 29 void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint, 30 AddressSpace *as) 31 { 32 qsg->sg = g_new(ScatterGatherEntry, alloc_hint); 33 qsg->nsg = 0; 34 qsg->nalloc = alloc_hint; 35 qsg->size = 0; 36 qsg->as = as; 37 qsg->dev = dev; 38 object_ref(OBJECT(dev)); 39 } 40 41 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len) 42 { 43 if (qsg->nsg == qsg->nalloc) { 44 qsg->nalloc = 2 * qsg->nalloc + 1; 45 qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc); 46 } 47 qsg->sg[qsg->nsg].base = base; 48 qsg->sg[qsg->nsg].len = len; 49 qsg->size += len; 50 ++qsg->nsg; 51 } 52 53 void qemu_sglist_destroy(QEMUSGList *qsg) 54 { 55 object_unref(OBJECT(qsg->dev)); 56 g_free(qsg->sg); 57 memset(qsg, 0, sizeof(*qsg)); 58 } 59 60 typedef struct { 61 BlockAIOCB common; 62 AioContext *ctx; 63 BlockAIOCB *acb; 64 QEMUSGList *sg; 65 uint32_t align; 66 uint64_t offset; 67 DMADirection dir; 68 int sg_cur_index; 69 dma_addr_t sg_cur_byte; 70 QEMUIOVector iov; 71 QEMUBH *bh; 72 DMAIOFunc *io_func; 73 void *io_func_opaque; 74 } DMAAIOCB; 75 76 static void dma_blk_cb(void *opaque, int ret); 77 78 static void reschedule_dma(void *opaque) 79 { 80 DMAAIOCB *dbs = (DMAAIOCB *)opaque; 81 82 assert(!dbs->acb && dbs->bh); 83 qemu_bh_delete(dbs->bh); 84 dbs->bh = NULL; 85 dma_blk_cb(dbs, 0); 86 } 87 88 static void dma_blk_unmap(DMAAIOCB *dbs) 89 { 90 int i; 91 92 for (i = 0; i < dbs->iov.niov; ++i) { 93 dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base, 94 dbs->iov.iov[i].iov_len, dbs->dir, 95 dbs->iov.iov[i].iov_len); 96 } 97 qemu_iovec_reset(&dbs->iov); 98 } 99 100 static void dma_complete(DMAAIOCB *dbs, int ret) 101 { 102 trace_dma_complete(dbs, ret, dbs->common.cb); 103 104 assert(!dbs->acb && !dbs->bh); 105 dma_blk_unmap(dbs); 106 if (dbs->common.cb) { 107 dbs->common.cb(dbs->common.opaque, ret); 108 } 109 qemu_iovec_destroy(&dbs->iov); 110 qemu_aio_unref(dbs); 111 } 112 113 static void dma_blk_cb(void *opaque, int ret) 114 { 115 DMAAIOCB *dbs = (DMAAIOCB *)opaque; 116 AioContext *ctx = dbs->ctx; 117 dma_addr_t cur_addr, cur_len; 118 void *mem; 119 120 trace_dma_blk_cb(dbs, ret); 121 122 aio_context_acquire(ctx); 123 dbs->acb = NULL; 124 dbs->offset += dbs->iov.size; 125 126 if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { 127 dma_complete(dbs, ret); 128 goto out; 129 } 130 dma_blk_unmap(dbs); 131 132 while (dbs->sg_cur_index < dbs->sg->nsg) { 133 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; 134 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; 135 mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir, 136 MEMTXATTRS_UNSPECIFIED); 137 /* 138 * Make reads deterministic in icount mode. Windows sometimes issues 139 * disk read requests with overlapping SGs. It leads 140 * to non-determinism, because resulting buffer contents may be mixed 141 * from several sectors. This code splits all SGs into several 142 * groups. SGs in every group do not overlap. 143 */ 144 if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { 145 int i; 146 for (i = 0 ; i < dbs->iov.niov ; ++i) { 147 if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, 148 dbs->iov.iov[i].iov_len, (intptr_t)mem, 149 cur_len)) { 150 dma_memory_unmap(dbs->sg->as, mem, cur_len, 151 dbs->dir, cur_len); 152 mem = NULL; 153 break; 154 } 155 } 156 } 157 if (!mem) 158 break; 159 qemu_iovec_add(&dbs->iov, mem, cur_len); 160 dbs->sg_cur_byte += cur_len; 161 if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) { 162 dbs->sg_cur_byte = 0; 163 ++dbs->sg_cur_index; 164 } 165 } 166 167 if (dbs->iov.size == 0) { 168 trace_dma_map_wait(dbs); 169 dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); 170 cpu_register_map_client(dbs->bh); 171 goto out; 172 } 173 174 if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { 175 qemu_iovec_discard_back(&dbs->iov, 176 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); 177 } 178 179 dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, 180 dma_blk_cb, dbs, dbs->io_func_opaque); 181 assert(dbs->acb); 182 out: 183 aio_context_release(ctx); 184 } 185 186 static void dma_aio_cancel(BlockAIOCB *acb) 187 { 188 DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); 189 190 trace_dma_aio_cancel(dbs); 191 192 assert(!(dbs->acb && dbs->bh)); 193 if (dbs->acb) { 194 /* This will invoke dma_blk_cb. */ 195 blk_aio_cancel_async(dbs->acb); 196 return; 197 } 198 199 if (dbs->bh) { 200 cpu_unregister_map_client(dbs->bh); 201 qemu_bh_delete(dbs->bh); 202 dbs->bh = NULL; 203 } 204 if (dbs->common.cb) { 205 dbs->common.cb(dbs->common.opaque, -ECANCELED); 206 } 207 } 208 209 static AioContext *dma_get_aio_context(BlockAIOCB *acb) 210 { 211 DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common); 212 213 return dbs->ctx; 214 } 215 216 static const AIOCBInfo dma_aiocb_info = { 217 .aiocb_size = sizeof(DMAAIOCB), 218 .cancel_async = dma_aio_cancel, 219 .get_aio_context = dma_get_aio_context, 220 }; 221 222 BlockAIOCB *dma_blk_io(AioContext *ctx, 223 QEMUSGList *sg, uint64_t offset, uint32_t align, 224 DMAIOFunc *io_func, void *io_func_opaque, 225 BlockCompletionFunc *cb, 226 void *opaque, DMADirection dir) 227 { 228 DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque); 229 230 trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE)); 231 232 dbs->acb = NULL; 233 dbs->sg = sg; 234 dbs->ctx = ctx; 235 dbs->offset = offset; 236 dbs->align = align; 237 dbs->sg_cur_index = 0; 238 dbs->sg_cur_byte = 0; 239 dbs->dir = dir; 240 dbs->io_func = io_func; 241 dbs->io_func_opaque = io_func_opaque; 242 dbs->bh = NULL; 243 qemu_iovec_init(&dbs->iov, sg->nsg); 244 dma_blk_cb(dbs, 0); 245 return &dbs->common; 246 } 247 248 249 static 250 BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, 251 BlockCompletionFunc *cb, void *cb_opaque, 252 void *opaque) 253 { 254 BlockBackend *blk = opaque; 255 return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque); 256 } 257 258 BlockAIOCB *dma_blk_read(BlockBackend *blk, 259 QEMUSGList *sg, uint64_t offset, uint32_t align, 260 void (*cb)(void *opaque, int ret), void *opaque) 261 { 262 return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, 263 dma_blk_read_io_func, blk, cb, opaque, 264 DMA_DIRECTION_FROM_DEVICE); 265 } 266 267 static 268 BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, 269 BlockCompletionFunc *cb, void *cb_opaque, 270 void *opaque) 271 { 272 BlockBackend *blk = opaque; 273 return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque); 274 } 275 276 BlockAIOCB *dma_blk_write(BlockBackend *blk, 277 QEMUSGList *sg, uint64_t offset, uint32_t align, 278 void (*cb)(void *opaque, int ret), void *opaque) 279 { 280 return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, 281 dma_blk_write_io_func, blk, cb, opaque, 282 DMA_DIRECTION_TO_DEVICE); 283 } 284 285 286 static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual, 287 QEMUSGList *sg, DMADirection dir, 288 MemTxAttrs attrs) 289 { 290 uint8_t *ptr = buf; 291 dma_addr_t xresidual; 292 int sg_cur_index; 293 MemTxResult res = MEMTX_OK; 294 295 xresidual = sg->size; 296 sg_cur_index = 0; 297 len = MIN(len, xresidual); 298 while (len > 0) { 299 ScatterGatherEntry entry = sg->sg[sg_cur_index++]; 300 dma_addr_t xfer = MIN(len, entry.len); 301 res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs); 302 ptr += xfer; 303 len -= xfer; 304 xresidual -= xfer; 305 } 306 307 if (residual) { 308 *residual = xresidual; 309 } 310 return res; 311 } 312 313 MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual, 314 QEMUSGList *sg, MemTxAttrs attrs) 315 { 316 return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs); 317 } 318 319 MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual, 320 QEMUSGList *sg, MemTxAttrs attrs) 321 { 322 return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs); 323 } 324 325 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie, 326 QEMUSGList *sg, enum BlockAcctType type) 327 { 328 block_acct_start(blk_get_stats(blk), cookie, sg->size, type); 329 } 330 331 uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits) 332 { 333 uint64_t max_mask = UINT64_MAX, addr_mask = end - start; 334 uint64_t alignment_mask, size_mask; 335 336 if (max_addr_bits != 64) { 337 max_mask = (1ULL << max_addr_bits) - 1; 338 } 339 340 alignment_mask = start ? (start & -start) - 1 : max_mask; 341 alignment_mask = MIN(alignment_mask, max_mask); 342 size_mask = MIN(addr_mask, max_mask); 343 344 if (alignment_mask <= size_mask) { 345 /* Increase the alignment of start */ 346 return alignment_mask; 347 } else { 348 /* Find the largest page mask from size */ 349 if (addr_mask == UINT64_MAX) { 350 return UINT64_MAX; 351 } 352 return (1ULL << (63 - clz64(addr_mask + 1))) - 1; 353 } 354 } 355 356