/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */

#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/dma.h"
#include "trace/trace-root.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */

MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

#define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    int l;
    MemTxResult error = MEMTX_OK;

    memset(fillbuf, c, FILLBUF_SIZE);
    while (len > 0) {
        l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
        error |= address_space_write(as, addr, attrs, fillbuf, l);
        len -= l;
        addr += l;
    }

    return error;
}

void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}

void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}

void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}

typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);

static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}

static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}

static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}
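
/*
 * Completion callback and main loop of a scatter/gather transfer: map as
 * many of the remaining SG entries as possible into dbs->iov and submit
 * them through dbs->io_func, with this function as the completion callback
 * so that the transfer continues until the whole list has been processed
 * or an error occurs.  If nothing could be mapped (e.g. the bounce buffer
 * is in use), wait for a mapping to become available via
 * cpu_register_map_client().
 */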
static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
                             MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode. Windows sometimes issues
         * disk read requests with overlapping SGs. This leads to
         * non-determinism, because the resulting buffer contents may be
         * mixed from several sectors. This code splits the SGs into several
         * groups so that the SGs within each group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0; i < dbs->iov.niov; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem) {
            break;
        }
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
        cpu_register_map_client(dbs->bh);
        return;
    }

    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
    }

    aio_context_acquire(dbs->ctx);
    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    aio_context_release(dbs->ctx);
    assert(dbs->acb);
}

static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb. */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        cpu_unregister_map_client(dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}

static AioContext *dma_get_aio_context(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    return dbs->ctx;
}

static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size = sizeof(DMAAIOCB),
    .cancel_async = dma_aio_cancel,
    .get_aio_context = dma_get_aio_context,
};

/*
 * Start a DMA transfer for the scatter/gather list @sg: the list is mapped
 * piece by piece and fed to @io_func, and @cb is invoked once the whole
 * list has been transferred, or on error or cancellation.
 */
BlockAIOCB *dma_blk_io(AioContext *ctx,
                       QEMUSGList *sg, uint64_t offset, uint32_t align,
                       DMAIOFunc *io_func, void *io_func_opaque,
                       BlockCompletionFunc *cb,
                       void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset,
                     (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = ctx;
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}
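
/*
 * Convenience wrappers for raw reads and writes on a BlockBackend: the
 * io_func trampolines below forward to blk_aio_preadv()/blk_aio_pwritev(),
 * with the BlockBackend passed through dma_blk_io()'s io_func_opaque.
 */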
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}

static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}

static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
                              QEMUSGList *sg, DMADirection dir,
                              MemTxAttrs attrs)
{
    uint8_t *ptr = buf;
    dma_addr_t xresidual;
    int sg_cur_index;
    MemTxResult res = MEMTX_OK;

    xresidual = sg->size;
    sg_cur_index = 0;
    len = MIN(len, xresidual);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        dma_addr_t xfer = MIN(len, entry.len);
        res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
        ptr += xfer;
        len -= xfer;
        xresidual -= xfer;
    }

    if (residual) {
        *residual = xresidual;
    }
    return res;
}

MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
                         QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}

MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
                          QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}

void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}

uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}