xref: /qemu/system/dma-helpers.c (revision abfcd2760b3e70727bbc0792221b8b98a733dc32)
1 /*
2  * DMA helper functions
3  *
4  * Copyright (c) 2009,2020 Red Hat
5  *
6  * This work is licensed under the terms of the GNU General Public License
7  * (GNU GPL), version 2 or later.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "sysemu/block-backend.h"
12 #include "sysemu/dma.h"
13 #include "trace/trace-root.h"
14 #include "qemu/thread.h"
15 #include "qemu/main-loop.h"
16 #include "sysemu/cpu-timers.h"
17 #include "qemu/range.h"
18 
19 /* #define DEBUG_IOMMU */
20 
21 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
22                            uint8_t c, dma_addr_t len, MemTxAttrs attrs)
23 {
24     dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
25 
26     return address_space_set(as, addr, c, len, attrs);
27 }
28 
29 void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
30                       AddressSpace *as)
31 {
32     qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
33     qsg->nsg = 0;
34     qsg->nalloc = alloc_hint;
35     qsg->size = 0;
36     qsg->as = as;
37     qsg->dev = dev;
38     object_ref(OBJECT(dev));
39 }
40 
41 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
42 {
43     if (qsg->nsg == qsg->nalloc) {
44         qsg->nalloc = 2 * qsg->nalloc + 1;
45         qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
46     }
47     qsg->sg[qsg->nsg].base = base;
48     qsg->sg[qsg->nsg].len = len;
49     qsg->size += len;
50     ++qsg->nsg;
51 }
52 
53 void qemu_sglist_destroy(QEMUSGList *qsg)
54 {
55     object_unref(OBJECT(qsg->dev));
56     g_free(qsg->sg);
57     memset(qsg, 0, sizeof(*qsg));
58 }
59 
/*
 * Per-request state of a scatter/gather block transfer started by
 * dma_blk_io().  The request is driven by dma_blk_cb(), which maps as much
 * of the scatter/gather list as it can, submits it through io_func and is
 * called again on completion.
 */
typedef struct {
    BlockAIOCB common;      /* generic AIOCB part; carries the caller's cb/opaque */
    AioContext *ctx;        /* context acquired around dma_blk_cb() and used for bh */
    BlockAIOCB *acb;        /* request returned by io_func, NULL when none is pending */
    QEMUSGList *sg;         /* scatter/gather list being transferred */
    uint32_t align;         /* alignment that iov.size is checked against */
    uint64_t offset;        /* offset handed to io_func; advanced by iov.size */
    DMADirection dir;       /* direction used for mapping and unmapping */
    int sg_cur_index;       /* index of the next sg entry to map */
    dma_addr_t sg_cur_byte; /* bytes of that entry which are already consumed */
    QEMUIOVector iov;       /* host buffers currently mapped for the request */
    QEMUBH *bh;             /* retry bottom half, set while waiting to map memory */
    DMAIOFunc *io_func;     /* function that submits the mapped iov */
    void *io_func_opaque;   /* last argument passed to io_func */
} DMAAIOCB;

/* Defined below; declared here because reschedule_dma() calls it. */
static void dma_blk_cb(void *opaque, int ret);
77 
78 static void reschedule_dma(void *opaque)
79 {
80     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
81 
82     assert(!dbs->acb && dbs->bh);
83     qemu_bh_delete(dbs->bh);
84     dbs->bh = NULL;
85     dma_blk_cb(dbs, 0);
86 }
87 
88 static void dma_blk_unmap(DMAAIOCB *dbs)
89 {
90     int i;
91 
92     for (i = 0; i < dbs->iov.niov; ++i) {
93         dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
94                          dbs->iov.iov[i].iov_len, dbs->dir,
95                          dbs->iov.iov[i].iov_len);
96     }
97     qemu_iovec_reset(&dbs->iov);
98 }
99 
100 static void dma_complete(DMAAIOCB *dbs, int ret)
101 {
102     trace_dma_complete(dbs, ret, dbs->common.cb);
103 
104     assert(!dbs->acb && !dbs->bh);
105     dma_blk_unmap(dbs);
106     if (dbs->common.cb) {
107         dbs->common.cb(dbs->common.opaque, ret);
108     }
109     qemu_iovec_destroy(&dbs->iov);
110     qemu_aio_unref(dbs);
111 }
112 
113 static void dma_blk_cb(void *opaque, int ret)
114 {
115     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
116     AioContext *ctx = dbs->ctx;
117     dma_addr_t cur_addr, cur_len;
118     void *mem;
119 
120     trace_dma_blk_cb(dbs, ret);
121 
122     aio_context_acquire(ctx);
123     dbs->acb = NULL;
124     dbs->offset += dbs->iov.size;
125 
126     if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
127         dma_complete(dbs, ret);
128         goto out;
129     }
130     dma_blk_unmap(dbs);
131 
132     while (dbs->sg_cur_index < dbs->sg->nsg) {
133         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
134         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
135         mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
136                              MEMTXATTRS_UNSPECIFIED);
137         /*
138          * Make reads deterministic in icount mode. Windows sometimes issues
139          * disk read requests with overlapping SGs. It leads
140          * to non-determinism, because resulting buffer contents may be mixed
141          * from several sectors. This code splits all SGs into several
142          * groups. SGs in every group do not overlap.
143          */
144         if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
145             int i;
146             for (i = 0 ; i < dbs->iov.niov ; ++i) {
147                 if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
148                                    dbs->iov.iov[i].iov_len, (intptr_t)mem,
149                                    cur_len)) {
150                     dma_memory_unmap(dbs->sg->as, mem, cur_len,
151                                      dbs->dir, cur_len);
152                     mem = NULL;
153                     break;
154                 }
155             }
156         }
157         if (!mem)
158             break;
159         qemu_iovec_add(&dbs->iov, mem, cur_len);
160         dbs->sg_cur_byte += cur_len;
161         if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
162             dbs->sg_cur_byte = 0;
163             ++dbs->sg_cur_index;
164         }
165     }
166 
167     if (dbs->iov.size == 0) {
168         trace_dma_map_wait(dbs);
169         dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
170         cpu_register_map_client(dbs->bh);
171         goto out;
172     }
173 
174     if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
175         qemu_iovec_discard_back(&dbs->iov,
176                                 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
177     }
178 
179     dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
180                             dma_blk_cb, dbs, dbs->io_func_opaque);
181     assert(dbs->acb);
182 out:
183     aio_context_release(ctx);
184 }
185 
186 static void dma_aio_cancel(BlockAIOCB *acb)
187 {
188     DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
189 
190     trace_dma_aio_cancel(dbs);
191 
192     assert(!(dbs->acb && dbs->bh));
193     if (dbs->acb) {
194         /* This will invoke dma_blk_cb.  */
195         blk_aio_cancel_async(dbs->acb);
196         return;
197     }
198 
199     if (dbs->bh) {
200         cpu_unregister_map_client(dbs->bh);
201         qemu_bh_delete(dbs->bh);
202         dbs->bh = NULL;
203     }
204     if (dbs->common.cb) {
205         dbs->common.cb(dbs->common.opaque, -ECANCELED);
206     }
207 }
208 
209 static AioContext *dma_get_aio_context(BlockAIOCB *acb)
210 {
211     DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
212 
213     return dbs->ctx;
214 }
215 
/*
 * AIOCB descriptor handed to qemu_aio_get() in dma_blk_io(): gives the size
 * of a DMAAIOCB and the cancel / context-lookup handlers defined above.
 */
static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size         = sizeof(DMAAIOCB),
    .cancel_async       = dma_aio_cancel,
    .get_aio_context    = dma_get_aio_context,
};
221 
222 BlockAIOCB *dma_blk_io(AioContext *ctx,
223     QEMUSGList *sg, uint64_t offset, uint32_t align,
224     DMAIOFunc *io_func, void *io_func_opaque,
225     BlockCompletionFunc *cb,
226     void *opaque, DMADirection dir)
227 {
228     DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
229 
230     trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
231 
232     dbs->acb = NULL;
233     dbs->sg = sg;
234     dbs->ctx = ctx;
235     dbs->offset = offset;
236     dbs->align = align;
237     dbs->sg_cur_index = 0;
238     dbs->sg_cur_byte = 0;
239     dbs->dir = dir;
240     dbs->io_func = io_func;
241     dbs->io_func_opaque = io_func_opaque;
242     dbs->bh = NULL;
243     qemu_iovec_init(&dbs->iov, sg->nsg);
244     dma_blk_cb(dbs, 0);
245     return &dbs->common;
246 }
247 
248 
249 static
250 BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
251                                  BlockCompletionFunc *cb, void *cb_opaque,
252                                  void *opaque)
253 {
254     BlockBackend *blk = opaque;
255     return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
256 }
257 
258 BlockAIOCB *dma_blk_read(BlockBackend *blk,
259                          QEMUSGList *sg, uint64_t offset, uint32_t align,
260                          void (*cb)(void *opaque, int ret), void *opaque)
261 {
262     return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
263                       dma_blk_read_io_func, blk, cb, opaque,
264                       DMA_DIRECTION_FROM_DEVICE);
265 }
266 
267 static
268 BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
269                                   BlockCompletionFunc *cb, void *cb_opaque,
270                                   void *opaque)
271 {
272     BlockBackend *blk = opaque;
273     return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
274 }
275 
276 BlockAIOCB *dma_blk_write(BlockBackend *blk,
277                           QEMUSGList *sg, uint64_t offset, uint32_t align,
278                           void (*cb)(void *opaque, int ret), void *opaque)
279 {
280     return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
281                       dma_blk_write_io_func, blk, cb, opaque,
282                       DMA_DIRECTION_TO_DEVICE);
283 }
284 
285 
286 static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
287                               QEMUSGList *sg, DMADirection dir,
288                               MemTxAttrs attrs)
289 {
290     uint8_t *ptr = buf;
291     dma_addr_t xresidual;
292     int sg_cur_index;
293     MemTxResult res = MEMTX_OK;
294 
295     xresidual = sg->size;
296     sg_cur_index = 0;
297     len = MIN(len, xresidual);
298     while (len > 0) {
299         ScatterGatherEntry entry = sg->sg[sg_cur_index++];
300         dma_addr_t xfer = MIN(len, entry.len);
301         res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
302         ptr += xfer;
303         len -= xfer;
304         xresidual -= xfer;
305     }
306 
307     if (residual) {
308         *residual = xresidual;
309     }
310     return res;
311 }
312 
313 MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
314                          QEMUSGList *sg, MemTxAttrs attrs)
315 {
316     return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
317 }
318 
319 MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
320                           QEMUSGList *sg, MemTxAttrs attrs)
321 {
322     return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
323 }
324 
325 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
326                     QEMUSGList *sg, enum BlockAcctType type)
327 {
328     block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
329 }
330 
/*
 * Compute a mask of the form 2^n - 1 for a region beginning at @start inside
 * the range [@start, @end].
 *
 * Two candidates are considered, both capped at the largest mask that fits
 * in @max_addr_bits address bits:
 *   - the alignment mask of @start, i.e. (lowest set bit of @start) - 1, or
 *     the cap itself when @start is 0;
 *   - the size mask @end - @start.
 * If the alignment mask does not exceed the size mask it is returned.
 * Otherwise the result is derived from the size alone: UINT64_MAX when
 * @end - @start is UINT64_MAX, else 2^k - 1 with k = 63 - clz64(size) for
 * size = @end - @start + 1.
 */
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    const uint64_t span = end - start;
    uint64_t limit = UINT64_MAX;
    uint64_t start_mask;

    if (max_addr_bits != 64) {
        limit = (1ULL << max_addr_bits) - 1;
    }

    if (start) {
        start_mask = (start & -start) - 1;
    } else {
        start_mask = limit;
    }
    start_mask = MIN(start_mask, limit);

    if (start_mask <= MIN(span, limit)) {
        /* Increase the alignment of start */
        return start_mask;
    }

    /* Find the largest page mask from size */
    if (span == UINT64_MAX) {
        /* span + 1 would wrap to 0, so handle the full range explicitly. */
        return UINT64_MAX;
    }
    return (1ULL << (63 - clz64(span + 1))) - 1;
}
355 
356