xref: /qemu/system/dma-helpers.c (revision 959384e74e1b508acc3af6e806b3d7b87335fc2a)
1 /*
2  * DMA helper functions
3  *
4  * Copyright (c) 2009,2020 Red Hat
5  *
6  * This work is licensed under the terms of the GNU General Public License
7  * (GNU GPL), version 2 or later.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "sysemu/block-backend.h"
12 #include "sysemu/dma.h"
13 #include "trace/trace-root.h"
14 #include "qemu/thread.h"
15 #include "qemu/main-loop.h"
16 #include "sysemu/cpu-timers.h"
17 #include "qemu/range.h"
18 
19 /* #define DEBUG_IOMMU */
20 
21 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
22                            uint8_t c, dma_addr_t len, MemTxAttrs attrs)
23 {
24     dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
25 
26 #define FILLBUF_SIZE 512
27     uint8_t fillbuf[FILLBUF_SIZE];
28     int l;
29     MemTxResult error = MEMTX_OK;
30 
31     memset(fillbuf, c, FILLBUF_SIZE);
32     while (len > 0) {
33         l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
34         error |= address_space_write(as, addr, attrs, fillbuf, l);
35         len -= l;
36         addr += l;
37     }
38 
39     return error;
40 }
41 
42 void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
43                       AddressSpace *as)
44 {
45     qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
46     qsg->nsg = 0;
47     qsg->nalloc = alloc_hint;
48     qsg->size = 0;
49     qsg->as = as;
50     qsg->dev = dev;
51     object_ref(OBJECT(dev));
52 }
53 
54 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
55 {
56     if (qsg->nsg == qsg->nalloc) {
57         qsg->nalloc = 2 * qsg->nalloc + 1;
58         qsg->sg = g_realloc(qsg->sg, qsg->nalloc * sizeof(ScatterGatherEntry));
59     }
60     qsg->sg[qsg->nsg].base = base;
61     qsg->sg[qsg->nsg].len = len;
62     qsg->size += len;
63     ++qsg->nsg;
64 }
65 
66 void qemu_sglist_destroy(QEMUSGList *qsg)
67 {
68     object_unref(OBJECT(qsg->dev));
69     g_free(qsg->sg);
70     memset(qsg, 0, sizeof(*qsg));
71 }
72 
/*
 * State of one scatter/gather block transfer started by dma_blk_io().
 *
 * The transfer is carried out as a sequence of nested requests issued
 * through io_func; dma_blk_cb() is the completion callback of each of them
 * and drives the transfer forward.
 */
typedef struct {
    /* Embedded AIOCB handed back to the caller of dma_blk_io(). */
    BlockAIOCB common;
    /* AioContext the transfer runs in; used for BHs and io_func calls. */
    AioContext *ctx;
    /* Nested request currently in flight, or NULL if there is none. */
    BlockAIOCB *acb;
    /* Scatter/gather list being transferred. */
    QEMUSGList *sg;
    /* Alignment given by the caller; used to trim each request's size. */
    uint32_t align;
    /* Offset passed to io_func; advanced by the bytes of each request. */
    uint64_t offset;
    /* Direction used when mapping and unmapping the list's regions. */
    DMADirection dir;
    /* Index of the next scatter/gather entry to map. */
    int sg_cur_index;
    /* Number of bytes of that entry which have already been mapped. */
    dma_addr_t sg_cur_byte;
    /* Regions currently mapped for the nested request. */
    QEMUIOVector iov;
    /* BH registered as map client while nothing could be mapped, or NULL. */
    QEMUBH *bh;
    /* Function that issues the nested request, and its opaque argument. */
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;
88 
89 static void dma_blk_cb(void *opaque, int ret);
90 
91 static void reschedule_dma(void *opaque)
92 {
93     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
94 
95     assert(!dbs->acb && dbs->bh);
96     qemu_bh_delete(dbs->bh);
97     dbs->bh = NULL;
98     dma_blk_cb(dbs, 0);
99 }
100 
101 static void dma_blk_unmap(DMAAIOCB *dbs)
102 {
103     int i;
104 
105     for (i = 0; i < dbs->iov.niov; ++i) {
106         dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
107                          dbs->iov.iov[i].iov_len, dbs->dir,
108                          dbs->iov.iov[i].iov_len);
109     }
110     qemu_iovec_reset(&dbs->iov);
111 }
112 
113 static void dma_complete(DMAAIOCB *dbs, int ret)
114 {
115     trace_dma_complete(dbs, ret, dbs->common.cb);
116 
117     assert(!dbs->acb && !dbs->bh);
118     dma_blk_unmap(dbs);
119     if (dbs->common.cb) {
120         dbs->common.cb(dbs->common.opaque, ret);
121     }
122     qemu_iovec_destroy(&dbs->iov);
123     qemu_aio_unref(dbs);
124 }
125 
126 static void dma_blk_cb(void *opaque, int ret)
127 {
128     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
129     dma_addr_t cur_addr, cur_len;
130     void *mem;
131 
132     trace_dma_blk_cb(dbs, ret);
133 
134     dbs->acb = NULL;
135     dbs->offset += dbs->iov.size;
136 
137     if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
138         dma_complete(dbs, ret);
139         return;
140     }
141     dma_blk_unmap(dbs);
142 
143     while (dbs->sg_cur_index < dbs->sg->nsg) {
144         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
145         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
146         mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
147                              MEMTXATTRS_UNSPECIFIED);
148         /*
149          * Make reads deterministic in icount mode. Windows sometimes issues
150          * disk read requests with overlapping SGs. It leads
151          * to non-determinism, because resulting buffer contents may be mixed
152          * from several sectors. This code splits all SGs into several
153          * groups. SGs in every group do not overlap.
154          */
155         if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
156             int i;
157             for (i = 0 ; i < dbs->iov.niov ; ++i) {
158                 if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
159                                    dbs->iov.iov[i].iov_len, (intptr_t)mem,
160                                    cur_len)) {
161                     dma_memory_unmap(dbs->sg->as, mem, cur_len,
162                                      dbs->dir, cur_len);
163                     mem = NULL;
164                     break;
165                 }
166             }
167         }
168         if (!mem)
169             break;
170         qemu_iovec_add(&dbs->iov, mem, cur_len);
171         dbs->sg_cur_byte += cur_len;
172         if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
173             dbs->sg_cur_byte = 0;
174             ++dbs->sg_cur_index;
175         }
176     }
177 
178     if (dbs->iov.size == 0) {
179         trace_dma_map_wait(dbs);
180         dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
181         cpu_register_map_client(dbs->bh);
182         return;
183     }
184 
185     if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
186         qemu_iovec_discard_back(&dbs->iov,
187                                 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
188     }
189 
190     aio_context_acquire(dbs->ctx);
191     dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
192                             dma_blk_cb, dbs, dbs->io_func_opaque);
193     aio_context_release(dbs->ctx);
194     assert(dbs->acb);
195 }
196 
197 static void dma_aio_cancel(BlockAIOCB *acb)
198 {
199     DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
200 
201     trace_dma_aio_cancel(dbs);
202 
203     assert(!(dbs->acb && dbs->bh));
204     if (dbs->acb) {
205         /* This will invoke dma_blk_cb.  */
206         blk_aio_cancel_async(dbs->acb);
207         return;
208     }
209 
210     if (dbs->bh) {
211         cpu_unregister_map_client(dbs->bh);
212         qemu_bh_delete(dbs->bh);
213         dbs->bh = NULL;
214     }
215     if (dbs->common.cb) {
216         dbs->common.cb(dbs->common.opaque, -ECANCELED);
217     }
218 }
219 
220 static AioContext *dma_get_aio_context(BlockAIOCB *acb)
221 {
222     DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
223 
224     return dbs->ctx;
225 }
226 
227 static const AIOCBInfo dma_aiocb_info = {
228     .aiocb_size         = sizeof(DMAAIOCB),
229     .cancel_async       = dma_aio_cancel,
230     .get_aio_context    = dma_get_aio_context,
231 };
232 
233 BlockAIOCB *dma_blk_io(AioContext *ctx,
234     QEMUSGList *sg, uint64_t offset, uint32_t align,
235     DMAIOFunc *io_func, void *io_func_opaque,
236     BlockCompletionFunc *cb,
237     void *opaque, DMADirection dir)
238 {
239     DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
240 
241     trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
242 
243     dbs->acb = NULL;
244     dbs->sg = sg;
245     dbs->ctx = ctx;
246     dbs->offset = offset;
247     dbs->align = align;
248     dbs->sg_cur_index = 0;
249     dbs->sg_cur_byte = 0;
250     dbs->dir = dir;
251     dbs->io_func = io_func;
252     dbs->io_func_opaque = io_func_opaque;
253     dbs->bh = NULL;
254     qemu_iovec_init(&dbs->iov, sg->nsg);
255     dma_blk_cb(dbs, 0);
256     return &dbs->common;
257 }
258 
259 
260 static
261 BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
262                                  BlockCompletionFunc *cb, void *cb_opaque,
263                                  void *opaque)
264 {
265     BlockBackend *blk = opaque;
266     return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
267 }
268 
269 BlockAIOCB *dma_blk_read(BlockBackend *blk,
270                          QEMUSGList *sg, uint64_t offset, uint32_t align,
271                          void (*cb)(void *opaque, int ret), void *opaque)
272 {
273     return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
274                       dma_blk_read_io_func, blk, cb, opaque,
275                       DMA_DIRECTION_FROM_DEVICE);
276 }
277 
278 static
279 BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
280                                   BlockCompletionFunc *cb, void *cb_opaque,
281                                   void *opaque)
282 {
283     BlockBackend *blk = opaque;
284     return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
285 }
286 
287 BlockAIOCB *dma_blk_write(BlockBackend *blk,
288                           QEMUSGList *sg, uint64_t offset, uint32_t align,
289                           void (*cb)(void *opaque, int ret), void *opaque)
290 {
291     return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
292                       dma_blk_write_io_func, blk, cb, opaque,
293                       DMA_DIRECTION_TO_DEVICE);
294 }
295 
296 
297 static uint64_t dma_buf_rw(void *buf, int32_t len, QEMUSGList *sg,
298                            DMADirection dir, MemTxAttrs attrs)
299 {
300     uint8_t *ptr = buf;
301     uint64_t resid;
302     int sg_cur_index;
303 
304     resid = sg->size;
305     sg_cur_index = 0;
306     len = MIN(len, resid);
307     while (len > 0) {
308         ScatterGatherEntry entry = sg->sg[sg_cur_index++];
309         int32_t xfer = MIN(len, entry.len);
310         dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
311         ptr += xfer;
312         len -= xfer;
313         resid -= xfer;
314     }
315 
316     return resid;
317 }
318 
319 uint64_t dma_buf_read(void *ptr, int32_t len, QEMUSGList *sg)
320 {
321     return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_FROM_DEVICE,
322                       MEMTXATTRS_UNSPECIFIED);
323 }
324 
325 uint64_t dma_buf_write(void *ptr, int32_t len, QEMUSGList *sg)
326 {
327     return dma_buf_rw(ptr, len, sg, DMA_DIRECTION_TO_DEVICE,
328                       MEMTXATTRS_UNSPECIFIED);
329 }
330 
331 void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
332                     QEMUSGList *sg, enum BlockAcctType type)
333 {
334     block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
335 }
336 
/*
 * Compute a mask of the form 2^k - 1 for a region beginning at @start
 * within the range [@start, @end].
 *
 * @end - @start is treated as a size mask, i.e. the range is taken to hold
 * @end - @start + 1 bytes.  @max_addr_bits limits the mask to
 * 2^max_addr_bits - 1 unless it is 64.
 *
 * If the mask implied by the alignment of @start does not exceed the
 * (limited) size mask, it is returned.  Otherwise the result is derived
 * from the size alone.
 */
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    const uint64_t span = end - start;
    uint64_t limit = UINT64_MAX;
    uint64_t align_mask, span_mask;

    if (max_addr_bits != 64) {
        limit = (1ULL << max_addr_bits) - 1;
    }

    /* All bits below the lowest set bit of start; no constraint if zero. */
    align_mask = limit;
    if (start) {
        align_mask = (start & -start) - 1;
    }
    align_mask = MIN(align_mask, limit);
    span_mask = MIN(span, limit);

    if (align_mask <= span_mask) {
        /* Increase the alignment of start */
        return align_mask;
    }

    /* Find the largest page mask from size */
    if (span == UINT64_MAX) {
        /* span + 1 would wrap to zero below. */
        return UINT64_MAX;
    }
    return (1ULL << (63 - clz64(span + 1))) - 1;
}
361 
362