xref: /qemu/hw/vfio-user/device.c (revision aec6836c73403cffa56b9a4c5556451ee16071fe)
1 /*
2  * vfio protocol over a UNIX socket device handling.
3  *
4  * Copyright © 2018, 2021 Oracle and/or its affiliates.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include "qemu/osdep.h"
10 #include "qapi/error.h"
11 #include "qemu/error-report.h"
12 #include "qemu/lockable.h"
13 #include "qemu/thread.h"
14 
15 #include "hw/vfio-user/device.h"
16 #include "hw/vfio-user/trace.h"
17 
18 /*
19  * These are to defend against a malign server trying
20  * to force us to run out of memory.
21  */
22 #define VFIO_USER_MAX_REGIONS   100
23 #define VFIO_USER_MAX_IRQS      50
24 
/*
 * Query basic device info (region/IRQ counts) from the vfio-user server.
 *
 * Returns true on success and fills in *info; on failure returns false
 * with *errp set.
 */
bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    /* payload size: everything in the message after the vfio-user header */
    uint32_t payload = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = payload;

    /* transport fills in errp on send/receive failure */
    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, -msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

    /* the reply payload starts at msg.argsz; copy it to the caller */
    memcpy(info, &msg.argsz, payload);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}
58 
/*
 * Ask the vfio-user server to reset the device.
 *
 * Best-effort: failures are reported to the log but not propagated to
 * the caller.
 */
void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    VFIOUserHdr hdr;
    Error *local_err = NULL;

    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);

    /* wait for the reply so the reset is complete before we return */
    if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return;
    }

    if (hdr.flags & VFIO_USER_ERROR) {
        error_printf("reset reply error %d\n", hdr.error_reply);
    }
}
76 
/*
 * Fetch vfio_region_info for one region from the server.
 *
 * info->argsz and info->index describe the request; on success the reply
 * (which may include capability-chain data beyond the fixed struct) is
 * copied back into *info.  fds, if non-NULL, may receive FDs from the
 * server (e.g. for mmap); this request never sends FDs.
 *
 * Returns 0 on success or a negative errno value.
 */
static int vfio_user_get_region_info(VFIOUserProxy *proxy,
                                     struct vfio_region_info *info,
                                     VFIOUserFDs *fds)
{
    g_autofree VFIOUserRegionInfo *msgp = NULL;
    Error *local_err = NULL;
    uint32_t size;

    /* data returned can be larger than vfio_region_info */
    if (info->argsz < sizeof(*info)) {
        error_printf("vfio_user_get_region_info argsz too small\n");
        return -E2BIG;
    }
    /* this request is receive-only with respect to FDs */
    if (fds != NULL && fds->send_fds != 0) {
        error_printf("vfio_user_get_region_info can't send FDs\n");
        return -EINVAL;
    }

    /* reply buffer must hold the header plus argsz bytes of payload */
    size = info->argsz + sizeof(VFIOUserHdr);
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
                          sizeof(*msgp), 0);
    msgp->argsz = info->argsz;
    msgp->index = info->index;

    if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    }
    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);

    /* reply payload starts at msgp->argsz; copy it back to the caller */
    memcpy(info, &msgp->argsz, info->argsz);

    /*
     * If at least one region is directly mapped into the VM, then we can no
     * longer rely on the sequential nature of vfio-user request handling to
     * ensure that posted writes are completed before a subsequent read. In this
     * case, disable posted write support. This is a per-device property, not
     * per-region.
     */
    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        vfio_user_disable_posted_writes(proxy);
    }

    return 0;
}
129 
/*
 * VFIODeviceIOOps ->get_region_info(): fetch region info over the socket.
 *
 * On success *info is overwritten with the server's (validated) reply and
 * *fd may receive a region FD if the server sent one.
 *
 * Returns 0 on success or a negative errno value.
 */
static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    /* send no FDs, accept at most one returned FD */
    VFIOUserFDs fds = { 0, 1, fd};
    int ret;

    /*
     * Valid region indices are 0 .. num_regions - 1; the original check
     * used '>' and let index == num_regions slip through (off-by-one).
     */
    if (info->index >= vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* cap_offset in valid area: past the fixed struct, within the reply */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}
154 
/*
 * VFIODeviceIOOps ->get_irq_info(): fetch vfio_irq_info over the socket.
 *
 * Returns 0 on success or a negative errno value; on success the server's
 * reply is copied back into *info.
 */
static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserIRQInfo msg;
    VFIOUserProxy *proxy = vbasedev->proxy;
    Error *local_err = NULL;

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(msg), 0);
    msg.argsz = info->argsz;
    msg.index = info->index;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        return -msg.hdr.error_reply;
    }

    trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);

    /* the reply payload starts at msg.argsz; copy it to the caller */
    memcpy(info, &msg.argsz, sizeof(*info));
    return 0;
}
182 
/*
 * Return the length of the run of consecutive valid (!= -1) or invalid
 * (== -1) FDs starting at fdp[cur], scanning at most max entries.
 * Always returns at least 1.
 */
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    /* polarity of the run: true if it starts with a valid FD */
    bool valid = fdp[cur] != -1;
    uint32_t n;

    /* the first slot always belongs to the run */
    for (n = 1; n < max && (fdp[cur + n] != -1) == valid; n++) {
        continue;
    }

    return n;
}
199 
/*
 * VFIODeviceIOOps ->set_irqs(): forward a VFIO_DEVICE_SET_IRQS request.
 *
 * When the request carries eventfds they are sent as ancillary FDs, split
 * into multiple messages as needed to respect the proxy's max_send_fds
 * limit and to keep valid and invalid (-1) FDs in separate messages.
 *
 * Returns 0 on success or a negative errno value.
 */
static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *msgp = NULL;
    uint32_t size, nfds, send_fds, sent_fds, max;
    Error *local_err = NULL;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle simple case: no eventfds, so the whole request (header +
     * trailing data) fits in a single message.
     */
    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
        size = sizeof(VFIOUserHdr) + irq->argsz;
        msgp = g_malloc0(size);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start;
        msgp->count = irq->count;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }

        return 0;
    }

    /*
     * Calculate the number of FDs to send
     * and adjust argsz (the FDs travel as ancillary data, not payload)
     */
    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    msgp = g_malloc0(sizeof(*msgp));
    /*
     * Send in chunks if over max_send_fds
     */
    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
        VFIOUserFDs *arg_fds, loop_fds;

        /* must send all valid FDs or all invalid FDs in single msg */
        max = nfds - sent_fds;
        if (max > proxy->max_send_fds) {
            max = proxy->max_send_fds;
        }
        /* length of the homogeneous run starting at sent_fds */
        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*msgp), 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start + sent_fds;
        msgp->count = send_fds;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        loop_fds.send_fds = send_fds;
        loop_fds.recv_fds = 0;
        loop_fds.fds = (int *)irq->data + sent_fds;
        /* a run of invalid (-1) FDs is sent with no ancillary FDs at all */
        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;

        if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }
    }

    return 0;
}
290 
/*
 * VFIODeviceIOOps ->region_read(): read `count` bytes at `off` in region
 * `index` over the socket into `data`.
 *
 * Returns the number of bytes read, or a negative errno value.
 */
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserRegionRW *req = NULL;
    Error *local_err = NULL;
    int size = sizeof(*req) + count;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    /* reply is received into the same buffer, so size it for the data */
    req = g_malloc0(size);
    vfio_user_request_msg(&req->hdr, VFIO_USER_REGION_READ, sizeof(*req), 0);
    req->offset = off;
    req->region = index;
    req->count = count;
    trace_vfio_user_region_rw(req->region, req->offset, req->count);

    if (!vfio_user_send_wait(proxy, &req->hdr, NULL, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (req->hdr.flags & VFIO_USER_ERROR) {
        return -req->hdr.error_reply;
    }
    /* defend against a server replying with more data than we asked for */
    if (req->count > count) {
        return -E2BIG;
    }

    memcpy(data, &req->data, req->count);
    return req->count;
}
327 
/*
 * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
 * to send the write to the socket without waiting for the server's reply:
 * a subsequent read (of any region) will not pass the posted write, as all
 * messages are handled sequentially.
 *
 * Returns the number of bytes written, or a negative errno value.
 */
static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
                                            off_t off, unsigned count,
                                            void *data, bool post)
{
    VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;
    bool can_multi;
    int flags = 0;
    int ret;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    /* posted writes may have been disabled for this device */
    if (proxy->flags & VFIO_PROXY_NO_POST) {
        post = false;
    }

    if (post) {
        flags |= VFIO_USER_NO_REPLY;
    }

    /* write eligible to be in a WRITE_MULTI msg ? */
    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
        count <= VFIO_USER_MULTI_DATA;

    /*
     * This should be a rare case, so first check without the lock,
     * if we're wrong, vfio_send_queued() will flush any posted writes
     * we missed here
     */
    if (proxy->wr_multi != NULL ||
        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {

        /*
         * re-check with lock
         *
         * if already building a WRITE_MULTI msg,
         *  add this one if possible else flush pending before
         *  sending the current one
         *
         * else if outgoing queue is over the highwater,
         *  start a new WRITE_MULTI message
         */
        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
            if (proxy->wr_multi != NULL) {
                if (can_multi) {
                    vfio_user_add_multi(proxy, index, off, count, data);
                    return count;
                }
                vfio_user_flush_multi(proxy);
            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
                vfio_user_create_multi(proxy);
                vfio_user_add_multi(proxy, index, off, count, data);
                return count;
            }
        }
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    memcpy(&msgp->data, data, count);
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    /* async send will free msg after it's sent */
    if (post) {
        /*
         * NOTE(review): this path does not free msgp on failure; it relies
         * on vfio_user_send_async owning/freeing the message — confirm.
         */
        if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        return count;
    }

    /* synchronous path: we own msgp and must free it on every exit */
    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        g_free(msgp);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    } else {
        ret = count;
    }

    g_free(msgp);
    return ret;
}
430 
/*
 * Socket-based io_ops
 *
 * Dispatch table wiring the VFIODeviceIOOps interface to the vfio-user
 * socket implementations above.
 */
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
    .get_region_info = vfio_user_device_io_get_region_info,
    .get_irq_info = vfio_user_device_io_get_irq_info,
    .set_irqs = vfio_user_device_io_set_irqs,
    .region_read = vfio_user_device_io_region_read,
    .region_write = vfio_user_device_io_region_write,

};
442