xref: /qemu/hw/vfio-user/proxy.c (revision 36227628d824f563fda95f9344176ca7263c7eaf)
1 /*
2  * vfio protocol over a UNIX socket.
3  *
4  * Copyright © 2018, 2021 Oracle and/or its affiliates.
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  */
8 
9 #include "qemu/osdep.h"
10 #include <sys/ioctl.h>
11 
12 #include "hw/vfio/vfio-device.h"
13 #include "hw/vfio-user/proxy.h"
14 #include "hw/vfio-user/trace.h"
15 #include "qapi/error.h"
16 #include "qobject/qdict.h"
17 #include "qobject/qjson.h"
18 #include "qobject/qnum.h"
19 #include "qemu/error-report.h"
20 #include "qemu/lockable.h"
21 #include "qemu/main-loop.h"
22 #include "system/iothread.h"
23 
24 static int wait_time = 5000;   /* wait up to 5 sec for busy servers */
25 static IOThread *vfio_user_iothread;
26 
27 static void vfio_user_shutdown(VFIOUserProxy *proxy);
28 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
29                                      VFIOUserFDs *fds);
30 static VFIOUserFDs *vfio_user_getfds(int numfds);
31 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
32 
33 static void vfio_user_recv(void *opaque);
34 static void vfio_user_send(void *opaque);
35 static void vfio_user_cb(void *opaque);
36 
37 static void vfio_user_request(void *opaque);
38 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
39                                   uint32_t size, uint32_t flags);
40 
41 static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
42 {
43     hdr->flags |= VFIO_USER_ERROR;
44     hdr->error_reply = err;
45 }
46 
47 /*
48  * Functions called by main, CPU, or iothread threads
49  */
50 
51 static void vfio_user_shutdown(VFIOUserProxy *proxy)
52 {
53     qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
54     qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL,
55                                    proxy->ctx, NULL, NULL);
56 }
57 
58 /*
59  * Same return values as qio_channel_writev_full():
60  *
61  * QIO_CHANNEL_ERR_BLOCK: *errp not set
62  * -1: *errp will be populated
63  * otherwise: bytes written
64  */
65 static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
66                                   Error **errp)
67 {
68     VFIOUserFDs *fds =  msg->fds;
69     struct iovec iov = {
70         .iov_base = msg->hdr,
71         .iov_len = msg->hdr->size,
72     };
73     size_t numfds = 0;
74     int *fdp = NULL;
75     ssize_t ret;
76 
77     if (fds != NULL && fds->send_fds != 0) {
78         numfds = fds->send_fds;
79         fdp = fds->fds;
80     }
81 
82     ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp);
83 
84     if (ret == -1) {
85         vfio_user_set_error(msg->hdr, EIO);
86         vfio_user_shutdown(proxy);
87     }
88     trace_vfio_user_send_write(msg->hdr->id, ret);
89 
90     return ret;
91 }
92 
93 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
94                                      VFIOUserFDs *fds)
95 {
96     VFIOUserMsg *msg;
97 
98     msg = QTAILQ_FIRST(&proxy->free);
99     if (msg != NULL) {
100         QTAILQ_REMOVE(&proxy->free, msg, next);
101     } else {
102         msg = g_malloc0(sizeof(*msg));
103         qemu_cond_init(&msg->cv);
104     }
105 
106     msg->hdr = hdr;
107     msg->fds = fds;
108     return msg;
109 }
110 
111 /*
112  * Recycle a message list entry to the free list.
113  */
114 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
115 {
116     if (msg->type == VFIO_MSG_NONE) {
117         error_printf("vfio_user_recycle - freeing free msg\n");
118         return;
119     }
120 
121     /* free msg buffer if no one is waiting to consume the reply */
122     if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
123         g_free(msg->hdr);
124         if (msg->fds != NULL) {
125             g_free(msg->fds);
126         }
127     }
128 
129     msg->type = VFIO_MSG_NONE;
130     msg->hdr = NULL;
131     msg->fds = NULL;
132     msg->complete = false;
133     msg->pending = false;
134     QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
135 }
136 
137 static VFIOUserFDs *vfio_user_getfds(int numfds)
138 {
139     VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
140 
141     fds->fds = (int *)((char *)fds + sizeof(*fds));
142 
143     return fds;
144 }
145 
146 /*
147  * Functions only called by iothread
148  */
149 
150 /*
151  * Process a received message.
152  */
153 static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
154                               bool isreply)
155 {
156 
157     /*
158      * Replies signal a waiter, if none just check for errors
159      * and free the message buffer.
160      *
161      * Requests get queued for the BH.
162      */
163     if (isreply) {
164         msg->complete = true;
165         if (msg->type == VFIO_MSG_WAIT) {
166             qemu_cond_signal(&msg->cv);
167         } else {
168             if (msg->hdr->flags & VFIO_USER_ERROR) {
169                 error_printf("vfio_user_process: error reply on async ");
170                 error_printf("request command %x error %s\n",
171                              msg->hdr->command,
172                              strerror(msg->hdr->error_reply));
173             }
174             /* youngest nowait msg has been ack'd */
175             if (proxy->last_nowait == msg) {
176                 proxy->last_nowait = NULL;
177             }
178             vfio_user_recycle(proxy, msg);
179         }
180     } else {
181         QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
182         qemu_bh_schedule(proxy->req_bh);
183     }
184 }
185 
186 /*
187  * Complete a partial message read
188  */
189 static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
190 {
191     VFIOUserMsg *msg = proxy->part_recv;
192     size_t msgleft = proxy->recv_left;
193     bool isreply;
194     char *data;
195     int ret;
196 
197     data = (char *)msg->hdr + (msg->hdr->size - msgleft);
198     while (msgleft > 0) {
199         ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
200 
201         /* error or would block */
202         if (ret <= 0) {
203             /* try for rest on next iternation */
204             if (ret == QIO_CHANNEL_ERR_BLOCK) {
205                 proxy->recv_left = msgleft;
206             }
207             return ret;
208         }
209         trace_vfio_user_recv_read(msg->hdr->id, ret);
210 
211         msgleft -= ret;
212         data += ret;
213     }
214 
215     /*
216      * Read complete message, process it.
217      */
218     proxy->part_recv = NULL;
219     proxy->recv_left = 0;
220     isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
221     vfio_user_process(proxy, msg, isreply);
222 
223     /* return positive value */
224     return 1;
225 }
226 
227 /*
228  * Receive and process one incoming message.
229  *
230  * For replies, find matching outgoing request and wake any waiters.
231  * For requests, queue in incoming list and run request BH.
232  */
233 static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
234 {
235     VFIOUserMsg *msg = NULL;
236     g_autofree int *fdp = NULL;
237     VFIOUserFDs *reqfds;
238     VFIOUserHdr hdr;
239     struct iovec iov = {
240         .iov_base = &hdr,
241         .iov_len = sizeof(hdr),
242     };
243     bool isreply = false;
244     int i, ret;
245     size_t msgleft, numfds = 0;
246     char *data = NULL;
247     char *buf = NULL;
248 
249     /*
250      * Complete any partial reads
251      */
252     if (proxy->part_recv != NULL) {
253         ret = vfio_user_complete(proxy, errp);
254 
255         /* still not complete, try later */
256         if (ret == QIO_CHANNEL_ERR_BLOCK) {
257             return ret;
258         }
259 
260         if (ret <= 0) {
261             goto fatal;
262         }
263         /* else fall into reading another msg */
264     }
265 
266     /*
267      * Read header
268      */
269     ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
270                                  errp);
271     if (ret == QIO_CHANNEL_ERR_BLOCK) {
272         return ret;
273     }
274 
275     /* read error or other side closed connection */
276     if (ret <= 0) {
277         goto fatal;
278     }
279 
280     if (ret < sizeof(hdr)) {
281         error_setg(errp, "short read of header");
282         goto fatal;
283     }
284 
285     /*
286      * Validate header
287      */
288     if (hdr.size < sizeof(VFIOUserHdr)) {
289         error_setg(errp, "bad header size");
290         goto fatal;
291     }
292     switch (hdr.flags & VFIO_USER_TYPE) {
293     case VFIO_USER_REQUEST:
294         isreply = false;
295         break;
296     case VFIO_USER_REPLY:
297         isreply = true;
298         break;
299     default:
300         error_setg(errp, "unknown message type");
301         goto fatal;
302     }
303     trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
304                              hdr.flags);
305 
306     /*
307      * For replies, find the matching pending request.
308      * For requests, reap incoming FDs.
309      */
310     if (isreply) {
311         QTAILQ_FOREACH(msg, &proxy->pending, next) {
312             if (hdr.id == msg->id) {
313                 break;
314             }
315         }
316         if (msg == NULL) {
317             error_setg(errp, "unexpected reply");
318             goto err;
319         }
320         QTAILQ_REMOVE(&proxy->pending, msg, next);
321 
322         /*
323          * Process any received FDs
324          */
325         if (numfds != 0) {
326             if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
327                 error_setg(errp, "unexpected FDs");
328                 goto err;
329             }
330             msg->fds->recv_fds = numfds;
331             memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
332         }
333     } else {
334         if (numfds != 0) {
335             reqfds = vfio_user_getfds(numfds);
336             memcpy(reqfds->fds, fdp, numfds * sizeof(int));
337         } else {
338             reqfds = NULL;
339         }
340     }
341 
342     /*
343      * Put the whole message into a single buffer.
344      */
345     if (isreply) {
346         if (hdr.size > msg->rsize) {
347             error_setg(errp, "reply larger than recv buffer");
348             goto err;
349         }
350         *msg->hdr = hdr;
351         data = (char *)msg->hdr + sizeof(hdr);
352     } else {
353         buf = g_malloc0(hdr.size);
354         memcpy(buf, &hdr, sizeof(hdr));
355         data = buf + sizeof(hdr);
356         msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
357         msg->type = VFIO_MSG_REQ;
358     }
359 
360     /*
361      * Read rest of message.
362      */
363     msgleft = hdr.size - sizeof(hdr);
364     while (msgleft > 0) {
365         ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
366 
367         /* prepare to complete read on next iternation */
368         if (ret == QIO_CHANNEL_ERR_BLOCK) {
369             proxy->part_recv = msg;
370             proxy->recv_left = msgleft;
371             return ret;
372         }
373 
374         if (ret <= 0) {
375             goto fatal;
376         }
377         trace_vfio_user_recv_read(hdr.id, ret);
378 
379         msgleft -= ret;
380         data += ret;
381     }
382 
383     vfio_user_process(proxy, msg, isreply);
384     return 0;
385 
386     /*
387      * fatal means the other side closed or we don't trust the stream
388      * err means this message is corrupt
389      */
390 fatal:
391     vfio_user_shutdown(proxy);
392     proxy->state = VFIO_PROXY_ERROR;
393 
394     /* set error if server side closed */
395     if (ret == 0) {
396         error_setg(errp, "server closed socket");
397     }
398 
399 err:
400     for (i = 0; i < numfds; i++) {
401         close(fdp[i]);
402     }
403     if (isreply && msg != NULL) {
404         /* force an error to keep sending thread from hanging */
405         vfio_user_set_error(msg->hdr, EINVAL);
406         msg->complete = true;
407         qemu_cond_signal(&msg->cv);
408     }
409     return -1;
410 }
411 
412 static void vfio_user_recv(void *opaque)
413 {
414     VFIOUserProxy *proxy = opaque;
415 
416     QEMU_LOCK_GUARD(&proxy->lock);
417 
418     if (proxy->state == VFIO_PROXY_CONNECTED) {
419         Error *local_err = NULL;
420 
421         while (vfio_user_recv_one(proxy, &local_err) == 0) {
422             ;
423         }
424 
425         if (local_err != NULL) {
426             error_report_err(local_err);
427         }
428     }
429 }
430 
431 /*
432  * Send a single message, same return semantics as vfio_user_send_qio().
433  *
434  * Sent async messages are freed, others are moved to pending queue.
435  */
436 static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
437 {
438     VFIOUserMsg *msg;
439     ssize_t ret;
440 
441     msg = QTAILQ_FIRST(&proxy->outgoing);
442     ret = vfio_user_send_qio(proxy, msg, errp);
443     if (ret < 0) {
444         return ret;
445     }
446 
447     QTAILQ_REMOVE(&proxy->outgoing, msg, next);
448     if (msg->type == VFIO_MSG_ASYNC) {
449         vfio_user_recycle(proxy, msg);
450     } else {
451         QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
452         msg->pending = true;
453     }
454 
455     return ret;
456 }
457 
458 /*
459  * Send messages from outgoing queue when the socket buffer has space.
460  * If we deplete 'outgoing', remove ourselves from the poll list.
461  */
462 static void vfio_user_send(void *opaque)
463 {
464     VFIOUserProxy *proxy = opaque;
465 
466     QEMU_LOCK_GUARD(&proxy->lock);
467 
468     if (proxy->state == VFIO_PROXY_CONNECTED) {
469         while (!QTAILQ_EMPTY(&proxy->outgoing)) {
470             Error *local_err = NULL;
471             int ret;
472 
473             ret = vfio_user_send_one(proxy, &local_err);
474 
475             if (ret == QIO_CHANNEL_ERR_BLOCK) {
476                 return;
477             } else if (ret == -1) {
478                 error_report_err(local_err);
479                 return;
480             }
481         }
482         qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
483                                        vfio_user_recv, NULL, NULL, proxy);
484     }
485 }
486 
487 static void vfio_user_cb(void *opaque)
488 {
489     VFIOUserProxy *proxy = opaque;
490 
491     QEMU_LOCK_GUARD(&proxy->lock);
492 
493     proxy->state = VFIO_PROXY_CLOSED;
494     qemu_cond_signal(&proxy->close_cv);
495 }
496 
497 
498 /*
499  * Functions called by main or CPU threads
500  */
501 
502 /*
503  * Process incoming requests.
504  *
505  * The bus-specific callback has the form:
506  *    request(opaque, msg)
507  * where 'opaque' was specified in vfio_user_set_handler
508  * and 'msg' is the inbound message.
509  *
510  * The callback is responsible for disposing of the message buffer,
511  * usually by re-using it when calling vfio_send_reply or vfio_send_error,
512  * both of which free their message buffer when the reply is sent.
513  *
514  * If the callback uses a new buffer, it needs to free the old one.
515  */
516 static void vfio_user_request(void *opaque)
517 {
518     VFIOUserProxy *proxy = opaque;
519     VFIOUserMsgQ new, free;
520     VFIOUserMsg *msg, *m1;
521 
522     /* reap all incoming */
523     QTAILQ_INIT(&new);
524     WITH_QEMU_LOCK_GUARD(&proxy->lock) {
525         QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
526             QTAILQ_REMOVE(&proxy->incoming, msg, next);
527             QTAILQ_INSERT_TAIL(&new, msg, next);
528         }
529     }
530 
531     /* process list */
532     QTAILQ_INIT(&free);
533     QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
534         QTAILQ_REMOVE(&new, msg, next);
535         trace_vfio_user_recv_request(msg->hdr->command);
536         proxy->request(proxy->req_arg, msg);
537         QTAILQ_INSERT_HEAD(&free, msg, next);
538     }
539 
540     /* free list */
541     WITH_QEMU_LOCK_GUARD(&proxy->lock) {
542         QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
543             vfio_user_recycle(proxy, msg);
544         }
545     }
546 }
547 
548 /*
549  * Messages are queued onto the proxy's outgoing list.
550  *
551  * It handles 3 types of messages:
552  *
553  * async messages - replies and posted writes
554  *
555  * There will be no reply from the server, so message
556  * buffers are freed after they're sent.
557  *
558  * nowait messages - map/unmap during address space transactions
559  *
560  * These are also sent async, but a reply is expected so that
561  * vfio_wait_reqs() can wait for the youngest nowait request.
562  * They transition from the outgoing list to the pending list
563  * when sent, and are freed when the reply is received.
564  *
565  * wait messages - all other requests
566  *
567  * The reply to these messages is waited for by their caller.
568  * They also transition from outgoing to pending when sent, but
569  * the message buffer is returned to the caller with the reply
570  * contents.  The caller is responsible for freeing these messages.
571  *
572  * As an optimization, if the outgoing list and the socket send
573  * buffer are empty, the message is sent inline instead of being
574  * added to the outgoing list.  The rest of the transitions are
575  * unchanged.
576  */
577 static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
578                                   Error **errp)
579 {
580     int ret;
581 
582     /*
583      * Unsent outgoing msgs - add to tail
584      */
585     if (!QTAILQ_EMPTY(&proxy->outgoing)) {
586         QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
587         return true;
588     }
589 
590     /*
591      * Try inline - if blocked, queue it and kick send poller
592      */
593     if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) {
594         ret = QIO_CHANNEL_ERR_BLOCK;
595     } else {
596         ret = vfio_user_send_qio(proxy, msg, errp);
597     }
598 
599     if (ret == QIO_CHANNEL_ERR_BLOCK) {
600         QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
601         qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
602                                        vfio_user_recv, proxy->ctx,
603                                        vfio_user_send, proxy);
604         return true;
605     }
606     if (ret == -1) {
607         return false;
608     }
609 
610     /*
611      * Sent - free async, add others to pending
612      */
613     if (msg->type == VFIO_MSG_ASYNC) {
614         vfio_user_recycle(proxy, msg);
615     } else {
616         QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
617         msg->pending = true;
618     }
619 
620     return true;
621 }
622 
623 /*
624  * Returns false if we did not successfully receive a reply message, in which
625  * case @errp will be populated.
626  *
627  * In either case, the caller must free @hdr and @fds if needed.
628  */
629 static bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
630                                 VFIOUserFDs *fds, int rsize, Error **errp)
631 {
632     VFIOUserMsg *msg;
633     bool ok = false;
634 
635     if (hdr->flags & VFIO_USER_NO_REPLY) {
636         error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
637         return false;
638     }
639 
640     qemu_mutex_lock(&proxy->lock);
641 
642     msg = vfio_user_getmsg(proxy, hdr, fds);
643     msg->id = hdr->id;
644     msg->rsize = rsize ? rsize : hdr->size;
645     msg->type = VFIO_MSG_WAIT;
646 
647     ok = vfio_user_send_queued(proxy, msg, errp);
648 
649     if (ok) {
650         while (!msg->complete) {
651             if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
652                 VFIOUserMsgQ *list;
653 
654                 list = msg->pending ? &proxy->pending : &proxy->outgoing;
655                 QTAILQ_REMOVE(list, msg, next);
656                 error_setg_errno(errp, ETIMEDOUT,
657                                  "timed out waiting for reply");
658                 ok = false;
659                 break;
660             }
661         }
662     }
663 
664     vfio_user_recycle(proxy, msg);
665 
666     qemu_mutex_unlock(&proxy->lock);
667 
668     return ok;
669 }
670 
671 static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
672     QLIST_HEAD_INITIALIZER(vfio_user_sockets);
673 
674 VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
675 {
676     VFIOUserProxy *proxy;
677     QIOChannelSocket *sioc;
678     QIOChannel *ioc;
679     char *sockname;
680 
681     if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
682         error_setg(errp, "vfio_user_connect - bad address family");
683         return NULL;
684     }
685     sockname = addr->u.q_unix.path;
686 
687     sioc = qio_channel_socket_new();
688     ioc = QIO_CHANNEL(sioc);
689     if (qio_channel_socket_connect_sync(sioc, addr, errp)) {
690         object_unref(OBJECT(ioc));
691         return NULL;
692     }
693     qio_channel_set_blocking(ioc, false, NULL);
694 
695     proxy = g_malloc0(sizeof(VFIOUserProxy));
696     proxy->sockname = g_strdup_printf("unix:%s", sockname);
697     proxy->ioc = ioc;
698 
699     /* init defaults */
700     proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
701     proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
702     proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
703     proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
704     proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
705     proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;
706 
707     proxy->flags = VFIO_PROXY_CLIENT;
708     proxy->state = VFIO_PROXY_CONNECTED;
709 
710     qemu_mutex_init(&proxy->lock);
711     qemu_cond_init(&proxy->close_cv);
712 
713     if (vfio_user_iothread == NULL) {
714         vfio_user_iothread = iothread_create("VFIO user", errp);
715     }
716 
717     proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
718     proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);
719 
720     QTAILQ_INIT(&proxy->outgoing);
721     QTAILQ_INIT(&proxy->incoming);
722     QTAILQ_INIT(&proxy->free);
723     QTAILQ_INIT(&proxy->pending);
724     QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);
725 
726     return proxy;
727 }
728 
729 void vfio_user_set_handler(VFIODevice *vbasedev,
730                            void (*handler)(void *opaque, VFIOUserMsg *msg),
731                            void *req_arg)
732 {
733     VFIOUserProxy *proxy = vbasedev->proxy;
734 
735     proxy->request = handler;
736     proxy->req_arg = req_arg;
737     qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
738                                    vfio_user_recv, NULL, NULL, proxy);
739 }
740 
741 void vfio_user_disconnect(VFIOUserProxy *proxy)
742 {
743     VFIOUserMsg *r1, *r2;
744 
745     qemu_mutex_lock(&proxy->lock);
746 
747     /* our side is quitting */
748     if (proxy->state == VFIO_PROXY_CONNECTED) {
749         vfio_user_shutdown(proxy);
750         if (!QTAILQ_EMPTY(&proxy->pending)) {
751             error_printf("vfio_user_disconnect: outstanding requests\n");
752         }
753     }
754     object_unref(OBJECT(proxy->ioc));
755     proxy->ioc = NULL;
756     qemu_bh_delete(proxy->req_bh);
757     proxy->req_bh = NULL;
758 
759     proxy->state = VFIO_PROXY_CLOSING;
760     QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
761         qemu_cond_destroy(&r1->cv);
762         QTAILQ_REMOVE(&proxy->outgoing, r1, next);
763         g_free(r1);
764     }
765     QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
766         qemu_cond_destroy(&r1->cv);
767         QTAILQ_REMOVE(&proxy->incoming, r1, next);
768         g_free(r1);
769     }
770     QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
771         qemu_cond_destroy(&r1->cv);
772         QTAILQ_REMOVE(&proxy->pending, r1, next);
773         g_free(r1);
774     }
775     QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
776         qemu_cond_destroy(&r1->cv);
777         QTAILQ_REMOVE(&proxy->free, r1, next);
778         g_free(r1);
779     }
780 
781     /*
782      * Make sure the iothread isn't blocking anywhere
783      * with a ref to this proxy by waiting for a BH
784      * handler to run after the proxy fd handlers were
785      * deleted above.
786      */
787     aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy);
788     qemu_cond_wait(&proxy->close_cv, &proxy->lock);
789 
790     /* we now hold the only ref to proxy */
791     qemu_mutex_unlock(&proxy->lock);
792     qemu_cond_destroy(&proxy->close_cv);
793     qemu_mutex_destroy(&proxy->lock);
794 
795     QLIST_REMOVE(proxy, next);
796     if (QLIST_EMPTY(&vfio_user_sockets)) {
797         iothread_destroy(vfio_user_iothread);
798         vfio_user_iothread = NULL;
799     }
800 
801     g_free(proxy->sockname);
802     g_free(proxy);
803 }
804 
805 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
806                                   uint32_t size, uint32_t flags)
807 {
808     static uint16_t next_id;
809 
810     hdr->id = qatomic_fetch_inc(&next_id);
811     hdr->command = cmd;
812     hdr->size = size;
813     hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;
814     hdr->error_reply = 0;
815 }
816 
817 struct cap_entry {
818     const char *name;
819     bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
820 };
821 
822 static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
823                        struct cap_entry caps[], Error **errp)
824 {
825     QObject *qobj;
826     struct cap_entry *p;
827 
828     for (p = caps; p->name != NULL; p++) {
829         qobj = qdict_get(qdict, p->name);
830         if (qobj != NULL) {
831             if (!p->check(proxy, qobj, errp)) {
832                 return false;
833             }
834             qdict_del(qdict, p->name);
835         }
836     }
837 
838     /* warning, for now */
839     if (qdict_size(qdict) != 0) {
840         warn_report("spurious capabilities");
841     }
842     return true;
843 }
844 
845 static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
846 {
847     QNum *qn = qobject_to(QNum, qobj);
848     uint64_t pgsize;
849 
850     if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) {
851         error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
852         return false;
853     }
854 
855     /* must be larger than default */
856     if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) {
857         error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
858         return false;
859     }
860 
861     proxy->migr_pgsize = pgsize;
862     return true;
863 }
864 
865 static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
866 {
867     QNum *qn = qobject_to(QNum, qobj);
868     uint64_t bitmap_size;
869 
870     if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) {
871         error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
872         return false;
873     }
874 
875     /* can only lower it */
876     if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
877         error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
878         return false;
879     }
880 
881     proxy->max_bitmap = bitmap_size;
882     return true;
883 }
884 
885 static struct cap_entry caps_migr[] = {
886     { VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
887     { VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
888     { NULL }
889 };
890 
891 static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
892 {
893     QNum *qn = qobject_to(QNum, qobj);
894     uint64_t max_send_fds;
895 
896     if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) ||
897         max_send_fds > VFIO_USER_MAX_MAX_FDS) {
898         error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
899         return false;
900     }
901     proxy->max_send_fds = max_send_fds;
902     return true;
903 }
904 
905 static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
906 {
907     QNum *qn = qobject_to(QNum, qobj);
908     uint64_t max_xfer_size;
909 
910     if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) ||
911         max_xfer_size > VFIO_USER_MAX_MAX_XFER) {
912         error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
913         return false;
914     }
915     proxy->max_xfer_size = max_xfer_size;
916     return true;
917 }
918 
919 static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
920 {
921     QNum *qn = qobject_to(QNum, qobj);
922     uint64_t pgsizes;
923 
924     if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) {
925         error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
926         return false;
927     }
928 
929     /* must be larger than default */
930     if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) {
931         error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
932         return false;
933     }
934 
935     proxy->dma_pgsizes = pgsizes;
936     return true;
937 }
938 
939 static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
940 {
941     QNum *qn = qobject_to(QNum, qobj);
942     uint64_t max_dma;
943 
944     if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) {
945         error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
946         return false;
947     }
948 
949     /* can only lower it */
950     if (max_dma > VFIO_USER_DEF_MAP_MAX) {
951         error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
952         return false;
953     }
954 
955     proxy->max_dma = max_dma;
956     return true;
957 }
958 
959 static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
960 {
961     QDict *qdict = qobject_to(QDict, qobj);
962 
963     if (qdict == NULL) {
964         error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
965         return true;
966     }
967     return caps_parse(proxy, qdict, caps_migr, errp);
968 }
969 
970 static struct cap_entry caps_cap[] = {
971     { VFIO_USER_CAP_MAX_FDS, check_max_fds },
972     { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
973     { VFIO_USER_CAP_PGSIZES, check_pgsizes },
974     { VFIO_USER_CAP_MAP_MAX, check_max_dma },
975     { VFIO_USER_CAP_MIGR, check_migr },
976     { NULL }
977 };
978 
979 static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
980 {
981    QDict *qdict = qobject_to(QDict, qobj);
982 
983     if (qdict == NULL) {
984         error_setg(errp, "malformed %s", VFIO_USER_CAP);
985         return false;
986     }
987     return caps_parse(proxy, qdict, caps_cap, errp);
988 }
989 
990 static struct cap_entry ver_0_0[] = {
991     { VFIO_USER_CAP, check_cap },
992     { NULL }
993 };
994 
995 static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
996                        Error **errp)
997 {
998     QObject *qobj;
999     QDict *qdict;
1000     bool ret;
1001 
1002     qobj = qobject_from_json(caps, NULL);
1003     if (qobj == NULL) {
1004         error_setg(errp, "malformed capabilities %s", caps);
1005         return false;
1006     }
1007     qdict = qobject_to(QDict, qobj);
1008     if (qdict == NULL) {
1009         error_setg(errp, "capabilities %s not an object", caps);
1010         qobject_unref(qobj);
1011         return false;
1012     }
1013     ret = caps_parse(proxy, qdict, ver_0_0, errp);
1014 
1015     qobject_unref(qobj);
1016     return ret;
1017 }
1018 
1019 static GString *caps_json(void)
1020 {
1021     QDict *dict = qdict_new();
1022     QDict *capdict = qdict_new();
1023     QDict *migdict = qdict_new();
1024     GString *str;
1025 
1026     qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
1027     qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
1028     qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict));
1029 
1030     qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
1031     qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
1032     qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
1033     qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
1034 
1035     qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));
1036 
1037     str = qobject_to_json(QOBJECT(dict));
1038     qobject_unref(dict);
1039     return str;
1040 }
1041 
1042 bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
1043 {
1044     g_autofree VFIOUserVersion *msgp = NULL;
1045     GString *caps;
1046     char *reply;
1047     int size, caplen;
1048 
1049     caps = caps_json();
1050     caplen = caps->len + 1;
1051     size = sizeof(*msgp) + caplen;
1052     msgp = g_malloc0(size);
1053 
1054     vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
1055     msgp->major = VFIO_USER_MAJOR_VER;
1056     msgp->minor = VFIO_USER_MINOR_VER;
1057     memcpy(&msgp->capabilities, caps->str, caplen);
1058     g_string_free(caps, true);
1059     trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
1060 
1061     if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
1062         return false;
1063     }
1064 
1065     if (msgp->hdr.flags & VFIO_USER_ERROR) {
1066         error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
1067         return false;
1068     }
1069 
1070     if (msgp->major != VFIO_USER_MAJOR_VER ||
1071         msgp->minor > VFIO_USER_MINOR_VER) {
1072         error_setg(errp, "incompatible server version");
1073         return false;
1074     }
1075 
1076     reply = msgp->capabilities;
1077     if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
1078         error_setg(errp, "corrupt version reply");
1079         return false;
1080     }
1081 
1082     if (!caps_check(proxy, msgp->minor, reply, errp)) {
1083         return false;
1084     }
1085 
1086     trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
1087     return true;
1088 }
1089