1438d863fSJohn Levon /* 2438d863fSJohn Levon * vfio protocol over a UNIX socket. 3438d863fSJohn Levon * 4438d863fSJohn Levon * Copyright © 2018, 2021 Oracle and/or its affiliates. 5438d863fSJohn Levon * 6438d863fSJohn Levon * SPDX-License-Identifier: GPL-2.0-or-later 7438d863fSJohn Levon */ 8438d863fSJohn Levon 9438d863fSJohn Levon #include "qemu/osdep.h" 10438d863fSJohn Levon #include <sys/ioctl.h> 11438d863fSJohn Levon 12438d863fSJohn Levon #include "hw/vfio/vfio-device.h" 13438d863fSJohn Levon #include "hw/vfio-user/proxy.h" 140b3d881aSJohn Levon #include "hw/vfio-user/trace.h" 15438d863fSJohn Levon #include "qapi/error.h" 16*36227628SJohn Levon #include "qobject/qdict.h" 17*36227628SJohn Levon #include "qobject/qjson.h" 18*36227628SJohn Levon #include "qobject/qnum.h" 19438d863fSJohn Levon #include "qemu/error-report.h" 20438d863fSJohn Levon #include "qemu/lockable.h" 210b3d881aSJohn Levon #include "qemu/main-loop.h" 22438d863fSJohn Levon #include "system/iothread.h" 23438d863fSJohn Levon 24*36227628SJohn Levon static int wait_time = 5000; /* wait up to 5 sec for busy servers */ 25438d863fSJohn Levon static IOThread *vfio_user_iothread; 26438d863fSJohn Levon 27438d863fSJohn Levon static void vfio_user_shutdown(VFIOUserProxy *proxy); 280b3d881aSJohn Levon static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 290b3d881aSJohn Levon VFIOUserFDs *fds); 300b3d881aSJohn Levon static VFIOUserFDs *vfio_user_getfds(int numfds); 310b3d881aSJohn Levon static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); 32438d863fSJohn Levon 330b3d881aSJohn Levon static void vfio_user_recv(void *opaque); 34*36227628SJohn Levon static void vfio_user_send(void *opaque); 350b3d881aSJohn Levon static void vfio_user_cb(void *opaque); 360b3d881aSJohn Levon 370b3d881aSJohn Levon static void vfio_user_request(void *opaque); 38*36227628SJohn Levon static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, 39*36227628SJohn Levon uint32_t size, uint32_t flags); 400b3d881aSJohn Levon 410b3d881aSJohn Levon static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) 420b3d881aSJohn Levon { 430b3d881aSJohn Levon hdr->flags |= VFIO_USER_ERROR; 440b3d881aSJohn Levon hdr->error_reply = err; 450b3d881aSJohn Levon } 46438d863fSJohn Levon 47438d863fSJohn Levon /* 48438d863fSJohn Levon * Functions called by main, CPU, or iothread threads 49438d863fSJohn Levon */ 50438d863fSJohn Levon 51438d863fSJohn Levon static void vfio_user_shutdown(VFIOUserProxy *proxy) 52438d863fSJohn Levon { 53438d863fSJohn Levon qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL); 54438d863fSJohn Levon qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, 55438d863fSJohn Levon proxy->ctx, NULL, NULL); 56438d863fSJohn Levon } 57438d863fSJohn Levon 58*36227628SJohn Levon /* 59*36227628SJohn Levon * Same return values as qio_channel_writev_full(): 60*36227628SJohn Levon * 61*36227628SJohn Levon * QIO_CHANNEL_ERR_BLOCK: *errp not set 62*36227628SJohn Levon * -1: *errp will be populated 63*36227628SJohn Levon * otherwise: bytes written 64*36227628SJohn Levon */ 65*36227628SJohn Levon static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg, 66*36227628SJohn Levon Error **errp) 67*36227628SJohn Levon { 68*36227628SJohn Levon VFIOUserFDs *fds = msg->fds; 69*36227628SJohn Levon struct iovec iov = { 70*36227628SJohn Levon .iov_base = msg->hdr, 71*36227628SJohn Levon .iov_len = msg->hdr->size, 72*36227628SJohn Levon }; 73*36227628SJohn Levon size_t numfds = 0; 74*36227628SJohn Levon int *fdp = NULL; 75*36227628SJohn Levon ssize_t ret; 76*36227628SJohn Levon 77*36227628SJohn Levon if (fds != NULL && fds->send_fds != 0) { 78*36227628SJohn Levon numfds = fds->send_fds; 79*36227628SJohn Levon fdp = fds->fds; 80*36227628SJohn Levon } 81*36227628SJohn Levon 82*36227628SJohn Levon ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp); 83*36227628SJohn Levon 84*36227628SJohn Levon if (ret == -1) { 85*36227628SJohn Levon vfio_user_set_error(msg->hdr, EIO); 86*36227628SJohn Levon vfio_user_shutdown(proxy); 87*36227628SJohn Levon } 88*36227628SJohn Levon trace_vfio_user_send_write(msg->hdr->id, ret); 89*36227628SJohn Levon 90*36227628SJohn Levon return ret; 91*36227628SJohn Levon } 92*36227628SJohn Levon 930b3d881aSJohn Levon static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 940b3d881aSJohn Levon VFIOUserFDs *fds) 950b3d881aSJohn Levon { 960b3d881aSJohn Levon VFIOUserMsg *msg; 970b3d881aSJohn Levon 980b3d881aSJohn Levon msg = QTAILQ_FIRST(&proxy->free); 990b3d881aSJohn Levon if (msg != NULL) { 1000b3d881aSJohn Levon QTAILQ_REMOVE(&proxy->free, msg, next); 1010b3d881aSJohn Levon } else { 1020b3d881aSJohn Levon msg = g_malloc0(sizeof(*msg)); 1030b3d881aSJohn Levon qemu_cond_init(&msg->cv); 1040b3d881aSJohn Levon } 1050b3d881aSJohn Levon 1060b3d881aSJohn Levon msg->hdr = hdr; 1070b3d881aSJohn Levon msg->fds = fds; 1080b3d881aSJohn Levon return msg; 1090b3d881aSJohn Levon } 1100b3d881aSJohn Levon 1110b3d881aSJohn Levon /* 1120b3d881aSJohn Levon * Recycle a message list entry to the free list. 1130b3d881aSJohn Levon */ 1140b3d881aSJohn Levon static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg) 1150b3d881aSJohn Levon { 1160b3d881aSJohn Levon if (msg->type == VFIO_MSG_NONE) { 1170b3d881aSJohn Levon error_printf("vfio_user_recycle - freeing free msg\n"); 1180b3d881aSJohn Levon return; 1190b3d881aSJohn Levon } 1200b3d881aSJohn Levon 1210b3d881aSJohn Levon /* free msg buffer if no one is waiting to consume the reply */ 1220b3d881aSJohn Levon if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) { 1230b3d881aSJohn Levon g_free(msg->hdr); 1240b3d881aSJohn Levon if (msg->fds != NULL) { 1250b3d881aSJohn Levon g_free(msg->fds); 1260b3d881aSJohn Levon } 1270b3d881aSJohn Levon } 1280b3d881aSJohn Levon 1290b3d881aSJohn Levon msg->type = VFIO_MSG_NONE; 1300b3d881aSJohn Levon msg->hdr = NULL; 1310b3d881aSJohn Levon msg->fds = NULL; 1320b3d881aSJohn Levon msg->complete = false; 133*36227628SJohn Levon msg->pending = false; 1340b3d881aSJohn Levon QTAILQ_INSERT_HEAD(&proxy->free, msg, next); 1350b3d881aSJohn Levon } 1360b3d881aSJohn Levon 1370b3d881aSJohn Levon static VFIOUserFDs *vfio_user_getfds(int numfds) 1380b3d881aSJohn Levon { 1390b3d881aSJohn Levon VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int))); 1400b3d881aSJohn Levon 1410b3d881aSJohn Levon fds->fds = (int *)((char *)fds + sizeof(*fds)); 1420b3d881aSJohn Levon 1430b3d881aSJohn Levon return fds; 1440b3d881aSJohn Levon } 1450b3d881aSJohn Levon 146438d863fSJohn Levon /* 147438d863fSJohn Levon * Functions only called by iothread 148438d863fSJohn Levon */ 149438d863fSJohn Levon 1500b3d881aSJohn Levon /* 1510b3d881aSJohn Levon * Process a received message. 1520b3d881aSJohn Levon */ 1530b3d881aSJohn Levon static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg, 1540b3d881aSJohn Levon bool isreply) 1550b3d881aSJohn Levon { 1560b3d881aSJohn Levon 1570b3d881aSJohn Levon /* 1580b3d881aSJohn Levon * Replies signal a waiter, if none just check for errors 1590b3d881aSJohn Levon * and free the message buffer. 1600b3d881aSJohn Levon * 1610b3d881aSJohn Levon * Requests get queued for the BH. 1620b3d881aSJohn Levon */ 1630b3d881aSJohn Levon if (isreply) { 1640b3d881aSJohn Levon msg->complete = true; 1650b3d881aSJohn Levon if (msg->type == VFIO_MSG_WAIT) { 1660b3d881aSJohn Levon qemu_cond_signal(&msg->cv); 1670b3d881aSJohn Levon } else { 1680b3d881aSJohn Levon if (msg->hdr->flags & VFIO_USER_ERROR) { 1690b3d881aSJohn Levon error_printf("vfio_user_process: error reply on async "); 1700b3d881aSJohn Levon error_printf("request command %x error %s\n", 1710b3d881aSJohn Levon msg->hdr->command, 1720b3d881aSJohn Levon strerror(msg->hdr->error_reply)); 1730b3d881aSJohn Levon } 1740b3d881aSJohn Levon /* youngest nowait msg has been ack'd */ 1750b3d881aSJohn Levon if (proxy->last_nowait == msg) { 1760b3d881aSJohn Levon proxy->last_nowait = NULL; 1770b3d881aSJohn Levon } 1780b3d881aSJohn Levon vfio_user_recycle(proxy, msg); 1790b3d881aSJohn Levon } 1800b3d881aSJohn Levon } else { 1810b3d881aSJohn Levon QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next); 1820b3d881aSJohn Levon qemu_bh_schedule(proxy->req_bh); 1830b3d881aSJohn Levon } 1840b3d881aSJohn Levon } 1850b3d881aSJohn Levon 1860b3d881aSJohn Levon /* 1870b3d881aSJohn Levon * Complete a partial message read 1880b3d881aSJohn Levon */ 1890b3d881aSJohn Levon static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp) 1900b3d881aSJohn Levon { 1910b3d881aSJohn Levon VFIOUserMsg *msg = proxy->part_recv; 1920b3d881aSJohn Levon size_t msgleft = proxy->recv_left; 1930b3d881aSJohn Levon bool isreply; 1940b3d881aSJohn Levon char *data; 1950b3d881aSJohn Levon int ret; 1960b3d881aSJohn Levon 1970b3d881aSJohn Levon data = (char *)msg->hdr + (msg->hdr->size - msgleft); 1980b3d881aSJohn Levon while (msgleft > 0) { 1990b3d881aSJohn Levon ret = qio_channel_read(proxy->ioc, data, msgleft, errp); 2000b3d881aSJohn Levon 2010b3d881aSJohn Levon /* error or would block */ 2020b3d881aSJohn Levon if (ret <= 0) { 2030b3d881aSJohn Levon /* try for rest on next iternation */ 2040b3d881aSJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 2050b3d881aSJohn Levon proxy->recv_left = msgleft; 2060b3d881aSJohn Levon } 2070b3d881aSJohn Levon return ret; 2080b3d881aSJohn Levon } 2090b3d881aSJohn Levon trace_vfio_user_recv_read(msg->hdr->id, ret); 2100b3d881aSJohn Levon 2110b3d881aSJohn Levon msgleft -= ret; 2120b3d881aSJohn Levon data += ret; 2130b3d881aSJohn Levon } 2140b3d881aSJohn Levon 2150b3d881aSJohn Levon /* 2160b3d881aSJohn Levon * Read complete message, process it. 2170b3d881aSJohn Levon */ 2180b3d881aSJohn Levon proxy->part_recv = NULL; 2190b3d881aSJohn Levon proxy->recv_left = 0; 2200b3d881aSJohn Levon isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY; 2210b3d881aSJohn Levon vfio_user_process(proxy, msg, isreply); 2220b3d881aSJohn Levon 2230b3d881aSJohn Levon /* return positive value */ 2240b3d881aSJohn Levon return 1; 2250b3d881aSJohn Levon } 2260b3d881aSJohn Levon 2270b3d881aSJohn Levon /* 2280b3d881aSJohn Levon * Receive and process one incoming message. 2290b3d881aSJohn Levon * 2300b3d881aSJohn Levon * For replies, find matching outgoing request and wake any waiters. 2310b3d881aSJohn Levon * For requests, queue in incoming list and run request BH. 2320b3d881aSJohn Levon */ 2330b3d881aSJohn Levon static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp) 2340b3d881aSJohn Levon { 2350b3d881aSJohn Levon VFIOUserMsg *msg = NULL; 2360b3d881aSJohn Levon g_autofree int *fdp = NULL; 2370b3d881aSJohn Levon VFIOUserFDs *reqfds; 2380b3d881aSJohn Levon VFIOUserHdr hdr; 2390b3d881aSJohn Levon struct iovec iov = { 2400b3d881aSJohn Levon .iov_base = &hdr, 2410b3d881aSJohn Levon .iov_len = sizeof(hdr), 2420b3d881aSJohn Levon }; 2430b3d881aSJohn Levon bool isreply = false; 2440b3d881aSJohn Levon int i, ret; 2450b3d881aSJohn Levon size_t msgleft, numfds = 0; 2460b3d881aSJohn Levon char *data = NULL; 2470b3d881aSJohn Levon char *buf = NULL; 2480b3d881aSJohn Levon 2490b3d881aSJohn Levon /* 2500b3d881aSJohn Levon * Complete any partial reads 2510b3d881aSJohn Levon */ 2520b3d881aSJohn Levon if (proxy->part_recv != NULL) { 2530b3d881aSJohn Levon ret = vfio_user_complete(proxy, errp); 2540b3d881aSJohn Levon 2550b3d881aSJohn Levon /* still not complete, try later */ 2560b3d881aSJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 2570b3d881aSJohn Levon return ret; 2580b3d881aSJohn Levon } 2590b3d881aSJohn Levon 2600b3d881aSJohn Levon if (ret <= 0) { 2610b3d881aSJohn Levon goto fatal; 2620b3d881aSJohn Levon } 2630b3d881aSJohn Levon /* else fall into reading another msg */ 2640b3d881aSJohn Levon } 2650b3d881aSJohn Levon 2660b3d881aSJohn Levon /* 2670b3d881aSJohn Levon * Read header 2680b3d881aSJohn Levon */ 2690b3d881aSJohn Levon ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0, 2700b3d881aSJohn Levon errp); 2710b3d881aSJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 2720b3d881aSJohn Levon return ret; 2730b3d881aSJohn Levon } 2740b3d881aSJohn Levon 2750b3d881aSJohn Levon /* read error or other side closed connection */ 2760b3d881aSJohn Levon if (ret <= 0) { 2770b3d881aSJohn Levon goto fatal; 2780b3d881aSJohn Levon } 2790b3d881aSJohn Levon 2800b3d881aSJohn Levon if (ret < sizeof(hdr)) { 2810b3d881aSJohn Levon error_setg(errp, "short read of header"); 2820b3d881aSJohn Levon goto fatal; 2830b3d881aSJohn Levon } 2840b3d881aSJohn Levon 2850b3d881aSJohn Levon /* 2860b3d881aSJohn Levon * Validate header 2870b3d881aSJohn Levon */ 2880b3d881aSJohn Levon if (hdr.size < sizeof(VFIOUserHdr)) { 2890b3d881aSJohn Levon error_setg(errp, "bad header size"); 2900b3d881aSJohn Levon goto fatal; 2910b3d881aSJohn Levon } 2920b3d881aSJohn Levon switch (hdr.flags & VFIO_USER_TYPE) { 2930b3d881aSJohn Levon case VFIO_USER_REQUEST: 2940b3d881aSJohn Levon isreply = false; 2950b3d881aSJohn Levon break; 2960b3d881aSJohn Levon case VFIO_USER_REPLY: 2970b3d881aSJohn Levon isreply = true; 2980b3d881aSJohn Levon break; 2990b3d881aSJohn Levon default: 3000b3d881aSJohn Levon error_setg(errp, "unknown message type"); 3010b3d881aSJohn Levon goto fatal; 3020b3d881aSJohn Levon } 3030b3d881aSJohn Levon trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size, 3040b3d881aSJohn Levon hdr.flags); 3050b3d881aSJohn Levon 3060b3d881aSJohn Levon /* 3070b3d881aSJohn Levon * For replies, find the matching pending request. 3080b3d881aSJohn Levon * For requests, reap incoming FDs. 3090b3d881aSJohn Levon */ 3100b3d881aSJohn Levon if (isreply) { 3110b3d881aSJohn Levon QTAILQ_FOREACH(msg, &proxy->pending, next) { 3120b3d881aSJohn Levon if (hdr.id == msg->id) { 3130b3d881aSJohn Levon break; 3140b3d881aSJohn Levon } 3150b3d881aSJohn Levon } 3160b3d881aSJohn Levon if (msg == NULL) { 3170b3d881aSJohn Levon error_setg(errp, "unexpected reply"); 3180b3d881aSJohn Levon goto err; 3190b3d881aSJohn Levon } 3200b3d881aSJohn Levon QTAILQ_REMOVE(&proxy->pending, msg, next); 3210b3d881aSJohn Levon 3220b3d881aSJohn Levon /* 3230b3d881aSJohn Levon * Process any received FDs 3240b3d881aSJohn Levon */ 3250b3d881aSJohn Levon if (numfds != 0) { 3260b3d881aSJohn Levon if (msg->fds == NULL || msg->fds->recv_fds < numfds) { 3270b3d881aSJohn Levon error_setg(errp, "unexpected FDs"); 3280b3d881aSJohn Levon goto err; 3290b3d881aSJohn Levon } 3300b3d881aSJohn Levon msg->fds->recv_fds = numfds; 3310b3d881aSJohn Levon memcpy(msg->fds->fds, fdp, numfds * sizeof(int)); 3320b3d881aSJohn Levon } 3330b3d881aSJohn Levon } else { 3340b3d881aSJohn Levon if (numfds != 0) { 3350b3d881aSJohn Levon reqfds = vfio_user_getfds(numfds); 3360b3d881aSJohn Levon memcpy(reqfds->fds, fdp, numfds * sizeof(int)); 3370b3d881aSJohn Levon } else { 3380b3d881aSJohn Levon reqfds = NULL; 3390b3d881aSJohn Levon } 3400b3d881aSJohn Levon } 3410b3d881aSJohn Levon 3420b3d881aSJohn Levon /* 3430b3d881aSJohn Levon * Put the whole message into a single buffer. 3440b3d881aSJohn Levon */ 3450b3d881aSJohn Levon if (isreply) { 3460b3d881aSJohn Levon if (hdr.size > msg->rsize) { 3470b3d881aSJohn Levon error_setg(errp, "reply larger than recv buffer"); 3480b3d881aSJohn Levon goto err; 3490b3d881aSJohn Levon } 3500b3d881aSJohn Levon *msg->hdr = hdr; 3510b3d881aSJohn Levon data = (char *)msg->hdr + sizeof(hdr); 3520b3d881aSJohn Levon } else { 3530b3d881aSJohn Levon buf = g_malloc0(hdr.size); 3540b3d881aSJohn Levon memcpy(buf, &hdr, sizeof(hdr)); 3550b3d881aSJohn Levon data = buf + sizeof(hdr); 3560b3d881aSJohn Levon msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds); 3570b3d881aSJohn Levon msg->type = VFIO_MSG_REQ; 3580b3d881aSJohn Levon } 3590b3d881aSJohn Levon 3600b3d881aSJohn Levon /* 3610b3d881aSJohn Levon * Read rest of message. 3620b3d881aSJohn Levon */ 3630b3d881aSJohn Levon msgleft = hdr.size - sizeof(hdr); 3640b3d881aSJohn Levon while (msgleft > 0) { 3650b3d881aSJohn Levon ret = qio_channel_read(proxy->ioc, data, msgleft, errp); 3660b3d881aSJohn Levon 3670b3d881aSJohn Levon /* prepare to complete read on next iternation */ 3680b3d881aSJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 3690b3d881aSJohn Levon proxy->part_recv = msg; 3700b3d881aSJohn Levon proxy->recv_left = msgleft; 3710b3d881aSJohn Levon return ret; 3720b3d881aSJohn Levon } 3730b3d881aSJohn Levon 3740b3d881aSJohn Levon if (ret <= 0) { 3750b3d881aSJohn Levon goto fatal; 3760b3d881aSJohn Levon } 3770b3d881aSJohn Levon trace_vfio_user_recv_read(hdr.id, ret); 3780b3d881aSJohn Levon 3790b3d881aSJohn Levon msgleft -= ret; 3800b3d881aSJohn Levon data += ret; 3810b3d881aSJohn Levon } 3820b3d881aSJohn Levon 3830b3d881aSJohn Levon vfio_user_process(proxy, msg, isreply); 3840b3d881aSJohn Levon return 0; 3850b3d881aSJohn Levon 3860b3d881aSJohn Levon /* 3870b3d881aSJohn Levon * fatal means the other side closed or we don't trust the stream 3880b3d881aSJohn Levon * err means this message is corrupt 3890b3d881aSJohn Levon */ 3900b3d881aSJohn Levon fatal: 3910b3d881aSJohn Levon vfio_user_shutdown(proxy); 3920b3d881aSJohn Levon proxy->state = VFIO_PROXY_ERROR; 3930b3d881aSJohn Levon 3940b3d881aSJohn Levon /* set error if server side closed */ 3950b3d881aSJohn Levon if (ret == 0) { 3960b3d881aSJohn Levon error_setg(errp, "server closed socket"); 3970b3d881aSJohn Levon } 3980b3d881aSJohn Levon 3990b3d881aSJohn Levon err: 4000b3d881aSJohn Levon for (i = 0; i < numfds; i++) { 4010b3d881aSJohn Levon close(fdp[i]); 4020b3d881aSJohn Levon } 4030b3d881aSJohn Levon if (isreply && msg != NULL) { 4040b3d881aSJohn Levon /* force an error to keep sending thread from hanging */ 4050b3d881aSJohn Levon vfio_user_set_error(msg->hdr, EINVAL); 4060b3d881aSJohn Levon msg->complete = true; 4070b3d881aSJohn Levon qemu_cond_signal(&msg->cv); 4080b3d881aSJohn Levon } 4090b3d881aSJohn Levon return -1; 4100b3d881aSJohn Levon } 4110b3d881aSJohn Levon 4120b3d881aSJohn Levon static void vfio_user_recv(void *opaque) 4130b3d881aSJohn Levon { 4140b3d881aSJohn Levon VFIOUserProxy *proxy = opaque; 4150b3d881aSJohn Levon 4160b3d881aSJohn Levon QEMU_LOCK_GUARD(&proxy->lock); 4170b3d881aSJohn Levon 4180b3d881aSJohn Levon if (proxy->state == VFIO_PROXY_CONNECTED) { 4190b3d881aSJohn Levon Error *local_err = NULL; 4200b3d881aSJohn Levon 4210b3d881aSJohn Levon while (vfio_user_recv_one(proxy, &local_err) == 0) { 4220b3d881aSJohn Levon ; 4230b3d881aSJohn Levon } 4240b3d881aSJohn Levon 4250b3d881aSJohn Levon if (local_err != NULL) { 4260b3d881aSJohn Levon error_report_err(local_err); 4270b3d881aSJohn Levon } 4280b3d881aSJohn Levon } 4290b3d881aSJohn Levon } 4300b3d881aSJohn Levon 431*36227628SJohn Levon /* 432*36227628SJohn Levon * Send a single message, same return semantics as vfio_user_send_qio(). 433*36227628SJohn Levon * 434*36227628SJohn Levon * Sent async messages are freed, others are moved to pending queue. 435*36227628SJohn Levon */ 436*36227628SJohn Levon static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp) 437*36227628SJohn Levon { 438*36227628SJohn Levon VFIOUserMsg *msg; 439*36227628SJohn Levon ssize_t ret; 440*36227628SJohn Levon 441*36227628SJohn Levon msg = QTAILQ_FIRST(&proxy->outgoing); 442*36227628SJohn Levon ret = vfio_user_send_qio(proxy, msg, errp); 443*36227628SJohn Levon if (ret < 0) { 444*36227628SJohn Levon return ret; 445*36227628SJohn Levon } 446*36227628SJohn Levon 447*36227628SJohn Levon QTAILQ_REMOVE(&proxy->outgoing, msg, next); 448*36227628SJohn Levon if (msg->type == VFIO_MSG_ASYNC) { 449*36227628SJohn Levon vfio_user_recycle(proxy, msg); 450*36227628SJohn Levon } else { 451*36227628SJohn Levon QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 452*36227628SJohn Levon msg->pending = true; 453*36227628SJohn Levon } 454*36227628SJohn Levon 455*36227628SJohn Levon return ret; 456*36227628SJohn Levon } 457*36227628SJohn Levon 458*36227628SJohn Levon /* 459*36227628SJohn Levon * Send messages from outgoing queue when the socket buffer has space. 460*36227628SJohn Levon * If we deplete 'outgoing', remove ourselves from the poll list. 461*36227628SJohn Levon */ 462*36227628SJohn Levon static void vfio_user_send(void *opaque) 463*36227628SJohn Levon { 464*36227628SJohn Levon VFIOUserProxy *proxy = opaque; 465*36227628SJohn Levon 466*36227628SJohn Levon QEMU_LOCK_GUARD(&proxy->lock); 467*36227628SJohn Levon 468*36227628SJohn Levon if (proxy->state == VFIO_PROXY_CONNECTED) { 469*36227628SJohn Levon while (!QTAILQ_EMPTY(&proxy->outgoing)) { 470*36227628SJohn Levon Error *local_err = NULL; 471*36227628SJohn Levon int ret; 472*36227628SJohn Levon 473*36227628SJohn Levon ret = vfio_user_send_one(proxy, &local_err); 474*36227628SJohn Levon 475*36227628SJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 476*36227628SJohn Levon return; 477*36227628SJohn Levon } else if (ret == -1) { 478*36227628SJohn Levon error_report_err(local_err); 479*36227628SJohn Levon return; 480*36227628SJohn Levon } 481*36227628SJohn Levon } 482*36227628SJohn Levon qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 483*36227628SJohn Levon vfio_user_recv, NULL, NULL, proxy); 484*36227628SJohn Levon } 485*36227628SJohn Levon } 486*36227628SJohn Levon 487438d863fSJohn Levon static void vfio_user_cb(void *opaque) 488438d863fSJohn Levon { 489438d863fSJohn Levon VFIOUserProxy *proxy = opaque; 490438d863fSJohn Levon 491438d863fSJohn Levon QEMU_LOCK_GUARD(&proxy->lock); 492438d863fSJohn Levon 493438d863fSJohn Levon proxy->state = VFIO_PROXY_CLOSED; 494438d863fSJohn Levon qemu_cond_signal(&proxy->close_cv); 495438d863fSJohn Levon } 496438d863fSJohn Levon 497438d863fSJohn Levon 498438d863fSJohn Levon /* 499438d863fSJohn Levon * Functions called by main or CPU threads 500438d863fSJohn Levon */ 501438d863fSJohn Levon 5020b3d881aSJohn Levon /* 5030b3d881aSJohn Levon * Process incoming requests. 5040b3d881aSJohn Levon * 5050b3d881aSJohn Levon * The bus-specific callback has the form: 5060b3d881aSJohn Levon * request(opaque, msg) 5070b3d881aSJohn Levon * where 'opaque' was specified in vfio_user_set_handler 5080b3d881aSJohn Levon * and 'msg' is the inbound message. 5090b3d881aSJohn Levon * 5100b3d881aSJohn Levon * The callback is responsible for disposing of the message buffer, 5110b3d881aSJohn Levon * usually by re-using it when calling vfio_send_reply or vfio_send_error, 5120b3d881aSJohn Levon * both of which free their message buffer when the reply is sent. 5130b3d881aSJohn Levon * 5140b3d881aSJohn Levon * If the callback uses a new buffer, it needs to free the old one. 5150b3d881aSJohn Levon */ 5160b3d881aSJohn Levon static void vfio_user_request(void *opaque) 5170b3d881aSJohn Levon { 5180b3d881aSJohn Levon VFIOUserProxy *proxy = opaque; 5190b3d881aSJohn Levon VFIOUserMsgQ new, free; 5200b3d881aSJohn Levon VFIOUserMsg *msg, *m1; 5210b3d881aSJohn Levon 5220b3d881aSJohn Levon /* reap all incoming */ 5230b3d881aSJohn Levon QTAILQ_INIT(&new); 5240b3d881aSJohn Levon WITH_QEMU_LOCK_GUARD(&proxy->lock) { 5250b3d881aSJohn Levon QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) { 5260b3d881aSJohn Levon QTAILQ_REMOVE(&proxy->incoming, msg, next); 5270b3d881aSJohn Levon QTAILQ_INSERT_TAIL(&new, msg, next); 5280b3d881aSJohn Levon } 5290b3d881aSJohn Levon } 5300b3d881aSJohn Levon 5310b3d881aSJohn Levon /* process list */ 5320b3d881aSJohn Levon QTAILQ_INIT(&free); 5330b3d881aSJohn Levon QTAILQ_FOREACH_SAFE(msg, &new, next, m1) { 5340b3d881aSJohn Levon QTAILQ_REMOVE(&new, msg, next); 5350b3d881aSJohn Levon trace_vfio_user_recv_request(msg->hdr->command); 5360b3d881aSJohn Levon proxy->request(proxy->req_arg, msg); 5370b3d881aSJohn Levon QTAILQ_INSERT_HEAD(&free, msg, next); 5380b3d881aSJohn Levon } 5390b3d881aSJohn Levon 5400b3d881aSJohn Levon /* free list */ 5410b3d881aSJohn Levon WITH_QEMU_LOCK_GUARD(&proxy->lock) { 5420b3d881aSJohn Levon QTAILQ_FOREACH_SAFE(msg, &free, next, m1) { 5430b3d881aSJohn Levon vfio_user_recycle(proxy, msg); 5440b3d881aSJohn Levon } 5450b3d881aSJohn Levon } 5460b3d881aSJohn Levon } 5470b3d881aSJohn Levon 548*36227628SJohn Levon /* 549*36227628SJohn Levon * Messages are queued onto the proxy's outgoing list. 550*36227628SJohn Levon * 551*36227628SJohn Levon * It handles 3 types of messages: 552*36227628SJohn Levon * 553*36227628SJohn Levon * async messages - replies and posted writes 554*36227628SJohn Levon * 555*36227628SJohn Levon * There will be no reply from the server, so message 556*36227628SJohn Levon * buffers are freed after they're sent. 557*36227628SJohn Levon * 558*36227628SJohn Levon * nowait messages - map/unmap during address space transactions 559*36227628SJohn Levon * 560*36227628SJohn Levon * These are also sent async, but a reply is expected so that 561*36227628SJohn Levon * vfio_wait_reqs() can wait for the youngest nowait request. 562*36227628SJohn Levon * They transition from the outgoing list to the pending list 563*36227628SJohn Levon * when sent, and are freed when the reply is received. 564*36227628SJohn Levon * 565*36227628SJohn Levon * wait messages - all other requests 566*36227628SJohn Levon * 567*36227628SJohn Levon * The reply to these messages is waited for by their caller. 568*36227628SJohn Levon * They also transition from outgoing to pending when sent, but 569*36227628SJohn Levon * the message buffer is returned to the caller with the reply 570*36227628SJohn Levon * contents. The caller is responsible for freeing these messages. 571*36227628SJohn Levon * 572*36227628SJohn Levon * As an optimization, if the outgoing list and the socket send 573*36227628SJohn Levon * buffer are empty, the message is sent inline instead of being 574*36227628SJohn Levon * added to the outgoing list. The rest of the transitions are 575*36227628SJohn Levon * unchanged. 576*36227628SJohn Levon */ 577*36227628SJohn Levon static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, 578*36227628SJohn Levon Error **errp) 579*36227628SJohn Levon { 580*36227628SJohn Levon int ret; 581*36227628SJohn Levon 582*36227628SJohn Levon /* 583*36227628SJohn Levon * Unsent outgoing msgs - add to tail 584*36227628SJohn Levon */ 585*36227628SJohn Levon if (!QTAILQ_EMPTY(&proxy->outgoing)) { 586*36227628SJohn Levon QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); 587*36227628SJohn Levon return true; 588*36227628SJohn Levon } 589*36227628SJohn Levon 590*36227628SJohn Levon /* 591*36227628SJohn Levon * Try inline - if blocked, queue it and kick send poller 592*36227628SJohn Levon */ 593*36227628SJohn Levon if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { 594*36227628SJohn Levon ret = QIO_CHANNEL_ERR_BLOCK; 595*36227628SJohn Levon } else { 596*36227628SJohn Levon ret = vfio_user_send_qio(proxy, msg, errp); 597*36227628SJohn Levon } 598*36227628SJohn Levon 599*36227628SJohn Levon if (ret == QIO_CHANNEL_ERR_BLOCK) { 600*36227628SJohn Levon QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); 601*36227628SJohn Levon qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 602*36227628SJohn Levon vfio_user_recv, proxy->ctx, 603*36227628SJohn Levon vfio_user_send, proxy); 604*36227628SJohn Levon return true; 605*36227628SJohn Levon } 606*36227628SJohn Levon if (ret == -1) { 607*36227628SJohn Levon return false; 608*36227628SJohn Levon } 609*36227628SJohn Levon 610*36227628SJohn Levon /* 611*36227628SJohn Levon * Sent - free async, add others to pending 612*36227628SJohn Levon */ 613*36227628SJohn Levon if (msg->type == VFIO_MSG_ASYNC) { 614*36227628SJohn Levon vfio_user_recycle(proxy, msg); 615*36227628SJohn Levon } else { 616*36227628SJohn Levon QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 617*36227628SJohn Levon msg->pending = true; 618*36227628SJohn Levon } 619*36227628SJohn Levon 620*36227628SJohn Levon return true; 621*36227628SJohn Levon } 622*36227628SJohn Levon 623*36227628SJohn Levon /* 624*36227628SJohn Levon * Returns false if we did not successfully receive a reply message, in which 625*36227628SJohn Levon * case @errp will be populated. 626*36227628SJohn Levon * 627*36227628SJohn Levon * In either case, the caller must free @hdr and @fds if needed. 628*36227628SJohn Levon */ 629*36227628SJohn Levon static bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 630*36227628SJohn Levon VFIOUserFDs *fds, int rsize, Error **errp) 631*36227628SJohn Levon { 632*36227628SJohn Levon VFIOUserMsg *msg; 633*36227628SJohn Levon bool ok = false; 634*36227628SJohn Levon 635*36227628SJohn Levon if (hdr->flags & VFIO_USER_NO_REPLY) { 636*36227628SJohn Levon error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); 637*36227628SJohn Levon return false; 638*36227628SJohn Levon } 639*36227628SJohn Levon 640*36227628SJohn Levon qemu_mutex_lock(&proxy->lock); 641*36227628SJohn Levon 642*36227628SJohn Levon msg = vfio_user_getmsg(proxy, hdr, fds); 643*36227628SJohn Levon msg->id = hdr->id; 644*36227628SJohn Levon msg->rsize = rsize ? rsize : hdr->size; 645*36227628SJohn Levon msg->type = VFIO_MSG_WAIT; 646*36227628SJohn Levon 647*36227628SJohn Levon ok = vfio_user_send_queued(proxy, msg, errp); 648*36227628SJohn Levon 649*36227628SJohn Levon if (ok) { 650*36227628SJohn Levon while (!msg->complete) { 651*36227628SJohn Levon if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { 652*36227628SJohn Levon VFIOUserMsgQ *list; 653*36227628SJohn Levon 654*36227628SJohn Levon list = msg->pending ? &proxy->pending : &proxy->outgoing; 655*36227628SJohn Levon QTAILQ_REMOVE(list, msg, next); 656*36227628SJohn Levon error_setg_errno(errp, ETIMEDOUT, 657*36227628SJohn Levon "timed out waiting for reply"); 658*36227628SJohn Levon ok = false; 659*36227628SJohn Levon break; 660*36227628SJohn Levon } 661*36227628SJohn Levon } 662*36227628SJohn Levon } 663*36227628SJohn Levon 664*36227628SJohn Levon vfio_user_recycle(proxy, msg); 665*36227628SJohn Levon 666*36227628SJohn Levon qemu_mutex_unlock(&proxy->lock); 667*36227628SJohn Levon 668*36227628SJohn Levon return ok; 669*36227628SJohn Levon } 6700b3d881aSJohn Levon 671438d863fSJohn Levon static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets = 672438d863fSJohn Levon QLIST_HEAD_INITIALIZER(vfio_user_sockets); 673438d863fSJohn Levon 674438d863fSJohn Levon VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) 675438d863fSJohn Levon { 676438d863fSJohn Levon VFIOUserProxy *proxy; 677438d863fSJohn Levon QIOChannelSocket *sioc; 678438d863fSJohn Levon QIOChannel *ioc; 679438d863fSJohn Levon char *sockname; 680438d863fSJohn Levon 681438d863fSJohn Levon if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { 682438d863fSJohn Levon error_setg(errp, "vfio_user_connect - bad address family"); 683438d863fSJohn Levon return NULL; 684438d863fSJohn Levon } 685438d863fSJohn Levon sockname = addr->u.q_unix.path; 686438d863fSJohn Levon 687438d863fSJohn Levon sioc = qio_channel_socket_new(); 688438d863fSJohn Levon ioc = QIO_CHANNEL(sioc); 689438d863fSJohn Levon if (qio_channel_socket_connect_sync(sioc, addr, errp)) { 690438d863fSJohn Levon object_unref(OBJECT(ioc)); 691438d863fSJohn Levon return NULL; 692438d863fSJohn Levon } 693438d863fSJohn Levon qio_channel_set_blocking(ioc, false, NULL); 694438d863fSJohn Levon 695438d863fSJohn Levon proxy = g_malloc0(sizeof(VFIOUserProxy)); 696438d863fSJohn Levon proxy->sockname = g_strdup_printf("unix:%s", sockname); 697438d863fSJohn Levon proxy->ioc = ioc; 698*36227628SJohn Levon 699*36227628SJohn Levon /* init defaults */ 700*36227628SJohn Levon proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; 701*36227628SJohn Levon proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; 702*36227628SJohn Levon proxy->max_dma = VFIO_USER_DEF_MAP_MAX; 703*36227628SJohn Levon proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; 704*36227628SJohn Levon proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; 705*36227628SJohn Levon proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; 706*36227628SJohn Levon 707438d863fSJohn Levon proxy->flags = VFIO_PROXY_CLIENT; 708438d863fSJohn Levon proxy->state = VFIO_PROXY_CONNECTED; 709438d863fSJohn Levon 710438d863fSJohn Levon qemu_mutex_init(&proxy->lock); 711438d863fSJohn Levon qemu_cond_init(&proxy->close_cv); 712438d863fSJohn Levon 713438d863fSJohn Levon if (vfio_user_iothread == NULL) { 714438d863fSJohn Levon vfio_user_iothread = iothread_create("VFIO user", errp); 715438d863fSJohn Levon } 716438d863fSJohn Levon 717438d863fSJohn Levon proxy->ctx = iothread_get_aio_context(vfio_user_iothread); 7180b3d881aSJohn Levon proxy->req_bh = qemu_bh_new(vfio_user_request, proxy); 719438d863fSJohn Levon 720438d863fSJohn Levon QTAILQ_INIT(&proxy->outgoing); 721438d863fSJohn Levon QTAILQ_INIT(&proxy->incoming); 722438d863fSJohn Levon QTAILQ_INIT(&proxy->free); 723438d863fSJohn Levon QTAILQ_INIT(&proxy->pending); 724438d863fSJohn Levon QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next); 725438d863fSJohn Levon 726438d863fSJohn Levon return proxy; 727438d863fSJohn Levon } 728438d863fSJohn Levon 7290b3d881aSJohn Levon void vfio_user_set_handler(VFIODevice *vbasedev, 7300b3d881aSJohn Levon void (*handler)(void *opaque, VFIOUserMsg *msg), 7310b3d881aSJohn Levon void *req_arg) 7320b3d881aSJohn Levon { 7330b3d881aSJohn Levon VFIOUserProxy *proxy = vbasedev->proxy; 7340b3d881aSJohn Levon 7350b3d881aSJohn Levon proxy->request = handler; 7360b3d881aSJohn Levon proxy->req_arg = req_arg; 7370b3d881aSJohn Levon qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 7380b3d881aSJohn Levon vfio_user_recv, NULL, NULL, proxy); 7390b3d881aSJohn Levon } 7400b3d881aSJohn Levon 741438d863fSJohn Levon void vfio_user_disconnect(VFIOUserProxy *proxy) 742438d863fSJohn Levon { 743438d863fSJohn Levon VFIOUserMsg *r1, *r2; 744438d863fSJohn Levon 745438d863fSJohn Levon qemu_mutex_lock(&proxy->lock); 746438d863fSJohn Levon 747438d863fSJohn Levon /* our side is quitting */ 748438d863fSJohn Levon if (proxy->state == VFIO_PROXY_CONNECTED) { 749438d863fSJohn Levon vfio_user_shutdown(proxy); 750438d863fSJohn Levon if (!QTAILQ_EMPTY(&proxy->pending)) { 751438d863fSJohn Levon error_printf("vfio_user_disconnect: outstanding requests\n"); 752438d863fSJohn Levon } 753438d863fSJohn Levon } 754438d863fSJohn Levon object_unref(OBJECT(proxy->ioc)); 755438d863fSJohn Levon proxy->ioc = NULL; 7560b3d881aSJohn Levon qemu_bh_delete(proxy->req_bh); 7570b3d881aSJohn Levon proxy->req_bh = NULL; 758438d863fSJohn Levon 759438d863fSJohn Levon proxy->state = VFIO_PROXY_CLOSING; 760438d863fSJohn Levon QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) { 761438d863fSJohn Levon qemu_cond_destroy(&r1->cv); 762438d863fSJohn Levon QTAILQ_REMOVE(&proxy->outgoing, r1, next); 763438d863fSJohn Levon g_free(r1); 764438d863fSJohn Levon } 765438d863fSJohn Levon QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) { 766438d863fSJohn Levon qemu_cond_destroy(&r1->cv); 767438d863fSJohn Levon QTAILQ_REMOVE(&proxy->incoming, r1, next); 768438d863fSJohn Levon g_free(r1); 769438d863fSJohn Levon } 770438d863fSJohn Levon QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) { 771438d863fSJohn Levon qemu_cond_destroy(&r1->cv); 772438d863fSJohn Levon QTAILQ_REMOVE(&proxy->pending, r1, next); 773438d863fSJohn Levon g_free(r1); 774438d863fSJohn Levon } 775438d863fSJohn Levon QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) { 776438d863fSJohn Levon qemu_cond_destroy(&r1->cv); 777438d863fSJohn Levon QTAILQ_REMOVE(&proxy->free, r1, next); 778438d863fSJohn Levon g_free(r1); 779438d863fSJohn Levon } 780438d863fSJohn Levon 781438d863fSJohn Levon /* 782438d863fSJohn Levon * Make sure the iothread isn't blocking anywhere 783438d863fSJohn Levon * with a ref to this proxy by waiting for a BH 784438d863fSJohn Levon * handler to run after the proxy fd handlers were 785438d863fSJohn Levon * deleted above. 786438d863fSJohn Levon */ 787438d863fSJohn Levon aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy); 788438d863fSJohn Levon qemu_cond_wait(&proxy->close_cv, &proxy->lock); 789438d863fSJohn Levon 790438d863fSJohn Levon /* we now hold the only ref to proxy */ 791438d863fSJohn Levon qemu_mutex_unlock(&proxy->lock); 792438d863fSJohn Levon qemu_cond_destroy(&proxy->close_cv); 793438d863fSJohn Levon qemu_mutex_destroy(&proxy->lock); 794438d863fSJohn Levon 795438d863fSJohn Levon QLIST_REMOVE(proxy, next); 796438d863fSJohn Levon if (QLIST_EMPTY(&vfio_user_sockets)) { 797438d863fSJohn Levon iothread_destroy(vfio_user_iothread); 798438d863fSJohn Levon vfio_user_iothread = NULL; 799438d863fSJohn Levon } 800438d863fSJohn Levon 801438d863fSJohn Levon g_free(proxy->sockname); 802438d863fSJohn Levon g_free(proxy); 803438d863fSJohn Levon } 804*36227628SJohn Levon 805*36227628SJohn Levon static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, 806*36227628SJohn Levon uint32_t size, uint32_t flags) 807*36227628SJohn Levon { 808*36227628SJohn Levon static uint16_t next_id; 809*36227628SJohn Levon 810*36227628SJohn Levon hdr->id = qatomic_fetch_inc(&next_id); 811*36227628SJohn Levon hdr->command = cmd; 812*36227628SJohn Levon hdr->size = size; 813*36227628SJohn Levon hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; 814*36227628SJohn Levon hdr->error_reply = 0; 815*36227628SJohn Levon } 816*36227628SJohn Levon 817*36227628SJohn Levon struct cap_entry { 818*36227628SJohn Levon const char *name; 819*36227628SJohn Levon bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp); 820*36227628SJohn Levon }; 821*36227628SJohn Levon 822*36227628SJohn Levon static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict, 823*36227628SJohn Levon struct cap_entry caps[], Error **errp) 824*36227628SJohn Levon { 825*36227628SJohn Levon QObject *qobj; 826*36227628SJohn Levon struct cap_entry *p; 827*36227628SJohn Levon 828*36227628SJohn Levon for (p = caps; p->name != NULL; p++) { 829*36227628SJohn Levon qobj = qdict_get(qdict, p->name); 830*36227628SJohn Levon if (qobj != NULL) { 831*36227628SJohn Levon if (!p->check(proxy, qobj, errp)) { 832*36227628SJohn Levon return false; 833*36227628SJohn Levon } 834*36227628SJohn Levon qdict_del(qdict, p->name); 835*36227628SJohn Levon } 836*36227628SJohn Levon } 837*36227628SJohn Levon 838*36227628SJohn Levon /* warning, for now */ 839*36227628SJohn Levon if (qdict_size(qdict) != 0) { 840*36227628SJohn Levon warn_report("spurious capabilities"); 841*36227628SJohn Levon } 842*36227628SJohn Levon return true; 843*36227628SJohn Levon } 844*36227628SJohn Levon 845*36227628SJohn Levon static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 846*36227628SJohn Levon { 847*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 848*36227628SJohn Levon uint64_t pgsize; 849*36227628SJohn Levon 850*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { 851*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); 852*36227628SJohn Levon return false; 853*36227628SJohn Levon } 854*36227628SJohn Levon 855*36227628SJohn Levon /* must be larger than default */ 856*36227628SJohn Levon if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { 857*36227628SJohn Levon error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); 858*36227628SJohn Levon return false; 859*36227628SJohn Levon } 860*36227628SJohn Levon 861*36227628SJohn Levon proxy->migr_pgsize = pgsize; 862*36227628SJohn Levon return true; 863*36227628SJohn Levon } 864*36227628SJohn Levon 865*36227628SJohn Levon static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 866*36227628SJohn Levon { 867*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 868*36227628SJohn Levon uint64_t bitmap_size; 869*36227628SJohn Levon 870*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { 871*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); 872*36227628SJohn Levon return false; 873*36227628SJohn Levon } 874*36227628SJohn Levon 875*36227628SJohn Levon /* can only lower it */ 876*36227628SJohn Levon if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { 877*36227628SJohn Levon error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); 878*36227628SJohn Levon return false; 879*36227628SJohn Levon } 880*36227628SJohn Levon 881*36227628SJohn Levon proxy->max_bitmap = bitmap_size; 882*36227628SJohn Levon return true; 883*36227628SJohn Levon } 884*36227628SJohn Levon 885*36227628SJohn Levon static struct cap_entry caps_migr[] = { 886*36227628SJohn Levon { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, 887*36227628SJohn Levon { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, 888*36227628SJohn Levon { NULL } 889*36227628SJohn Levon }; 890*36227628SJohn Levon 891*36227628SJohn Levon static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 892*36227628SJohn Levon { 893*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 894*36227628SJohn Levon uint64_t max_send_fds; 895*36227628SJohn Levon 896*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || 897*36227628SJohn Levon max_send_fds > VFIO_USER_MAX_MAX_FDS) { 898*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 899*36227628SJohn Levon return false; 900*36227628SJohn Levon } 901*36227628SJohn Levon proxy->max_send_fds = max_send_fds; 902*36227628SJohn Levon return true; 903*36227628SJohn Levon } 904*36227628SJohn Levon 905*36227628SJohn Levon static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 906*36227628SJohn Levon { 907*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 908*36227628SJohn Levon uint64_t max_xfer_size; 909*36227628SJohn Levon 910*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || 911*36227628SJohn Levon max_xfer_size > VFIO_USER_MAX_MAX_XFER) { 912*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); 913*36227628SJohn Levon return false; 914*36227628SJohn Levon } 915*36227628SJohn Levon proxy->max_xfer_size = max_xfer_size; 916*36227628SJohn Levon return true; 917*36227628SJohn Levon } 918*36227628SJohn Levon 919*36227628SJohn Levon static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 920*36227628SJohn Levon { 921*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 922*36227628SJohn Levon uint64_t pgsizes; 923*36227628SJohn Levon 924*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { 925*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES); 926*36227628SJohn Levon return false; 927*36227628SJohn Levon } 928*36227628SJohn Levon 929*36227628SJohn Levon /* must be larger than default */ 930*36227628SJohn Levon if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { 931*36227628SJohn Levon error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); 932*36227628SJohn Levon return false; 933*36227628SJohn Levon } 934*36227628SJohn Levon 935*36227628SJohn Levon proxy->dma_pgsizes = pgsizes; 936*36227628SJohn Levon return true; 937*36227628SJohn Levon } 938*36227628SJohn Levon 939*36227628SJohn Levon static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 940*36227628SJohn Levon { 941*36227628SJohn Levon QNum *qn = qobject_to(QNum, qobj); 942*36227628SJohn Levon uint64_t max_dma; 943*36227628SJohn Levon 944*36227628SJohn Levon if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { 945*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); 946*36227628SJohn Levon return false; 947*36227628SJohn Levon } 948*36227628SJohn Levon 949*36227628SJohn Levon /* can only lower it */ 950*36227628SJohn Levon if (max_dma > VFIO_USER_DEF_MAP_MAX) { 951*36227628SJohn Levon error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); 952*36227628SJohn Levon return false; 953*36227628SJohn Levon } 954*36227628SJohn Levon 955*36227628SJohn Levon proxy->max_dma = max_dma; 956*36227628SJohn Levon return true; 957*36227628SJohn Levon } 958*36227628SJohn Levon 959*36227628SJohn Levon static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 960*36227628SJohn Levon { 961*36227628SJohn Levon QDict *qdict = qobject_to(QDict, qobj); 962*36227628SJohn Levon 963*36227628SJohn Levon if (qdict == NULL) { 964*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 965*36227628SJohn Levon return true; 966*36227628SJohn Levon } 967*36227628SJohn Levon return caps_parse(proxy, qdict, caps_migr, errp); 968*36227628SJohn Levon } 969*36227628SJohn Levon 970*36227628SJohn Levon static struct cap_entry caps_cap[] = { 971*36227628SJohn Levon { VFIO_USER_CAP_MAX_FDS, check_max_fds }, 972*36227628SJohn Levon { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, 973*36227628SJohn Levon { VFIO_USER_CAP_PGSIZES, check_pgsizes }, 974*36227628SJohn Levon { VFIO_USER_CAP_MAP_MAX, check_max_dma }, 975*36227628SJohn Levon { VFIO_USER_CAP_MIGR, check_migr }, 976*36227628SJohn Levon { NULL } 977*36227628SJohn Levon }; 978*36227628SJohn Levon 979*36227628SJohn Levon static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 980*36227628SJohn Levon { 981*36227628SJohn Levon QDict *qdict = qobject_to(QDict, qobj); 982*36227628SJohn Levon 983*36227628SJohn Levon if (qdict == NULL) { 984*36227628SJohn Levon error_setg(errp, "malformed %s", VFIO_USER_CAP); 985*36227628SJohn Levon return false; 986*36227628SJohn Levon } 987*36227628SJohn Levon return caps_parse(proxy, qdict, caps_cap, errp); 988*36227628SJohn Levon } 989*36227628SJohn Levon 990*36227628SJohn Levon static struct cap_entry ver_0_0[] = { 991*36227628SJohn Levon { VFIO_USER_CAP, check_cap }, 992*36227628SJohn Levon { NULL } 993*36227628SJohn Levon }; 994*36227628SJohn Levon 995*36227628SJohn Levon static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps, 996*36227628SJohn Levon Error **errp) 997*36227628SJohn Levon { 998*36227628SJohn Levon QObject *qobj; 999*36227628SJohn Levon QDict *qdict; 1000*36227628SJohn Levon bool ret; 1001*36227628SJohn Levon 1002*36227628SJohn Levon qobj = qobject_from_json(caps, NULL); 1003*36227628SJohn Levon if (qobj == NULL) { 1004*36227628SJohn Levon error_setg(errp, "malformed capabilities %s", caps); 1005*36227628SJohn Levon return false; 1006*36227628SJohn Levon } 1007*36227628SJohn Levon qdict = qobject_to(QDict, qobj); 1008*36227628SJohn Levon if (qdict == NULL) { 1009*36227628SJohn Levon error_setg(errp, "capabilities %s not an object", caps); 1010*36227628SJohn Levon qobject_unref(qobj); 1011*36227628SJohn Levon return false; 1012*36227628SJohn Levon } 1013*36227628SJohn Levon ret = caps_parse(proxy, qdict, ver_0_0, errp); 1014*36227628SJohn Levon 1015*36227628SJohn Levon qobject_unref(qobj); 1016*36227628SJohn Levon return ret; 1017*36227628SJohn Levon } 1018*36227628SJohn Levon 1019*36227628SJohn Levon static GString *caps_json(void) 1020*36227628SJohn Levon { 1021*36227628SJohn Levon QDict *dict = qdict_new(); 1022*36227628SJohn Levon QDict *capdict = qdict_new(); 1023*36227628SJohn Levon QDict *migdict = qdict_new(); 1024*36227628SJohn Levon GString *str; 1025*36227628SJohn Levon 1026*36227628SJohn Levon qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); 1027*36227628SJohn Levon qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); 1028*36227628SJohn Levon qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); 1029*36227628SJohn Levon 1030*36227628SJohn Levon qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); 1031*36227628SJohn Levon qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); 1032*36227628SJohn Levon qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); 1033*36227628SJohn Levon qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); 1034*36227628SJohn Levon 1035*36227628SJohn Levon qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); 1036*36227628SJohn Levon 1037*36227628SJohn Levon str = qobject_to_json(QOBJECT(dict)); 1038*36227628SJohn Levon qobject_unref(dict); 1039*36227628SJohn Levon return str; 1040*36227628SJohn Levon } 1041*36227628SJohn Levon 1042*36227628SJohn Levon bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp) 1043*36227628SJohn Levon { 1044*36227628SJohn Levon g_autofree VFIOUserVersion *msgp = NULL; 1045*36227628SJohn Levon GString *caps; 1046*36227628SJohn Levon char *reply; 1047*36227628SJohn Levon int size, caplen; 1048*36227628SJohn Levon 1049*36227628SJohn Levon caps = caps_json(); 1050*36227628SJohn Levon caplen = caps->len + 1; 1051*36227628SJohn Levon size = sizeof(*msgp) + caplen; 1052*36227628SJohn Levon msgp = g_malloc0(size); 1053*36227628SJohn Levon 1054*36227628SJohn Levon vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); 1055*36227628SJohn Levon msgp->major = VFIO_USER_MAJOR_VER; 1056*36227628SJohn Levon msgp->minor = VFIO_USER_MINOR_VER; 1057*36227628SJohn Levon memcpy(&msgp->capabilities, caps->str, caplen); 1058*36227628SJohn Levon g_string_free(caps, true); 1059*36227628SJohn Levon trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1060*36227628SJohn Levon 1061*36227628SJohn Levon if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) { 1062*36227628SJohn Levon return false; 1063*36227628SJohn Levon } 1064*36227628SJohn Levon 1065*36227628SJohn Levon if (msgp->hdr.flags & VFIO_USER_ERROR) { 1066*36227628SJohn Levon error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); 1067*36227628SJohn Levon return false; 1068*36227628SJohn Levon } 1069*36227628SJohn Levon 1070*36227628SJohn Levon if (msgp->major != VFIO_USER_MAJOR_VER || 1071*36227628SJohn Levon msgp->minor > VFIO_USER_MINOR_VER) { 1072*36227628SJohn Levon error_setg(errp, "incompatible server version"); 1073*36227628SJohn Levon return false; 1074*36227628SJohn Levon } 1075*36227628SJohn Levon 1076*36227628SJohn Levon reply = msgp->capabilities; 1077*36227628SJohn Levon if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { 1078*36227628SJohn Levon error_setg(errp, "corrupt version reply"); 1079*36227628SJohn Levon return false; 1080*36227628SJohn Levon } 1081*36227628SJohn Levon 1082*36227628SJohn Levon if (!caps_check(proxy, msgp->minor, reply, errp)) { 1083*36227628SJohn Levon return false; 1084*36227628SJohn Levon } 1085*36227628SJohn Levon 1086*36227628SJohn Levon trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1087*36227628SJohn Levon return true; 1088*36227628SJohn Levon } 1089