1 /* 2 * vfio protocol over a UNIX socket. 3 * 4 * Copyright © 2018, 2021 Oracle and/or its affiliates. 5 * 6 * SPDX-License-Identifier: GPL-2.0-or-later 7 */ 8 9 #include "qemu/osdep.h" 10 #include <sys/ioctl.h> 11 12 #include "hw/vfio/vfio-device.h" 13 #include "hw/vfio-user/proxy.h" 14 #include "hw/vfio-user/trace.h" 15 #include "qapi/error.h" 16 #include "qobject/qdict.h" 17 #include "qobject/qjson.h" 18 #include "qobject/qnum.h" 19 #include "qemu/error-report.h" 20 #include "qemu/lockable.h" 21 #include "qemu/main-loop.h" 22 #include "system/iothread.h" 23 24 static int wait_time = 5000; /* wait up to 5 sec for busy servers */ 25 static IOThread *vfio_user_iothread; 26 27 static void vfio_user_shutdown(VFIOUserProxy *proxy); 28 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 29 VFIOUserFDs *fds); 30 static VFIOUserFDs *vfio_user_getfds(int numfds); 31 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); 32 33 static void vfio_user_recv(void *opaque); 34 static void vfio_user_send(void *opaque); 35 static void vfio_user_cb(void *opaque); 36 37 static void vfio_user_request(void *opaque); 38 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, 39 uint32_t size, uint32_t flags); 40 41 static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) 42 { 43 hdr->flags |= VFIO_USER_ERROR; 44 hdr->error_reply = err; 45 } 46 47 /* 48 * Functions called by main, CPU, or iothread threads 49 */ 50 51 static void vfio_user_shutdown(VFIOUserProxy *proxy) 52 { 53 qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL); 54 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, 55 proxy->ctx, NULL, NULL); 56 } 57 58 /* 59 * Same return values as qio_channel_writev_full(): 60 * 61 * QIO_CHANNEL_ERR_BLOCK: *errp not set 62 * -1: *errp will be populated 63 * otherwise: bytes written 64 */ 65 static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg, 66 Error **errp) 67 { 68 VFIOUserFDs *fds = msg->fds; 69 struct iovec iov = { 70 .iov_base = msg->hdr, 71 .iov_len = msg->hdr->size, 72 }; 73 size_t numfds = 0; 74 int *fdp = NULL; 75 ssize_t ret; 76 77 if (fds != NULL && fds->send_fds != 0) { 78 numfds = fds->send_fds; 79 fdp = fds->fds; 80 } 81 82 ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp); 83 84 if (ret == -1) { 85 vfio_user_set_error(msg->hdr, EIO); 86 vfio_user_shutdown(proxy); 87 } 88 trace_vfio_user_send_write(msg->hdr->id, ret); 89 90 return ret; 91 } 92 93 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 94 VFIOUserFDs *fds) 95 { 96 VFIOUserMsg *msg; 97 98 msg = QTAILQ_FIRST(&proxy->free); 99 if (msg != NULL) { 100 QTAILQ_REMOVE(&proxy->free, msg, next); 101 } else { 102 msg = g_malloc0(sizeof(*msg)); 103 qemu_cond_init(&msg->cv); 104 } 105 106 msg->hdr = hdr; 107 msg->fds = fds; 108 return msg; 109 } 110 111 /* 112 * Recycle a message list entry to the free list. 113 */ 114 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg) 115 { 116 if (msg->type == VFIO_MSG_NONE) { 117 error_printf("vfio_user_recycle - freeing free msg\n"); 118 return; 119 } 120 121 /* free msg buffer if no one is waiting to consume the reply */ 122 if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) { 123 g_free(msg->hdr); 124 if (msg->fds != NULL) { 125 g_free(msg->fds); 126 } 127 } 128 129 msg->type = VFIO_MSG_NONE; 130 msg->hdr = NULL; 131 msg->fds = NULL; 132 msg->complete = false; 133 msg->pending = false; 134 QTAILQ_INSERT_HEAD(&proxy->free, msg, next); 135 } 136 137 static VFIOUserFDs *vfio_user_getfds(int numfds) 138 { 139 VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int))); 140 141 fds->fds = (int *)((char *)fds + sizeof(*fds)); 142 143 return fds; 144 } 145 146 /* 147 * Functions only called by iothread 148 */ 149 150 /* 151 * Process a received message. 152 */ 153 static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg, 154 bool isreply) 155 { 156 157 /* 158 * Replies signal a waiter, if none just check for errors 159 * and free the message buffer. 160 * 161 * Requests get queued for the BH. 162 */ 163 if (isreply) { 164 msg->complete = true; 165 if (msg->type == VFIO_MSG_WAIT) { 166 qemu_cond_signal(&msg->cv); 167 } else { 168 if (msg->hdr->flags & VFIO_USER_ERROR) { 169 error_printf("vfio_user_process: error reply on async "); 170 error_printf("request command %x error %s\n", 171 msg->hdr->command, 172 strerror(msg->hdr->error_reply)); 173 } 174 /* youngest nowait msg has been ack'd */ 175 if (proxy->last_nowait == msg) { 176 proxy->last_nowait = NULL; 177 } 178 vfio_user_recycle(proxy, msg); 179 } 180 } else { 181 QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next); 182 qemu_bh_schedule(proxy->req_bh); 183 } 184 } 185 186 /* 187 * Complete a partial message read 188 */ 189 static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp) 190 { 191 VFIOUserMsg *msg = proxy->part_recv; 192 size_t msgleft = proxy->recv_left; 193 bool isreply; 194 char *data; 195 int ret; 196 197 data = (char *)msg->hdr + (msg->hdr->size - msgleft); 198 while (msgleft > 0) { 199 ret = qio_channel_read(proxy->ioc, data, msgleft, errp); 200 201 /* error or would block */ 202 if (ret <= 0) { 203 /* try for rest on next iternation */ 204 if (ret == QIO_CHANNEL_ERR_BLOCK) { 205 proxy->recv_left = msgleft; 206 } 207 return ret; 208 } 209 trace_vfio_user_recv_read(msg->hdr->id, ret); 210 211 msgleft -= ret; 212 data += ret; 213 } 214 215 /* 216 * Read complete message, process it. 217 */ 218 proxy->part_recv = NULL; 219 proxy->recv_left = 0; 220 isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY; 221 vfio_user_process(proxy, msg, isreply); 222 223 /* return positive value */ 224 return 1; 225 } 226 227 /* 228 * Receive and process one incoming message. 229 * 230 * For replies, find matching outgoing request and wake any waiters. 231 * For requests, queue in incoming list and run request BH. 232 */ 233 static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp) 234 { 235 VFIOUserMsg *msg = NULL; 236 g_autofree int *fdp = NULL; 237 VFIOUserFDs *reqfds; 238 VFIOUserHdr hdr; 239 struct iovec iov = { 240 .iov_base = &hdr, 241 .iov_len = sizeof(hdr), 242 }; 243 bool isreply = false; 244 int i, ret; 245 size_t msgleft, numfds = 0; 246 char *data = NULL; 247 char *buf = NULL; 248 249 /* 250 * Complete any partial reads 251 */ 252 if (proxy->part_recv != NULL) { 253 ret = vfio_user_complete(proxy, errp); 254 255 /* still not complete, try later */ 256 if (ret == QIO_CHANNEL_ERR_BLOCK) { 257 return ret; 258 } 259 260 if (ret <= 0) { 261 goto fatal; 262 } 263 /* else fall into reading another msg */ 264 } 265 266 /* 267 * Read header 268 */ 269 ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0, 270 errp); 271 if (ret == QIO_CHANNEL_ERR_BLOCK) { 272 return ret; 273 } 274 275 /* read error or other side closed connection */ 276 if (ret <= 0) { 277 goto fatal; 278 } 279 280 if (ret < sizeof(hdr)) { 281 error_setg(errp, "short read of header"); 282 goto fatal; 283 } 284 285 /* 286 * Validate header 287 */ 288 if (hdr.size < sizeof(VFIOUserHdr)) { 289 error_setg(errp, "bad header size"); 290 goto fatal; 291 } 292 switch (hdr.flags & VFIO_USER_TYPE) { 293 case VFIO_USER_REQUEST: 294 isreply = false; 295 break; 296 case VFIO_USER_REPLY: 297 isreply = true; 298 break; 299 default: 300 error_setg(errp, "unknown message type"); 301 goto fatal; 302 } 303 trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size, 304 hdr.flags); 305 306 /* 307 * For replies, find the matching pending request. 308 * For requests, reap incoming FDs. 309 */ 310 if (isreply) { 311 QTAILQ_FOREACH(msg, &proxy->pending, next) { 312 if (hdr.id == msg->id) { 313 break; 314 } 315 } 316 if (msg == NULL) { 317 error_setg(errp, "unexpected reply"); 318 goto err; 319 } 320 QTAILQ_REMOVE(&proxy->pending, msg, next); 321 322 /* 323 * Process any received FDs 324 */ 325 if (numfds != 0) { 326 if (msg->fds == NULL || msg->fds->recv_fds < numfds) { 327 error_setg(errp, "unexpected FDs"); 328 goto err; 329 } 330 msg->fds->recv_fds = numfds; 331 memcpy(msg->fds->fds, fdp, numfds * sizeof(int)); 332 } 333 } else { 334 if (numfds != 0) { 335 reqfds = vfio_user_getfds(numfds); 336 memcpy(reqfds->fds, fdp, numfds * sizeof(int)); 337 } else { 338 reqfds = NULL; 339 } 340 } 341 342 /* 343 * Put the whole message into a single buffer. 344 */ 345 if (isreply) { 346 if (hdr.size > msg->rsize) { 347 error_setg(errp, "reply larger than recv buffer"); 348 goto err; 349 } 350 *msg->hdr = hdr; 351 data = (char *)msg->hdr + sizeof(hdr); 352 } else { 353 buf = g_malloc0(hdr.size); 354 memcpy(buf, &hdr, sizeof(hdr)); 355 data = buf + sizeof(hdr); 356 msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds); 357 msg->type = VFIO_MSG_REQ; 358 } 359 360 /* 361 * Read rest of message. 362 */ 363 msgleft = hdr.size - sizeof(hdr); 364 while (msgleft > 0) { 365 ret = qio_channel_read(proxy->ioc, data, msgleft, errp); 366 367 /* prepare to complete read on next iternation */ 368 if (ret == QIO_CHANNEL_ERR_BLOCK) { 369 proxy->part_recv = msg; 370 proxy->recv_left = msgleft; 371 return ret; 372 } 373 374 if (ret <= 0) { 375 goto fatal; 376 } 377 trace_vfio_user_recv_read(hdr.id, ret); 378 379 msgleft -= ret; 380 data += ret; 381 } 382 383 vfio_user_process(proxy, msg, isreply); 384 return 0; 385 386 /* 387 * fatal means the other side closed or we don't trust the stream 388 * err means this message is corrupt 389 */ 390 fatal: 391 vfio_user_shutdown(proxy); 392 proxy->state = VFIO_PROXY_ERROR; 393 394 /* set error if server side closed */ 395 if (ret == 0) { 396 error_setg(errp, "server closed socket"); 397 } 398 399 err: 400 for (i = 0; i < numfds; i++) { 401 close(fdp[i]); 402 } 403 if (isreply && msg != NULL) { 404 /* force an error to keep sending thread from hanging */ 405 vfio_user_set_error(msg->hdr, EINVAL); 406 msg->complete = true; 407 qemu_cond_signal(&msg->cv); 408 } 409 return -1; 410 } 411 412 static void vfio_user_recv(void *opaque) 413 { 414 VFIOUserProxy *proxy = opaque; 415 416 QEMU_LOCK_GUARD(&proxy->lock); 417 418 if (proxy->state == VFIO_PROXY_CONNECTED) { 419 Error *local_err = NULL; 420 421 while (vfio_user_recv_one(proxy, &local_err) == 0) { 422 ; 423 } 424 425 if (local_err != NULL) { 426 error_report_err(local_err); 427 } 428 } 429 } 430 431 /* 432 * Send a single message, same return semantics as vfio_user_send_qio(). 433 * 434 * Sent async messages are freed, others are moved to pending queue. 435 */ 436 static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp) 437 { 438 VFIOUserMsg *msg; 439 ssize_t ret; 440 441 msg = QTAILQ_FIRST(&proxy->outgoing); 442 ret = vfio_user_send_qio(proxy, msg, errp); 443 if (ret < 0) { 444 return ret; 445 } 446 447 QTAILQ_REMOVE(&proxy->outgoing, msg, next); 448 if (msg->type == VFIO_MSG_ASYNC) { 449 vfio_user_recycle(proxy, msg); 450 } else { 451 QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 452 msg->pending = true; 453 } 454 455 return ret; 456 } 457 458 /* 459 * Send messages from outgoing queue when the socket buffer has space. 460 * If we deplete 'outgoing', remove ourselves from the poll list. 461 */ 462 static void vfio_user_send(void *opaque) 463 { 464 VFIOUserProxy *proxy = opaque; 465 466 QEMU_LOCK_GUARD(&proxy->lock); 467 468 if (proxy->state == VFIO_PROXY_CONNECTED) { 469 while (!QTAILQ_EMPTY(&proxy->outgoing)) { 470 Error *local_err = NULL; 471 int ret; 472 473 ret = vfio_user_send_one(proxy, &local_err); 474 475 if (ret == QIO_CHANNEL_ERR_BLOCK) { 476 return; 477 } else if (ret == -1) { 478 error_report_err(local_err); 479 return; 480 } 481 } 482 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 483 vfio_user_recv, NULL, NULL, proxy); 484 } 485 } 486 487 static void vfio_user_cb(void *opaque) 488 { 489 VFIOUserProxy *proxy = opaque; 490 491 QEMU_LOCK_GUARD(&proxy->lock); 492 493 proxy->state = VFIO_PROXY_CLOSED; 494 qemu_cond_signal(&proxy->close_cv); 495 } 496 497 498 /* 499 * Functions called by main or CPU threads 500 */ 501 502 /* 503 * Process incoming requests. 504 * 505 * The bus-specific callback has the form: 506 * request(opaque, msg) 507 * where 'opaque' was specified in vfio_user_set_handler 508 * and 'msg' is the inbound message. 509 * 510 * The callback is responsible for disposing of the message buffer, 511 * usually by re-using it when calling vfio_send_reply or vfio_send_error, 512 * both of which free their message buffer when the reply is sent. 513 * 514 * If the callback uses a new buffer, it needs to free the old one. 515 */ 516 static void vfio_user_request(void *opaque) 517 { 518 VFIOUserProxy *proxy = opaque; 519 VFIOUserMsgQ new, free; 520 VFIOUserMsg *msg, *m1; 521 522 /* reap all incoming */ 523 QTAILQ_INIT(&new); 524 WITH_QEMU_LOCK_GUARD(&proxy->lock) { 525 QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) { 526 QTAILQ_REMOVE(&proxy->incoming, msg, next); 527 QTAILQ_INSERT_TAIL(&new, msg, next); 528 } 529 } 530 531 /* process list */ 532 QTAILQ_INIT(&free); 533 QTAILQ_FOREACH_SAFE(msg, &new, next, m1) { 534 QTAILQ_REMOVE(&new, msg, next); 535 trace_vfio_user_recv_request(msg->hdr->command); 536 proxy->request(proxy->req_arg, msg); 537 QTAILQ_INSERT_HEAD(&free, msg, next); 538 } 539 540 /* free list */ 541 WITH_QEMU_LOCK_GUARD(&proxy->lock) { 542 QTAILQ_FOREACH_SAFE(msg, &free, next, m1) { 543 vfio_user_recycle(proxy, msg); 544 } 545 } 546 } 547 548 /* 549 * Messages are queued onto the proxy's outgoing list. 550 * 551 * It handles 3 types of messages: 552 * 553 * async messages - replies and posted writes 554 * 555 * There will be no reply from the server, so message 556 * buffers are freed after they're sent. 557 * 558 * nowait messages - map/unmap during address space transactions 559 * 560 * These are also sent async, but a reply is expected so that 561 * vfio_wait_reqs() can wait for the youngest nowait request. 562 * They transition from the outgoing list to the pending list 563 * when sent, and are freed when the reply is received. 564 * 565 * wait messages - all other requests 566 * 567 * The reply to these messages is waited for by their caller. 568 * They also transition from outgoing to pending when sent, but 569 * the message buffer is returned to the caller with the reply 570 * contents. The caller is responsible for freeing these messages. 571 * 572 * As an optimization, if the outgoing list and the socket send 573 * buffer are empty, the message is sent inline instead of being 574 * added to the outgoing list. The rest of the transitions are 575 * unchanged. 576 */ 577 static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, 578 Error **errp) 579 { 580 int ret; 581 582 /* 583 * Unsent outgoing msgs - add to tail 584 */ 585 if (!QTAILQ_EMPTY(&proxy->outgoing)) { 586 QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); 587 return true; 588 } 589 590 /* 591 * Try inline - if blocked, queue it and kick send poller 592 */ 593 if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { 594 ret = QIO_CHANNEL_ERR_BLOCK; 595 } else { 596 ret = vfio_user_send_qio(proxy, msg, errp); 597 } 598 599 if (ret == QIO_CHANNEL_ERR_BLOCK) { 600 QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); 601 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 602 vfio_user_recv, proxy->ctx, 603 vfio_user_send, proxy); 604 return true; 605 } 606 if (ret == -1) { 607 return false; 608 } 609 610 /* 611 * Sent - free async, add others to pending 612 */ 613 if (msg->type == VFIO_MSG_ASYNC) { 614 vfio_user_recycle(proxy, msg); 615 } else { 616 QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 617 msg->pending = true; 618 } 619 620 return true; 621 } 622 623 /* 624 * Returns false if we did not successfully receive a reply message, in which 625 * case @errp will be populated. 626 * 627 * In either case, the caller must free @hdr and @fds if needed. 628 */ 629 static bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 630 VFIOUserFDs *fds, int rsize, Error **errp) 631 { 632 VFIOUserMsg *msg; 633 bool ok = false; 634 635 if (hdr->flags & VFIO_USER_NO_REPLY) { 636 error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); 637 return false; 638 } 639 640 qemu_mutex_lock(&proxy->lock); 641 642 msg = vfio_user_getmsg(proxy, hdr, fds); 643 msg->id = hdr->id; 644 msg->rsize = rsize ? rsize : hdr->size; 645 msg->type = VFIO_MSG_WAIT; 646 647 ok = vfio_user_send_queued(proxy, msg, errp); 648 649 if (ok) { 650 while (!msg->complete) { 651 if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { 652 VFIOUserMsgQ *list; 653 654 list = msg->pending ? &proxy->pending : &proxy->outgoing; 655 QTAILQ_REMOVE(list, msg, next); 656 error_setg_errno(errp, ETIMEDOUT, 657 "timed out waiting for reply"); 658 ok = false; 659 break; 660 } 661 } 662 } 663 664 vfio_user_recycle(proxy, msg); 665 666 qemu_mutex_unlock(&proxy->lock); 667 668 return ok; 669 } 670 671 static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets = 672 QLIST_HEAD_INITIALIZER(vfio_user_sockets); 673 674 VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) 675 { 676 VFIOUserProxy *proxy; 677 QIOChannelSocket *sioc; 678 QIOChannel *ioc; 679 char *sockname; 680 681 if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { 682 error_setg(errp, "vfio_user_connect - bad address family"); 683 return NULL; 684 } 685 sockname = addr->u.q_unix.path; 686 687 sioc = qio_channel_socket_new(); 688 ioc = QIO_CHANNEL(sioc); 689 if (qio_channel_socket_connect_sync(sioc, addr, errp)) { 690 object_unref(OBJECT(ioc)); 691 return NULL; 692 } 693 qio_channel_set_blocking(ioc, false, NULL); 694 695 proxy = g_malloc0(sizeof(VFIOUserProxy)); 696 proxy->sockname = g_strdup_printf("unix:%s", sockname); 697 proxy->ioc = ioc; 698 699 /* init defaults */ 700 proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; 701 proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; 702 proxy->max_dma = VFIO_USER_DEF_MAP_MAX; 703 proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; 704 proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; 705 proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; 706 707 proxy->flags = VFIO_PROXY_CLIENT; 708 proxy->state = VFIO_PROXY_CONNECTED; 709 710 qemu_mutex_init(&proxy->lock); 711 qemu_cond_init(&proxy->close_cv); 712 713 if (vfio_user_iothread == NULL) { 714 vfio_user_iothread = iothread_create("VFIO user", errp); 715 } 716 717 proxy->ctx = iothread_get_aio_context(vfio_user_iothread); 718 proxy->req_bh = qemu_bh_new(vfio_user_request, proxy); 719 720 QTAILQ_INIT(&proxy->outgoing); 721 QTAILQ_INIT(&proxy->incoming); 722 QTAILQ_INIT(&proxy->free); 723 QTAILQ_INIT(&proxy->pending); 724 QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next); 725 726 return proxy; 727 } 728 729 void vfio_user_set_handler(VFIODevice *vbasedev, 730 void (*handler)(void *opaque, VFIOUserMsg *msg), 731 void *req_arg) 732 { 733 VFIOUserProxy *proxy = vbasedev->proxy; 734 735 proxy->request = handler; 736 proxy->req_arg = req_arg; 737 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 738 vfio_user_recv, NULL, NULL, proxy); 739 } 740 741 void vfio_user_disconnect(VFIOUserProxy *proxy) 742 { 743 VFIOUserMsg *r1, *r2; 744 745 qemu_mutex_lock(&proxy->lock); 746 747 /* our side is quitting */ 748 if (proxy->state == VFIO_PROXY_CONNECTED) { 749 vfio_user_shutdown(proxy); 750 if (!QTAILQ_EMPTY(&proxy->pending)) { 751 error_printf("vfio_user_disconnect: outstanding requests\n"); 752 } 753 } 754 object_unref(OBJECT(proxy->ioc)); 755 proxy->ioc = NULL; 756 qemu_bh_delete(proxy->req_bh); 757 proxy->req_bh = NULL; 758 759 proxy->state = VFIO_PROXY_CLOSING; 760 QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) { 761 qemu_cond_destroy(&r1->cv); 762 QTAILQ_REMOVE(&proxy->outgoing, r1, next); 763 g_free(r1); 764 } 765 QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) { 766 qemu_cond_destroy(&r1->cv); 767 QTAILQ_REMOVE(&proxy->incoming, r1, next); 768 g_free(r1); 769 } 770 QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) { 771 qemu_cond_destroy(&r1->cv); 772 QTAILQ_REMOVE(&proxy->pending, r1, next); 773 g_free(r1); 774 } 775 QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) { 776 qemu_cond_destroy(&r1->cv); 777 QTAILQ_REMOVE(&proxy->free, r1, next); 778 g_free(r1); 779 } 780 781 /* 782 * Make sure the iothread isn't blocking anywhere 783 * with a ref to this proxy by waiting for a BH 784 * handler to run after the proxy fd handlers were 785 * deleted above. 786 */ 787 aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy); 788 qemu_cond_wait(&proxy->close_cv, &proxy->lock); 789 790 /* we now hold the only ref to proxy */ 791 qemu_mutex_unlock(&proxy->lock); 792 qemu_cond_destroy(&proxy->close_cv); 793 qemu_mutex_destroy(&proxy->lock); 794 795 QLIST_REMOVE(proxy, next); 796 if (QLIST_EMPTY(&vfio_user_sockets)) { 797 iothread_destroy(vfio_user_iothread); 798 vfio_user_iothread = NULL; 799 } 800 801 g_free(proxy->sockname); 802 g_free(proxy); 803 } 804 805 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, 806 uint32_t size, uint32_t flags) 807 { 808 static uint16_t next_id; 809 810 hdr->id = qatomic_fetch_inc(&next_id); 811 hdr->command = cmd; 812 hdr->size = size; 813 hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; 814 hdr->error_reply = 0; 815 } 816 817 struct cap_entry { 818 const char *name; 819 bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp); 820 }; 821 822 static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict, 823 struct cap_entry caps[], Error **errp) 824 { 825 QObject *qobj; 826 struct cap_entry *p; 827 828 for (p = caps; p->name != NULL; p++) { 829 qobj = qdict_get(qdict, p->name); 830 if (qobj != NULL) { 831 if (!p->check(proxy, qobj, errp)) { 832 return false; 833 } 834 qdict_del(qdict, p->name); 835 } 836 } 837 838 /* warning, for now */ 839 if (qdict_size(qdict) != 0) { 840 warn_report("spurious capabilities"); 841 } 842 return true; 843 } 844 845 static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 846 { 847 QNum *qn = qobject_to(QNum, qobj); 848 uint64_t pgsize; 849 850 if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { 851 error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); 852 return false; 853 } 854 855 /* must be larger than default */ 856 if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { 857 error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); 858 return false; 859 } 860 861 proxy->migr_pgsize = pgsize; 862 return true; 863 } 864 865 static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 866 { 867 QNum *qn = qobject_to(QNum, qobj); 868 uint64_t bitmap_size; 869 870 if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { 871 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); 872 return false; 873 } 874 875 /* can only lower it */ 876 if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { 877 error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); 878 return false; 879 } 880 881 proxy->max_bitmap = bitmap_size; 882 return true; 883 } 884 885 static struct cap_entry caps_migr[] = { 886 { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, 887 { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, 888 { NULL } 889 }; 890 891 static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 892 { 893 QNum *qn = qobject_to(QNum, qobj); 894 uint64_t max_send_fds; 895 896 if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || 897 max_send_fds > VFIO_USER_MAX_MAX_FDS) { 898 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 899 return false; 900 } 901 proxy->max_send_fds = max_send_fds; 902 return true; 903 } 904 905 static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 906 { 907 QNum *qn = qobject_to(QNum, qobj); 908 uint64_t max_xfer_size; 909 910 if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || 911 max_xfer_size > VFIO_USER_MAX_MAX_XFER) { 912 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); 913 return false; 914 } 915 proxy->max_xfer_size = max_xfer_size; 916 return true; 917 } 918 919 static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 920 { 921 QNum *qn = qobject_to(QNum, qobj); 922 uint64_t pgsizes; 923 924 if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { 925 error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES); 926 return false; 927 } 928 929 /* must be larger than default */ 930 if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { 931 error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); 932 return false; 933 } 934 935 proxy->dma_pgsizes = pgsizes; 936 return true; 937 } 938 939 static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 940 { 941 QNum *qn = qobject_to(QNum, qobj); 942 uint64_t max_dma; 943 944 if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { 945 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); 946 return false; 947 } 948 949 /* can only lower it */ 950 if (max_dma > VFIO_USER_DEF_MAP_MAX) { 951 error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); 952 return false; 953 } 954 955 proxy->max_dma = max_dma; 956 return true; 957 } 958 959 static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 960 { 961 QDict *qdict = qobject_to(QDict, qobj); 962 963 if (qdict == NULL) { 964 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 965 return true; 966 } 967 return caps_parse(proxy, qdict, caps_migr, errp); 968 } 969 970 static struct cap_entry caps_cap[] = { 971 { VFIO_USER_CAP_MAX_FDS, check_max_fds }, 972 { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, 973 { VFIO_USER_CAP_PGSIZES, check_pgsizes }, 974 { VFIO_USER_CAP_MAP_MAX, check_max_dma }, 975 { VFIO_USER_CAP_MIGR, check_migr }, 976 { NULL } 977 }; 978 979 static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 980 { 981 QDict *qdict = qobject_to(QDict, qobj); 982 983 if (qdict == NULL) { 984 error_setg(errp, "malformed %s", VFIO_USER_CAP); 985 return false; 986 } 987 return caps_parse(proxy, qdict, caps_cap, errp); 988 } 989 990 static struct cap_entry ver_0_0[] = { 991 { VFIO_USER_CAP, check_cap }, 992 { NULL } 993 }; 994 995 static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps, 996 Error **errp) 997 { 998 QObject *qobj; 999 QDict *qdict; 1000 bool ret; 1001 1002 qobj = qobject_from_json(caps, NULL); 1003 if (qobj == NULL) { 1004 error_setg(errp, "malformed capabilities %s", caps); 1005 return false; 1006 } 1007 qdict = qobject_to(QDict, qobj); 1008 if (qdict == NULL) { 1009 error_setg(errp, "capabilities %s not an object", caps); 1010 qobject_unref(qobj); 1011 return false; 1012 } 1013 ret = caps_parse(proxy, qdict, ver_0_0, errp); 1014 1015 qobject_unref(qobj); 1016 return ret; 1017 } 1018 1019 static GString *caps_json(void) 1020 { 1021 QDict *dict = qdict_new(); 1022 QDict *capdict = qdict_new(); 1023 QDict *migdict = qdict_new(); 1024 GString *str; 1025 1026 qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); 1027 qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); 1028 qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); 1029 1030 qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); 1031 qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); 1032 qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); 1033 qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); 1034 1035 qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); 1036 1037 str = qobject_to_json(QOBJECT(dict)); 1038 qobject_unref(dict); 1039 return str; 1040 } 1041 1042 bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp) 1043 { 1044 g_autofree VFIOUserVersion *msgp = NULL; 1045 GString *caps; 1046 char *reply; 1047 int size, caplen; 1048 1049 caps = caps_json(); 1050 caplen = caps->len + 1; 1051 size = sizeof(*msgp) + caplen; 1052 msgp = g_malloc0(size); 1053 1054 vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); 1055 msgp->major = VFIO_USER_MAJOR_VER; 1056 msgp->minor = VFIO_USER_MINOR_VER; 1057 memcpy(&msgp->capabilities, caps->str, caplen); 1058 g_string_free(caps, true); 1059 trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1060 1061 if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) { 1062 return false; 1063 } 1064 1065 if (msgp->hdr.flags & VFIO_USER_ERROR) { 1066 error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); 1067 return false; 1068 } 1069 1070 if (msgp->major != VFIO_USER_MAJOR_VER || 1071 msgp->minor > VFIO_USER_MINOR_VER) { 1072 error_setg(errp, "incompatible server version"); 1073 return false; 1074 } 1075 1076 reply = msgp->capabilities; 1077 if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { 1078 error_setg(errp, "corrupt version reply"); 1079 return false; 1080 } 1081 1082 if (!caps_check(proxy, msgp->minor, reply, errp)) { 1083 return false; 1084 } 1085 1086 trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1087 return true; 1088 } 1089