1 /* 2 * vfio protocol over a UNIX socket. 3 * 4 * Copyright © 2018, 2021 Oracle and/or its affiliates. 5 * 6 * SPDX-License-Identifier: GPL-2.0-or-later 7 */ 8 9 #include "qemu/osdep.h" 10 #include <sys/ioctl.h> 11 12 #include "hw/vfio/vfio-device.h" 13 #include "hw/vfio-user/proxy.h" 14 #include "hw/vfio-user/trace.h" 15 #include "qapi/error.h" 16 #include "qobject/qdict.h" 17 #include "qobject/qjson.h" 18 #include "qobject/qnum.h" 19 #include "qemu/error-report.h" 20 #include "qemu/lockable.h" 21 #include "qemu/main-loop.h" 22 #include "system/iothread.h" 23 24 static int wait_time = 5000; /* wait up to 5 sec for busy servers */ 25 static IOThread *vfio_user_iothread; 26 27 static void vfio_user_shutdown(VFIOUserProxy *proxy); 28 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 29 VFIOUserFDs *fds); 30 static VFIOUserFDs *vfio_user_getfds(int numfds); 31 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg); 32 33 static void vfio_user_recv(void *opaque); 34 static void vfio_user_send(void *opaque); 35 static void vfio_user_cb(void *opaque); 36 37 static void vfio_user_request(void *opaque); 38 39 static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) 40 { 41 hdr->flags |= VFIO_USER_ERROR; 42 hdr->error_reply = err; 43 } 44 45 /* 46 * Functions called by main, CPU, or iothread threads 47 */ 48 49 static void vfio_user_shutdown(VFIOUserProxy *proxy) 50 { 51 qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL); 52 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, 53 proxy->ctx, NULL, NULL); 54 } 55 56 /* 57 * Same return values as qio_channel_writev_full(): 58 * 59 * QIO_CHANNEL_ERR_BLOCK: *errp not set 60 * -1: *errp will be populated 61 * otherwise: bytes written 62 */ 63 static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg, 64 Error **errp) 65 { 66 VFIOUserFDs *fds = msg->fds; 67 struct iovec iov = { 68 .iov_base = msg->hdr, 69 .iov_len = msg->hdr->size, 70 
}; 71 size_t numfds = 0; 72 int *fdp = NULL; 73 ssize_t ret; 74 75 if (fds != NULL && fds->send_fds != 0) { 76 numfds = fds->send_fds; 77 fdp = fds->fds; 78 } 79 80 ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp); 81 82 if (ret == -1) { 83 vfio_user_set_error(msg->hdr, EIO); 84 vfio_user_shutdown(proxy); 85 } 86 trace_vfio_user_send_write(msg->hdr->id, ret); 87 88 return ret; 89 } 90 91 static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 92 VFIOUserFDs *fds) 93 { 94 VFIOUserMsg *msg; 95 96 msg = QTAILQ_FIRST(&proxy->free); 97 if (msg != NULL) { 98 QTAILQ_REMOVE(&proxy->free, msg, next); 99 } else { 100 msg = g_malloc0(sizeof(*msg)); 101 qemu_cond_init(&msg->cv); 102 } 103 104 msg->hdr = hdr; 105 msg->fds = fds; 106 return msg; 107 } 108 109 /* 110 * Recycle a message list entry to the free list. 111 */ 112 static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg) 113 { 114 if (msg->type == VFIO_MSG_NONE) { 115 error_printf("vfio_user_recycle - freeing free msg\n"); 116 return; 117 } 118 119 /* free msg buffer if no one is waiting to consume the reply */ 120 if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) { 121 g_free(msg->hdr); 122 if (msg->fds != NULL) { 123 g_free(msg->fds); 124 } 125 } 126 127 msg->type = VFIO_MSG_NONE; 128 msg->hdr = NULL; 129 msg->fds = NULL; 130 msg->complete = false; 131 msg->pending = false; 132 QTAILQ_INSERT_HEAD(&proxy->free, msg, next); 133 } 134 135 static VFIOUserFDs *vfio_user_getfds(int numfds) 136 { 137 VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int))); 138 139 fds->fds = (int *)((char *)fds + sizeof(*fds)); 140 141 return fds; 142 } 143 144 /* 145 * Functions only called by iothread 146 */ 147 148 /* 149 * Process a received message. 
 */
static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                              bool isreply)
{

    /*
     * Replies signal a waiter, if none just check for errors
     * and free the message buffer.
     *
     * Requests get queued for the BH.
     */
    if (isreply) {
        msg->complete = true;
        if (msg->type == VFIO_MSG_WAIT) {
            /* a caller is blocked in vfio_user_send_wait() - wake it */
            qemu_cond_signal(&msg->cv);
        } else {
            /* nowait/async: nobody consumes the reply; just log errors */
            if (msg->hdr->flags & VFIO_USER_ERROR) {
                error_printf("vfio_user_process: error reply on async ");
                error_printf("request command %x error %s\n",
                             msg->hdr->command,
                             strerror(msg->hdr->error_reply));
            }
            /* youngest nowait msg has been ack'd */
            if (proxy->last_nowait == msg) {
                proxy->last_nowait = NULL;
            }
            vfio_user_recycle(proxy, msg);
        }
    } else {
        /* server-initiated request: hand off to the request BH */
        QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
        qemu_bh_schedule(proxy->req_bh);
    }
}

/*
 * Complete a partial message read.
 *
 * proxy->part_recv/recv_left record a message whose body could not be
 * fully read on a previous pass (socket would have blocked).  Resume
 * reading where we left off; returns <= 0 on error/block (per
 * qio_channel_read()), or a positive value once the message has been
 * fully read and processed.
 */
static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = proxy->part_recv;
    size_t msgleft = proxy->recv_left;
    bool isreply;
    char *data;
    int ret;

    /* resume at the first unread byte of the message body */
    data = (char *)msg->hdr + (msg->hdr->size - msgleft);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* error or would block */
        if (ret <= 0) {
            /* try for rest on next iteration */
            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                proxy->recv_left = msgleft;
            }
            return ret;
        }
        trace_vfio_user_recv_read(msg->hdr->id, ret);

        msgleft -= ret;
        data += ret;
    }

    /*
     * Read complete message, process it.
     */
    proxy->part_recv = NULL;
    proxy->recv_left = 0;
    isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
    vfio_user_process(proxy, msg, isreply);

    /* return positive value */
    return 1;
}

/*
 * Receive and process one incoming message.
 *
 * For replies, find matching outgoing request and wake any waiters.
 * For requests, queue in incoming list and run request BH.
 *
 * Returns 0 on success, QIO_CHANNEL_ERR_BLOCK if the socket would
 * block, or -1 on error (with *errp populated).
 */
static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = NULL;
    g_autofree int *fdp = NULL;
    VFIOUserFDs *reqfds;
    VFIOUserHdr hdr;
    struct iovec iov = {
        .iov_base = &hdr,
        .iov_len = sizeof(hdr),
    };
    bool isreply = false;
    int i, ret;
    size_t msgleft, numfds = 0;
    char *data = NULL;
    char *buf = NULL;

    /*
     * Complete any partial reads
     */
    if (proxy->part_recv != NULL) {
        ret = vfio_user_complete(proxy, errp);

        /* still not complete, try later */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        /* else fall into reading another msg */
    }

    /*
     * Read header (and reap any FDs passed with it)
     */
    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
                                 errp);
    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        return ret;
    }

    /* read error or other side closed connection */
    if (ret <= 0) {
        goto fatal;
    }

    if (ret < sizeof(hdr)) {
        error_setg(errp, "short read of header");
        goto fatal;
    }

    /*
     * Validate header
     */
    if (hdr.size < sizeof(VFIOUserHdr)) {
        error_setg(errp, "bad header size");
        goto fatal;
    }
    switch (hdr.flags & VFIO_USER_TYPE) {
    case VFIO_USER_REQUEST:
        isreply = false;
        break;
    case VFIO_USER_REPLY:
        isreply = true;
        break;
    default:
        error_setg(errp, "unknown message type");
        goto fatal;
    }
    trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
                             hdr.flags);

    /*
     * For replies, find the matching pending request.
     * For requests, reap incoming FDs.
     */
    if (isreply) {
        QTAILQ_FOREACH(msg, &proxy->pending, next) {
            if (hdr.id == msg->id) {
                break;
            }
        }
        if (msg == NULL) {
            error_setg(errp, "unexpected reply");
            goto err;
        }
        QTAILQ_REMOVE(&proxy->pending, msg, next);

        /*
         * Process any received FDs
         */
        if (numfds != 0) {
            /* requester must have provided room for them */
            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
                error_setg(errp, "unexpected FDs");
                goto err;
            }
            msg->fds->recv_fds = numfds;
            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
        }
    } else {
        if (numfds != 0) {
            reqfds = vfio_user_getfds(numfds);
            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
        } else {
            reqfds = NULL;
        }
    }

    /*
     * Put the whole message into a single buffer.
     * Replies land in the requester's buffer; requests get a fresh one.
     */
    if (isreply) {
        if (hdr.size > msg->rsize) {
            error_setg(errp, "reply larger than recv buffer");
            goto err;
        }
        *msg->hdr = hdr;
        data = (char *)msg->hdr + sizeof(hdr);
    } else {
        buf = g_malloc0(hdr.size);
        memcpy(buf, &hdr, sizeof(hdr));
        data = buf + sizeof(hdr);
        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
        msg->type = VFIO_MSG_REQ;
    }

    /*
     * Read rest of message.
     */
    msgleft = hdr.size - sizeof(hdr);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* prepare to complete read on next iteration */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            proxy->part_recv = msg;
            proxy->recv_left = msgleft;
            return ret;
        }

        if (ret <= 0) {
            /*
             * NOTE(review): on a request, the freshly allocated buf/msg
             * appear to be dropped here without being freed - confirm
             * whether this leak on a fatal stream error is acceptable.
             */
            goto fatal;
        }
        trace_vfio_user_recv_read(hdr.id, ret);

        msgleft -= ret;
        data += ret;
    }

    vfio_user_process(proxy, msg, isreply);
    return 0;

    /*
     * fatal means the other side closed or we don't trust the stream
     * err means this message is corrupt
     */
fatal:
    vfio_user_shutdown(proxy);
    proxy->state = VFIO_PROXY_ERROR;

    /* set error if server side closed */
    if (ret == 0) {
        error_setg(errp, "server closed socket");
    }

err:
    for (i = 0; i < numfds; i++) {
        close(fdp[i]);
    }
    if (isreply && msg != NULL) {
        /* force an error to keep sending thread from hanging */
        vfio_user_set_error(msg->hdr, EINVAL);
        msg->complete = true;
        qemu_cond_signal(&msg->cv);
    }
    return -1;
}

/*
 * Socket read handler: drain all messages currently readable.
 * Runs in the iothread; stops on block or error.
 */
static void vfio_user_recv(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state == VFIO_PROXY_CONNECTED) {
        Error *local_err = NULL;

        while (vfio_user_recv_one(proxy, &local_err) == 0) {
            ;
        }

        if (local_err != NULL) {
            error_report_err(local_err);
        }
    }
}

/*
 * Send a single message, same return semantics as vfio_user_send_qio().
 *
 * Sent async messages are freed, others are moved to pending queue.
433 */ 434 static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp) 435 { 436 VFIOUserMsg *msg; 437 ssize_t ret; 438 439 msg = QTAILQ_FIRST(&proxy->outgoing); 440 ret = vfio_user_send_qio(proxy, msg, errp); 441 if (ret < 0) { 442 return ret; 443 } 444 445 QTAILQ_REMOVE(&proxy->outgoing, msg, next); 446 if (msg->type == VFIO_MSG_ASYNC) { 447 vfio_user_recycle(proxy, msg); 448 } else { 449 QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 450 msg->pending = true; 451 } 452 453 return ret; 454 } 455 456 /* 457 * Send messages from outgoing queue when the socket buffer has space. 458 * If we deplete 'outgoing', remove ourselves from the poll list. 459 */ 460 static void vfio_user_send(void *opaque) 461 { 462 VFIOUserProxy *proxy = opaque; 463 464 QEMU_LOCK_GUARD(&proxy->lock); 465 466 if (proxy->state == VFIO_PROXY_CONNECTED) { 467 while (!QTAILQ_EMPTY(&proxy->outgoing)) { 468 Error *local_err = NULL; 469 int ret; 470 471 ret = vfio_user_send_one(proxy, &local_err); 472 473 if (ret == QIO_CHANNEL_ERR_BLOCK) { 474 return; 475 } else if (ret == -1) { 476 error_report_err(local_err); 477 return; 478 } 479 } 480 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 481 vfio_user_recv, NULL, NULL, proxy); 482 } 483 } 484 485 static void vfio_user_cb(void *opaque) 486 { 487 VFIOUserProxy *proxy = opaque; 488 489 QEMU_LOCK_GUARD(&proxy->lock); 490 491 proxy->state = VFIO_PROXY_CLOSED; 492 qemu_cond_signal(&proxy->close_cv); 493 } 494 495 496 /* 497 * Functions called by main or CPU threads 498 */ 499 500 /* 501 * Process incoming requests. 502 * 503 * The bus-specific callback has the form: 504 * request(opaque, msg) 505 * where 'opaque' was specified in vfio_user_set_handler 506 * and 'msg' is the inbound message. 507 * 508 * The callback is responsible for disposing of the message buffer, 509 * usually by re-using it when calling vfio_send_reply or vfio_send_error, 510 * both of which free their message buffer when the reply is sent. 
511 * 512 * If the callback uses a new buffer, it needs to free the old one. 513 */ 514 static void vfio_user_request(void *opaque) 515 { 516 VFIOUserProxy *proxy = opaque; 517 VFIOUserMsgQ new, free; 518 VFIOUserMsg *msg, *m1; 519 520 /* reap all incoming */ 521 QTAILQ_INIT(&new); 522 WITH_QEMU_LOCK_GUARD(&proxy->lock) { 523 QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) { 524 QTAILQ_REMOVE(&proxy->incoming, msg, next); 525 QTAILQ_INSERT_TAIL(&new, msg, next); 526 } 527 } 528 529 /* process list */ 530 QTAILQ_INIT(&free); 531 QTAILQ_FOREACH_SAFE(msg, &new, next, m1) { 532 QTAILQ_REMOVE(&new, msg, next); 533 trace_vfio_user_recv_request(msg->hdr->command); 534 proxy->request(proxy->req_arg, msg); 535 QTAILQ_INSERT_HEAD(&free, msg, next); 536 } 537 538 /* free list */ 539 WITH_QEMU_LOCK_GUARD(&proxy->lock) { 540 QTAILQ_FOREACH_SAFE(msg, &free, next, m1) { 541 vfio_user_recycle(proxy, msg); 542 } 543 } 544 } 545 546 /* 547 * Messages are queued onto the proxy's outgoing list. 548 * 549 * It handles 3 types of messages: 550 * 551 * async messages - replies and posted writes 552 * 553 * There will be no reply from the server, so message 554 * buffers are freed after they're sent. 555 * 556 * nowait messages - map/unmap during address space transactions 557 * 558 * These are also sent async, but a reply is expected so that 559 * vfio_wait_reqs() can wait for the youngest nowait request. 560 * They transition from the outgoing list to the pending list 561 * when sent, and are freed when the reply is received. 562 * 563 * wait messages - all other requests 564 * 565 * The reply to these messages is waited for by their caller. 566 * They also transition from outgoing to pending when sent, but 567 * the message buffer is returned to the caller with the reply 568 * contents. The caller is responsible for freeing these messages. 
569 * 570 * As an optimization, if the outgoing list and the socket send 571 * buffer are empty, the message is sent inline instead of being 572 * added to the outgoing list. The rest of the transitions are 573 * unchanged. 574 */ 575 static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg, 576 Error **errp) 577 { 578 int ret; 579 580 /* 581 * Unsent outgoing msgs - add to tail 582 */ 583 if (!QTAILQ_EMPTY(&proxy->outgoing)) { 584 QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); 585 return true; 586 } 587 588 /* 589 * Try inline - if blocked, queue it and kick send poller 590 */ 591 if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { 592 ret = QIO_CHANNEL_ERR_BLOCK; 593 } else { 594 ret = vfio_user_send_qio(proxy, msg, errp); 595 } 596 597 if (ret == QIO_CHANNEL_ERR_BLOCK) { 598 QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); 599 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 600 vfio_user_recv, proxy->ctx, 601 vfio_user_send, proxy); 602 return true; 603 } 604 if (ret == -1) { 605 return false; 606 } 607 608 /* 609 * Sent - free async, add others to pending 610 */ 611 if (msg->type == VFIO_MSG_ASYNC) { 612 vfio_user_recycle(proxy, msg); 613 } else { 614 QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); 615 msg->pending = true; 616 } 617 618 return true; 619 } 620 621 /* 622 * Returns false if we did not successfully receive a reply message, in which 623 * case @errp will be populated. 624 * 625 * In either case, the caller must free @hdr and @fds if needed. 626 */ 627 bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr, 628 VFIOUserFDs *fds, int rsize, Error **errp) 629 { 630 VFIOUserMsg *msg; 631 bool ok = false; 632 633 if (hdr->flags & VFIO_USER_NO_REPLY) { 634 error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__); 635 return false; 636 } 637 638 qemu_mutex_lock(&proxy->lock); 639 640 msg = vfio_user_getmsg(proxy, hdr, fds); 641 msg->id = hdr->id; 642 msg->rsize = rsize ? 
rsize : hdr->size; 643 msg->type = VFIO_MSG_WAIT; 644 645 ok = vfio_user_send_queued(proxy, msg, errp); 646 647 if (ok) { 648 while (!msg->complete) { 649 if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { 650 VFIOUserMsgQ *list; 651 652 list = msg->pending ? &proxy->pending : &proxy->outgoing; 653 QTAILQ_REMOVE(list, msg, next); 654 error_setg_errno(errp, ETIMEDOUT, 655 "timed out waiting for reply"); 656 ok = false; 657 break; 658 } 659 } 660 } 661 662 vfio_user_recycle(proxy, msg); 663 664 qemu_mutex_unlock(&proxy->lock); 665 666 return ok; 667 } 668 669 static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets = 670 QLIST_HEAD_INITIALIZER(vfio_user_sockets); 671 672 VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) 673 { 674 VFIOUserProxy *proxy; 675 QIOChannelSocket *sioc; 676 QIOChannel *ioc; 677 char *sockname; 678 679 if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { 680 error_setg(errp, "vfio_user_connect - bad address family"); 681 return NULL; 682 } 683 sockname = addr->u.q_unix.path; 684 685 sioc = qio_channel_socket_new(); 686 ioc = QIO_CHANNEL(sioc); 687 if (qio_channel_socket_connect_sync(sioc, addr, errp)) { 688 object_unref(OBJECT(ioc)); 689 return NULL; 690 } 691 qio_channel_set_blocking(ioc, false, NULL); 692 693 proxy = g_malloc0(sizeof(VFIOUserProxy)); 694 proxy->sockname = g_strdup_printf("unix:%s", sockname); 695 proxy->ioc = ioc; 696 697 /* init defaults */ 698 proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; 699 proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; 700 proxy->max_dma = VFIO_USER_DEF_MAP_MAX; 701 proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; 702 proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; 703 proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; 704 705 proxy->flags = VFIO_PROXY_CLIENT; 706 proxy->state = VFIO_PROXY_CONNECTED; 707 708 qemu_mutex_init(&proxy->lock); 709 qemu_cond_init(&proxy->close_cv); 710 711 if (vfio_user_iothread == NULL) { 712 vfio_user_iothread = iothread_create("VFIO user", errp); 713 } 714 715 
proxy->ctx = iothread_get_aio_context(vfio_user_iothread); 716 proxy->req_bh = qemu_bh_new(vfio_user_request, proxy); 717 718 QTAILQ_INIT(&proxy->outgoing); 719 QTAILQ_INIT(&proxy->incoming); 720 QTAILQ_INIT(&proxy->free); 721 QTAILQ_INIT(&proxy->pending); 722 QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next); 723 724 return proxy; 725 } 726 727 void vfio_user_set_handler(VFIODevice *vbasedev, 728 void (*handler)(void *opaque, VFIOUserMsg *msg), 729 void *req_arg) 730 { 731 VFIOUserProxy *proxy = vbasedev->proxy; 732 733 proxy->request = handler; 734 proxy->req_arg = req_arg; 735 qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, 736 vfio_user_recv, NULL, NULL, proxy); 737 } 738 739 void vfio_user_disconnect(VFIOUserProxy *proxy) 740 { 741 VFIOUserMsg *r1, *r2; 742 743 qemu_mutex_lock(&proxy->lock); 744 745 /* our side is quitting */ 746 if (proxy->state == VFIO_PROXY_CONNECTED) { 747 vfio_user_shutdown(proxy); 748 if (!QTAILQ_EMPTY(&proxy->pending)) { 749 error_printf("vfio_user_disconnect: outstanding requests\n"); 750 } 751 } 752 object_unref(OBJECT(proxy->ioc)); 753 proxy->ioc = NULL; 754 qemu_bh_delete(proxy->req_bh); 755 proxy->req_bh = NULL; 756 757 proxy->state = VFIO_PROXY_CLOSING; 758 QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) { 759 qemu_cond_destroy(&r1->cv); 760 QTAILQ_REMOVE(&proxy->outgoing, r1, next); 761 g_free(r1); 762 } 763 QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) { 764 qemu_cond_destroy(&r1->cv); 765 QTAILQ_REMOVE(&proxy->incoming, r1, next); 766 g_free(r1); 767 } 768 QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) { 769 qemu_cond_destroy(&r1->cv); 770 QTAILQ_REMOVE(&proxy->pending, r1, next); 771 g_free(r1); 772 } 773 QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) { 774 qemu_cond_destroy(&r1->cv); 775 QTAILQ_REMOVE(&proxy->free, r1, next); 776 g_free(r1); 777 } 778 779 /* 780 * Make sure the iothread isn't blocking anywhere 781 * with a ref to this proxy by waiting for a BH 782 * handler to run after the proxy fd 
handlers were 783 * deleted above. 784 */ 785 aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy); 786 qemu_cond_wait(&proxy->close_cv, &proxy->lock); 787 788 /* we now hold the only ref to proxy */ 789 qemu_mutex_unlock(&proxy->lock); 790 qemu_cond_destroy(&proxy->close_cv); 791 qemu_mutex_destroy(&proxy->lock); 792 793 QLIST_REMOVE(proxy, next); 794 if (QLIST_EMPTY(&vfio_user_sockets)) { 795 iothread_destroy(vfio_user_iothread); 796 vfio_user_iothread = NULL; 797 } 798 799 g_free(proxy->sockname); 800 g_free(proxy); 801 } 802 803 void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, 804 uint32_t size, uint32_t flags) 805 { 806 static uint16_t next_id; 807 808 hdr->id = qatomic_fetch_inc(&next_id); 809 hdr->command = cmd; 810 hdr->size = size; 811 hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; 812 hdr->error_reply = 0; 813 } 814 815 struct cap_entry { 816 const char *name; 817 bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp); 818 }; 819 820 static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict, 821 struct cap_entry caps[], Error **errp) 822 { 823 QObject *qobj; 824 struct cap_entry *p; 825 826 for (p = caps; p->name != NULL; p++) { 827 qobj = qdict_get(qdict, p->name); 828 if (qobj != NULL) { 829 if (!p->check(proxy, qobj, errp)) { 830 return false; 831 } 832 qdict_del(qdict, p->name); 833 } 834 } 835 836 /* warning, for now */ 837 if (qdict_size(qdict) != 0) { 838 warn_report("spurious capabilities"); 839 } 840 return true; 841 } 842 843 static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 844 { 845 QNum *qn = qobject_to(QNum, qobj); 846 uint64_t pgsize; 847 848 if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { 849 error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); 850 return false; 851 } 852 853 /* must be larger than default */ 854 if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { 855 error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); 856 return false; 857 } 858 859 
proxy->migr_pgsize = pgsize; 860 return true; 861 } 862 863 static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 864 { 865 QNum *qn = qobject_to(QNum, qobj); 866 uint64_t bitmap_size; 867 868 if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { 869 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); 870 return false; 871 } 872 873 /* can only lower it */ 874 if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { 875 error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); 876 return false; 877 } 878 879 proxy->max_bitmap = bitmap_size; 880 return true; 881 } 882 883 static struct cap_entry caps_migr[] = { 884 { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, 885 { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, 886 { NULL } 887 }; 888 889 static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 890 { 891 QNum *qn = qobject_to(QNum, qobj); 892 uint64_t max_send_fds; 893 894 if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || 895 max_send_fds > VFIO_USER_MAX_MAX_FDS) { 896 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 897 return false; 898 } 899 proxy->max_send_fds = max_send_fds; 900 return true; 901 } 902 903 static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 904 { 905 QNum *qn = qobject_to(QNum, qobj); 906 uint64_t max_xfer_size; 907 908 if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || 909 max_xfer_size > VFIO_USER_MAX_MAX_XFER) { 910 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); 911 return false; 912 } 913 proxy->max_xfer_size = max_xfer_size; 914 return true; 915 } 916 917 static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 918 { 919 QNum *qn = qobject_to(QNum, qobj); 920 uint64_t pgsizes; 921 922 if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { 923 error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES); 924 return false; 925 } 926 927 /* must be larger than default */ 928 if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { 929 
error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); 930 return false; 931 } 932 933 proxy->dma_pgsizes = pgsizes; 934 return true; 935 } 936 937 static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 938 { 939 QNum *qn = qobject_to(QNum, qobj); 940 uint64_t max_dma; 941 942 if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { 943 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); 944 return false; 945 } 946 947 /* can only lower it */ 948 if (max_dma > VFIO_USER_DEF_MAP_MAX) { 949 error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); 950 return false; 951 } 952 953 proxy->max_dma = max_dma; 954 return true; 955 } 956 957 static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 958 { 959 QDict *qdict = qobject_to(QDict, qobj); 960 961 if (qdict == NULL) { 962 error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); 963 return true; 964 } 965 return caps_parse(proxy, qdict, caps_migr, errp); 966 } 967 968 static struct cap_entry caps_cap[] = { 969 { VFIO_USER_CAP_MAX_FDS, check_max_fds }, 970 { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, 971 { VFIO_USER_CAP_PGSIZES, check_pgsizes }, 972 { VFIO_USER_CAP_MAP_MAX, check_max_dma }, 973 { VFIO_USER_CAP_MIGR, check_migr }, 974 { NULL } 975 }; 976 977 static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp) 978 { 979 QDict *qdict = qobject_to(QDict, qobj); 980 981 if (qdict == NULL) { 982 error_setg(errp, "malformed %s", VFIO_USER_CAP); 983 return false; 984 } 985 return caps_parse(proxy, qdict, caps_cap, errp); 986 } 987 988 static struct cap_entry ver_0_0[] = { 989 { VFIO_USER_CAP, check_cap }, 990 { NULL } 991 }; 992 993 static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps, 994 Error **errp) 995 { 996 QObject *qobj; 997 QDict *qdict; 998 bool ret; 999 1000 qobj = qobject_from_json(caps, NULL); 1001 if (qobj == NULL) { 1002 error_setg(errp, "malformed capabilities %s", caps); 1003 return false; 1004 } 1005 qdict = qobject_to(QDict, 
qobj); 1006 if (qdict == NULL) { 1007 error_setg(errp, "capabilities %s not an object", caps); 1008 qobject_unref(qobj); 1009 return false; 1010 } 1011 ret = caps_parse(proxy, qdict, ver_0_0, errp); 1012 1013 qobject_unref(qobj); 1014 return ret; 1015 } 1016 1017 static GString *caps_json(void) 1018 { 1019 QDict *dict = qdict_new(); 1020 QDict *capdict = qdict_new(); 1021 QDict *migdict = qdict_new(); 1022 GString *str; 1023 1024 qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); 1025 qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); 1026 qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); 1027 1028 qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); 1029 qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); 1030 qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); 1031 qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); 1032 1033 qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); 1034 1035 str = qobject_to_json(QOBJECT(dict)); 1036 qobject_unref(dict); 1037 return str; 1038 } 1039 1040 bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp) 1041 { 1042 g_autofree VFIOUserVersion *msgp = NULL; 1043 GString *caps; 1044 char *reply; 1045 int size, caplen; 1046 1047 caps = caps_json(); 1048 caplen = caps->len + 1; 1049 size = sizeof(*msgp) + caplen; 1050 msgp = g_malloc0(size); 1051 1052 vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); 1053 msgp->major = VFIO_USER_MAJOR_VER; 1054 msgp->minor = VFIO_USER_MINOR_VER; 1055 memcpy(&msgp->capabilities, caps->str, caplen); 1056 g_string_free(caps, true); 1057 trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1058 1059 if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) { 1060 return false; 1061 } 1062 1063 if (msgp->hdr.flags & VFIO_USER_ERROR) { 1064 error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); 1065 return false; 1066 } 1067 
1068 if (msgp->major != VFIO_USER_MAJOR_VER || 1069 msgp->minor > VFIO_USER_MINOR_VER) { 1070 error_setg(errp, "incompatible server version"); 1071 return false; 1072 } 1073 1074 reply = msgp->capabilities; 1075 if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { 1076 error_setg(errp, "corrupt version reply"); 1077 return false; 1078 } 1079 1080 if (!caps_check(proxy, msgp->minor, reply, errp)) { 1081 return false; 1082 } 1083 1084 trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities); 1085 return true; 1086 } 1087