/*
 * vfio protocol over a UNIX socket.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-device.h"
#include "hw/vfio-user/proxy.h"
#include "hw/vfio-user/trace.h"
#include "qapi/error.h"
#include "qobject/qdict.h"
#include "qobject/qjson.h"
#include "qobject/qnum.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "system/iothread.h"

static IOThread *vfio_user_iothread;

static void vfio_user_shutdown(VFIOUserProxy *proxy);
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds);
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);

static void vfio_user_recv(void *opaque);
static void vfio_user_send(void *opaque);
static void vfio_user_cb(void *opaque);

static void vfio_user_request(void *opaque);

static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
{
    hdr->flags |= VFIO_USER_ERROR;
    hdr->error_reply = err;
}

/*
 * Functions called by main, CPU, or iothread threads
 */

static void vfio_user_shutdown(VFIOUserProxy *proxy)
{
    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL,
                                   proxy->ctx, NULL, NULL);
}

/*
 * Same return values as qio_channel_writev_full():
 *
 * QIO_CHANNEL_ERR_BLOCK: *errp not set
 * -1: *errp will be populated
 * otherwise: bytes written
 */
static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    VFIOUserFDs *fds = msg->fds;
    struct iovec iov = {
        .iov_base = msg->hdr,
        .iov_len = msg->hdr->size,
    };
    size_t numfds = 0;
    int *fdp = NULL;
    ssize_t ret;

    if (fds != NULL && fds->send_fds != 0) {
        numfds = fds->send_fds;
        fdp = fds->fds;
    }

    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp);

    if (ret == -1) {
        vfio_user_set_error(msg->hdr, EIO);
        vfio_user_shutdown(proxy);
    }
    trace_vfio_user_send_write(msg->hdr->id, ret);

    return ret;
}

static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds)
{
    VFIOUserMsg *msg;

    msg = QTAILQ_FIRST(&proxy->free);
    if (msg != NULL) {
        QTAILQ_REMOVE(&proxy->free, msg, next);
    } else {
        msg = g_malloc0(sizeof(*msg));
        qemu_cond_init(&msg->cv);
    }

    msg->hdr = hdr;
    msg->fds = fds;
    return msg;
}
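
/*
 * Message objects cycle between proxy->free and the active queues:
 * vfio_user_getmsg() reuses a free-list entry when one exists, and
 * vfio_user_recycle() below returns it. Illustrative cycle (sketch
 * only):
 *
 *   msg = vfio_user_getmsg(proxy, hdr, fds);   // free list -> in use
 *   ...queue on outgoing/pending, send, handle the reply...
 *   vfio_user_recycle(proxy, msg);             // in use -> free list
 */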

/*
 * Recycle a message list entry to the free list.
 */
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
{
    if (msg->type == VFIO_MSG_NONE) {
        error_printf("vfio_user_recycle - freeing free msg\n");
        return;
    }

    /* free msg buffer if no one is waiting to consume the reply */
    if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
        g_free(msg->hdr);
        if (msg->fds != NULL) {
            g_free(msg->fds);
        }
    }

    msg->type = VFIO_MSG_NONE;
    msg->hdr = NULL;
    msg->fds = NULL;
    msg->complete = false;
    msg->pending = false;
    QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
}

VFIOUserFDs *vfio_user_getfds(int numfds)
{
    VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));

    fds->fds = (int *)((char *)fds + sizeof(*fds));

    return fds;
}

/*
 * Functions only called by iothread
 */

/*
 * Process a received message.
 */
static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                              bool isreply)
{

    /*
     * Replies signal a waiter; if no one is waiting, just check for
     * errors and free the message buffer.
     *
     * Requests get queued for the BH.
     */
    if (isreply) {
        msg->complete = true;
        if (msg->type == VFIO_MSG_WAIT) {
            qemu_cond_signal(&msg->cv);
        } else {
            if (msg->hdr->flags & VFIO_USER_ERROR) {
                error_printf("vfio_user_process: error reply on async ");
                error_printf("request command %x error %s\n",
                             msg->hdr->command,
                             strerror(msg->hdr->error_reply));
            }
            /* youngest nowait msg has been ack'd */
            if (proxy->last_nowait == msg) {
                proxy->last_nowait = NULL;
            }
            vfio_user_recycle(proxy, msg);
        }
    } else {
        QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
        qemu_bh_schedule(proxy->req_bh);
    }
}

/*
 * Complete a partial message read
 */
static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = proxy->part_recv;
    size_t msgleft = proxy->recv_left;
    bool isreply;
    char *data;
    int ret;

    data = (char *)msg->hdr + (msg->hdr->size - msgleft);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* error or would block */
        if (ret <= 0) {
            /* try for the rest on the next iteration */
            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                proxy->recv_left = msgleft;
            }
            return ret;
        }
        trace_vfio_user_recv_read(msg->hdr->id, ret);

        msgleft -= ret;
        data += ret;
    }

    /*
     * Read complete message, process it.
     */
    proxy->part_recv = NULL;
    proxy->recv_left = 0;
    isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
    vfio_user_process(proxy, msg, isreply);

    /* return positive value */
    return 1;
}
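
/*
 * Every message on the wire starts with a VFIOUserHdr (declared in
 * hw/vfio-user/protocol.h) followed by hdr.size - sizeof(hdr) bytes of
 * command-specific payload; file descriptors, when present, travel as
 * SCM_RIGHTS ancillary data on the UNIX socket. Reply matching below
 * keys on hdr.id.
 */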

/*
 * Receive and process one incoming message.
 *
 * For replies, find matching outgoing request and wake any waiters.
 * For requests, queue in incoming list and run request BH.
 */
static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = NULL;
    g_autofree int *fdp = NULL;
    VFIOUserFDs *reqfds;
    VFIOUserHdr hdr;
    struct iovec iov = {
        .iov_base = &hdr,
        .iov_len = sizeof(hdr),
    };
    bool isreply = false;
    int i, ret;
    size_t msgleft, numfds = 0;
    char *data = NULL;
    char *buf = NULL;

    /*
     * Complete any partial reads
     */
    if (proxy->part_recv != NULL) {
        ret = vfio_user_complete(proxy, errp);

        /* still not complete, try later */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        /* else fall into reading another msg */
    }

    /*
     * Read header
     */
    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
                                 errp);
    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        return ret;
    }

    /* read error or other side closed connection */
    if (ret <= 0) {
        goto fatal;
    }

    if (ret < sizeof(hdr)) {
        error_setg(errp, "short read of header");
        goto fatal;
    }

    /*
     * Validate header
     */
    if (hdr.size < sizeof(VFIOUserHdr)) {
        error_setg(errp, "bad header size");
        goto fatal;
    }
    switch (hdr.flags & VFIO_USER_TYPE) {
    case VFIO_USER_REQUEST:
        isreply = false;
        break;
    case VFIO_USER_REPLY:
        isreply = true;
        break;
    default:
        error_setg(errp, "unknown message type");
        goto fatal;
    }
    trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
                             hdr.flags);

    /*
     * For replies, find the matching pending request.
     * For requests, reap incoming FDs.
     */
    if (isreply) {
        QTAILQ_FOREACH(msg, &proxy->pending, next) {
            if (hdr.id == msg->id) {
                break;
            }
        }
        if (msg == NULL) {
            error_setg(errp, "unexpected reply");
            goto err;
        }
        QTAILQ_REMOVE(&proxy->pending, msg, next);

        /*
         * Process any received FDs
         */
        if (numfds != 0) {
            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
                error_setg(errp, "unexpected FDs");
                goto err;
            }
            msg->fds->recv_fds = numfds;
            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
        }
    } else {
        if (numfds != 0) {
            reqfds = vfio_user_getfds(numfds);
            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
        } else {
            reqfds = NULL;
        }
    }

    /*
     * Put the whole message into a single buffer.
     */
    if (isreply) {
        if (hdr.size > msg->rsize) {
            error_setg(errp, "reply larger than recv buffer");
            goto err;
        }
        *msg->hdr = hdr;
        data = (char *)msg->hdr + sizeof(hdr);
    } else {
        if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
            error_setg(errp, "vfio_user_recv request larger than max");
            goto err;
        }
        buf = g_malloc0(hdr.size);
        memcpy(buf, &hdr, sizeof(hdr));
        data = buf + sizeof(hdr);
        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
        msg->type = VFIO_MSG_REQ;
    }

    /*
     * Read rest of message.
     */
    msgleft = hdr.size - sizeof(hdr);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* prepare to complete read on next iteration */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            proxy->part_recv = msg;
            proxy->recv_left = msgleft;
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        trace_vfio_user_recv_read(hdr.id, ret);

        msgleft -= ret;
        data += ret;
    }

    vfio_user_process(proxy, msg, isreply);
    return 0;

    /*
     * fatal means the other side closed or we don't trust the stream
     * err means this message is corrupt
     */
fatal:
    vfio_user_shutdown(proxy);
    proxy->state = VFIO_PROXY_ERROR;

    /* set error if server side closed */
    if (ret == 0) {
        error_setg(errp, "server closed socket");
    }

err:
    for (i = 0; i < numfds; i++) {
        close(fdp[i]);
    }
    if (isreply && msg != NULL) {
        /* force an error to keep sending thread from hanging */
        vfio_user_set_error(msg->hdr, EINVAL);
        msg->complete = true;
        qemu_cond_signal(&msg->cv);
    }
    return -1;
}

static void vfio_user_recv(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state == VFIO_PROXY_CONNECTED) {
        Error *local_err = NULL;

        while (vfio_user_recv_one(proxy, &local_err) == 0) {
            ;
        }

        if (local_err != NULL) {
            error_report_err(local_err);
        }
    }
}

/*
 * Send a single message, same return semantics as vfio_user_send_qio().
 *
 * Sent async messages are freed, others are moved to pending queue.
 */
static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg;
    ssize_t ret;

    msg = QTAILQ_FIRST(&proxy->outgoing);
    ret = vfio_user_send_qio(proxy, msg, errp);
    if (ret < 0) {
        return ret;
    }

    QTAILQ_REMOVE(&proxy->outgoing, msg, next);
    if (msg->type == VFIO_MSG_ASYNC) {
        vfio_user_recycle(proxy, msg);
    } else {
        QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
        msg->pending = true;
    }

    return ret;
}

/*
 * Send messages from outgoing queue when the socket buffer has space.
 * If we deplete 'outgoing', remove ourselves from the poll list.
 */
static void vfio_user_send(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state == VFIO_PROXY_CONNECTED) {
        while (!QTAILQ_EMPTY(&proxy->outgoing)) {
            Error *local_err = NULL;
            int ret;

            ret = vfio_user_send_one(proxy, &local_err);

            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                return;
            } else if (ret == -1) {
                error_report_err(local_err);
                return;
            }
        }
        qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                       vfio_user_recv, NULL, NULL, proxy);
    }
}

static void vfio_user_cb(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    proxy->state = VFIO_PROXY_CLOSED;
    qemu_cond_signal(&proxy->close_cv);
}


/*
 * Functions called by main or CPU threads
 */
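
/*
 * Illustrative request-callback shape (sketch only; the real handlers
 * live in the vfio-user device code, and 'my_request'/'MyState' are
 * hypothetical names):
 *
 *   static void my_request(void *opaque, VFIOUserMsg *msg)
 *   {
 *       MyState *s = opaque;   // 'opaque' from vfio_user_set_handler()
 *
 *       ...act on msg->hdr->command, then dispose of the buffer...
 *       vfio_user_send_reply(s->proxy, msg->hdr, size);
 *   }
 */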

/*
 * Process incoming requests.
 *
 * The bus-specific callback has the form:
 *    request(opaque, msg)
 * where 'opaque' was specified in vfio_user_set_handler
 * and 'msg' is the inbound message.
 *
 * The callback is responsible for disposing of the message buffer,
 * usually by re-using it when calling vfio_user_send_reply() or
 * vfio_user_send_error(), both of which free their message buffer
 * when the reply is sent.
 *
 * If the callback uses a new buffer, it needs to free the old one.
 */
static void vfio_user_request(void *opaque)
{
    VFIOUserProxy *proxy = opaque;
    VFIOUserMsgQ new, free;
    VFIOUserMsg *msg, *m1;

    /* reap all incoming */
    QTAILQ_INIT(&new);
    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
        QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
            QTAILQ_REMOVE(&proxy->incoming, msg, next);
            QTAILQ_INSERT_TAIL(&new, msg, next);
        }
    }

    /* process list */
    QTAILQ_INIT(&free);
    QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
        QTAILQ_REMOVE(&new, msg, next);
        trace_vfio_user_recv_request(msg->hdr->command);
        proxy->request(proxy->req_arg, msg);
        QTAILQ_INSERT_HEAD(&free, msg, next);
    }

    /* free list */
    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
        QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
            vfio_user_recycle(proxy, msg);
        }
    }
}
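
/*
 * Illustrative handler that replies from a new buffer (a sketch of the
 * disposal contract above; 'reply_size' is a hypothetical value):
 *
 *   VFIOUserHdr *reply = g_malloc0(reply_size);
 *
 *   reply->id = msg->hdr->id;        // reply carries the request id
 *   reply->command = msg->hdr->command;
 *   g_free(msg->hdr);                // old buffer is the handler's to free
 *   vfio_user_send_reply(proxy, reply, reply_size);
 */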

/*
 * Messages are queued onto the proxy's outgoing list.
 *
 * vfio_user_send_queued() handles 3 types of messages:
 *
 * async messages - replies and posted writes
 *
 * There will be no reply from the server, so message
 * buffers are freed after they're sent.
 *
 * nowait messages - map/unmap during address space transactions
 *
 * These are also sent async, but a reply is expected so that
 * vfio_user_wait_reqs() can wait for the youngest nowait request.
 * They transition from the outgoing list to the pending list
 * when sent, and are freed when the reply is received.
 *
 * wait messages - all other requests
 *
 * The reply to these messages is waited for by their caller.
 * They also transition from outgoing to pending when sent, but
 * the message buffer is returned to the caller with the reply
 * contents. The caller is responsible for freeing these messages.
 *
 * As an optimization, if the outgoing list and the socket send
 * buffer are empty, the message is sent inline instead of being
 * added to the outgoing list. The rest of the transitions are
 * unchanged.
 */
static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    int ret;

    /*
     * Unsent outgoing msgs - add to tail
     */
    if (!QTAILQ_EMPTY(&proxy->outgoing)) {
        QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
        return true;
    }

    /*
     * Try inline - if blocked, queue it and kick send poller
     */
    if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) {
        ret = QIO_CHANNEL_ERR_BLOCK;
    } else {
        ret = vfio_user_send_qio(proxy, msg, errp);
    }

    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
        qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                       vfio_user_recv, proxy->ctx,
                                       vfio_user_send, proxy);
        return true;
    }
    if (ret == -1) {
        return false;
    }

    /*
     * Sent - free async, add others to pending
     */
    if (msg->type == VFIO_MSG_ASYNC) {
        vfio_user_recycle(proxy, msg);
    } else {
        QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
        msg->pending = true;
    }

    return true;
}

/*
 * nowait send - vfio_user_wait_reqs() can wait for the reply later
 *
 * Returns false if the message could not be sent, in which case @errp
 * will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                           VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->type = VFIO_MSG_NOWAIT;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        vfio_user_recycle(proxy, msg);
        return false;
    }

    if (!vfio_user_send_queued(proxy, msg, errp)) {
        vfio_user_recycle(proxy, msg);
        return false;
    }

    proxy->last_nowait = msg;

    return true;
}

/*
 * Returns false if we did not successfully receive a reply message, in which
 * case @errp will be populated.
 *
 * In either case, the caller must free @hdr and @fds if needed.
 */
bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                         VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;
    bool ok = false;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        return false;
    }

    qemu_mutex_lock(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->type = VFIO_MSG_WAIT;

    ok = vfio_user_send_queued(proxy, msg, errp);

    if (ok) {
        while (!msg->complete) {
            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
                                     proxy->wait_time)) {
                VFIOUserMsgQ *list;

                list = msg->pending ? &proxy->pending : &proxy->outgoing;
                QTAILQ_REMOVE(list, msg, next);
                error_setg_errno(errp, ETIMEDOUT,
                                 "timed out waiting for reply");
                ok = false;
                break;
            }
        }
    }

    vfio_user_recycle(proxy, msg);

    qemu_mutex_unlock(&proxy->lock);

    return ok;
}
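
/*
 * Illustrative vfio_user_send_wait() caller (sketch; struct and command
 * names are assumed from the protocol definitions, and the reply is
 * written back into the same buffer, so it must be sized for the
 * reply):
 *
 *   g_autofree VFIOUserDeviceInfo *msgp = g_malloc0(sizeof(*msgp));
 *
 *   vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_INFO,
 *                         sizeof(*msgp), 0);
 *   if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
 *       return false;                // errp has been populated
 *   }
 *   ...reply fields are now valid in *msgp...
 */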

/*
 * async send - msg can be queued, but will be freed when sent
 *
 * Returns false on failure, in which case @errp will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
static bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                 VFIOUserFDs *fds, Error **errp)
{
    VFIOUserMsg *msg;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = 0;
    msg->type = VFIO_MSG_ASYNC;

    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
        error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
        vfio_user_recycle(proxy, msg);
        return false;
    }

    if (!vfio_user_send_queued(proxy, msg, errp)) {
        vfio_user_recycle(proxy, msg);
        return false;
    }

    return true;
}

void vfio_user_wait_reqs(VFIOUserProxy *proxy)
{
    VFIOUserMsg *msg;

    /*
     * Any DMA map/unmap requests sent in the middle
     * of a memory region transaction were sent nowait.
     * Wait for them here.
     */
    qemu_mutex_lock(&proxy->lock);
    if (proxy->last_nowait != NULL) {
        /*
         * Change type to WAIT to wait for reply
         */
        msg = proxy->last_nowait;
        msg->type = VFIO_MSG_WAIT;
        proxy->last_nowait = NULL;
        while (!msg->complete) {
            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
                                     proxy->wait_time)) {
                VFIOUserMsgQ *list;

                list = msg->pending ? &proxy->pending : &proxy->outgoing;
                QTAILQ_REMOVE(list, msg, next);
                error_printf("vfio_user_wait_reqs - timed out\n");
                break;
            }
        }

        if (msg->hdr->flags & VFIO_USER_ERROR) {
            error_printf("vfio_user_wait_reqs - error reply on async ");
            error_printf("request: command %x error %s\n", msg->hdr->command,
                         strerror(msg->hdr->error_reply));
        }

        /*
         * Change type back to NOWAIT to free
         */
        msg->type = VFIO_MSG_NOWAIT;
        vfio_user_recycle(proxy, msg);
    }

    qemu_mutex_unlock(&proxy->lock);
}

/*
 * Reply to an incoming request.
 */
void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
{
    Error *local_err = NULL;

    if (size < sizeof(VFIOUserHdr)) {
        error_printf("%s: size too small\n", __func__);
        g_free(hdr);
        return;
    }

    /*
     * convert header to associated reply
     */
    hdr->flags = VFIO_USER_REPLY;
    hdr->size = size;

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}

/*
 * Send an error reply to an incoming request.
 */
void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
{
    Error *local_err = NULL;

    /*
     * convert header to associated reply
     */
    hdr->flags = VFIO_USER_REPLY;
    hdr->flags |= VFIO_USER_ERROR;
    hdr->error_reply = error;
    hdr->size = sizeof(*hdr);

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}
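
/*
 * Illustrative error path in a request handler (sketch; 'msgp' is a
 * hypothetical request struct, reusing its buffer as permitted above):
 *
 *   if (msgp->count > proxy->max_xfer_size) {
 *       vfio_user_send_error(proxy, &msgp->hdr, EINVAL);
 *       return;
 *   }
 */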

/*
 * Close FDs erroneously received in an incoming request.
 */
void vfio_user_putfds(VFIOUserMsg *msg)
{
    VFIOUserFDs *fds = msg->fds;
    int i;

    for (i = 0; i < fds->recv_fds; i++) {
        close(fds->fds[i]);
    }
    g_free(fds);
    msg->fds = NULL;
}

static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
    QLIST_HEAD_INITIALIZER(vfio_user_sockets);

VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
{
    VFIOUserProxy *proxy;
    QIOChannelSocket *sioc;
    QIOChannel *ioc;
    char *sockname;

    if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "vfio_user_connect - bad address family");
        return NULL;
    }
    sockname = addr->u.q_unix.path;

    sioc = qio_channel_socket_new();
    ioc = QIO_CHANNEL(sioc);
    if (qio_channel_socket_connect_sync(sioc, addr, errp)) {
        object_unref(OBJECT(ioc));
        return NULL;
    }
    qio_channel_set_blocking(ioc, false, NULL);

    proxy = g_malloc0(sizeof(VFIOUserProxy));
    proxy->sockname = g_strdup_printf("unix:%s", sockname);
    proxy->ioc = ioc;

    /* init defaults */
    proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
    proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
    proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
    proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
    proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
    proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;

    proxy->flags = VFIO_PROXY_CLIENT;
    proxy->state = VFIO_PROXY_CONNECTED;

    qemu_mutex_init(&proxy->lock);
    qemu_cond_init(&proxy->close_cv);

    if (vfio_user_iothread == NULL) {
        vfio_user_iothread = iothread_create("VFIO user", errp);
    }

    proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
    proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);

    QTAILQ_INIT(&proxy->outgoing);
    QTAILQ_INIT(&proxy->incoming);
    QTAILQ_INIT(&proxy->free);
    QTAILQ_INIT(&proxy->pending);
    QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);

    return proxy;
}

void vfio_user_set_handler(VFIODevice *vbasedev,
                           void (*handler)(void *opaque, VFIOUserMsg *msg),
                           void *req_arg)
{
    VFIOUserProxy *proxy = vbasedev->proxy;

    proxy->request = handler;
    proxy->req_arg = req_arg;
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                   vfio_user_recv, NULL, NULL, proxy);
}
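
/*
 * Illustrative proxy lifecycle (sketch; error handling elided, and the
 * caller is assumed to point vbasedev->proxy at the new proxy):
 *
 *   VFIOUserProxy *proxy = vfio_user_connect_dev(addr, errp);
 *
 *   vbasedev->proxy = proxy;
 *   vfio_user_set_handler(vbasedev, my_request, my_state);
 *   if (!vfio_user_validate_version(proxy, errp)) {
 *       ...
 *   }
 *   ...
 *   vfio_user_disconnect(proxy);
 */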

void vfio_user_disconnect(VFIOUserProxy *proxy)
{
    VFIOUserMsg *r1, *r2;

    qemu_mutex_lock(&proxy->lock);

    /* our side is quitting */
    if (proxy->state == VFIO_PROXY_CONNECTED) {
        vfio_user_shutdown(proxy);
        if (!QTAILQ_EMPTY(&proxy->pending)) {
            error_printf("vfio_user_disconnect: outstanding requests\n");
        }
    }
    object_unref(OBJECT(proxy->ioc));
    proxy->ioc = NULL;
    qemu_bh_delete(proxy->req_bh);
    proxy->req_bh = NULL;

    proxy->state = VFIO_PROXY_CLOSING;
    QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->outgoing, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->incoming, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->pending, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->free, r1, next);
        g_free(r1);
    }

    /*
     * Make sure the iothread isn't blocking anywhere
     * with a ref to this proxy by waiting for a BH
     * handler to run after the proxy fd handlers were
     * deleted above.
     */
    aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy);
    qemu_cond_wait(&proxy->close_cv, &proxy->lock);

    /* we now hold the only ref to proxy */
    qemu_mutex_unlock(&proxy->lock);
    qemu_cond_destroy(&proxy->close_cv);
    qemu_mutex_destroy(&proxy->lock);

    QLIST_REMOVE(proxy, next);
    if (QLIST_EMPTY(&vfio_user_sockets)) {
        iothread_destroy(vfio_user_iothread);
        vfio_user_iothread = NULL;
    }

    g_free(proxy->sockname);
    g_free(proxy);
}

void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                           uint32_t size, uint32_t flags)
{
    static uint16_t next_id;

    hdr->id = qatomic_fetch_inc(&next_id);
    hdr->command = cmd;
    hdr->size = size;
    hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;
    hdr->error_reply = 0;
}

struct cap_entry {
    const char *name;
    bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
};

static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
                       struct cap_entry caps[], Error **errp)
{
    QObject *qobj;
    struct cap_entry *p;

    for (p = caps; p->name != NULL; p++) {
        qobj = qdict_get(qdict, p->name);
        if (qobj != NULL) {
            if (!p->check(proxy, qobj, errp)) {
                return false;
            }
            qdict_del(qdict, p->name);
        }
    }

    /* warning, for now */
    if (qdict_size(qdict) != 0) {
        warn_report("spurious capabilities");
    }
    return true;
}
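
/*
 * Capability negotiation is table-driven: caps_parse() walks a
 * cap_entry table against a QDict, and nested objects recurse with
 * their own table. The JSON parsed below has this shape (keys are the
 * VFIO_USER_CAP_* macro strings):
 *
 *   { VFIO_USER_CAP: {
 *         VFIO_USER_CAP_MAX_FDS:  <uint>,
 *         VFIO_USER_CAP_MAX_XFER: <uint>,
 *         VFIO_USER_CAP_PGSIZES:  <uint>,
 *         VFIO_USER_CAP_MAP_MAX:  <uint>,
 *         VFIO_USER_CAP_MIGR: {
 *             VFIO_USER_CAP_PGSIZE:     <uint>,
 *             VFIO_USER_CAP_MAX_BITMAP: <uint>
 *         }
 *   } }
 */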

static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t pgsize;

    if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
        return false;
    }

    /* must be a multiple of the default page size */
    if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
        return false;
    }

    proxy->migr_pgsize = pgsize;
    return true;
}

static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t bitmap_size;

    if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    /* can only lower it */
    if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    proxy->max_bitmap = bitmap_size;
    return true;
}

static struct cap_entry caps_migr[] = {
    { VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
    { VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
    { NULL }
};

static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_send_fds;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) ||
        max_send_fds > VFIO_USER_MAX_MAX_FDS) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
        return false;
    }
    proxy->max_send_fds = max_send_fds;
    return true;
}

static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_xfer_size;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) ||
        max_xfer_size > VFIO_USER_MAX_MAX_XFER) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
        return false;
    }
    proxy->max_xfer_size = max_xfer_size;
    return true;
}

static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t pgsizes;

    if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
        return false;
    }

    /* must be a multiple of the default page size */
    if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
        return false;
    }

    proxy->dma_pgsizes = pgsizes;
    return true;
}

static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_dma;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    /* can only lower it */
    if (max_dma > VFIO_USER_DEF_MAP_MAX) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    proxy->max_dma = max_dma;
    return true;
}

static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *qdict = qobject_to(QDict, qobj);

    if (qdict == NULL) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MIGR);
        return false;
    }
    return caps_parse(proxy, qdict, caps_migr, errp);
}

static struct cap_entry caps_cap[] = {
    { VFIO_USER_CAP_MAX_FDS, check_max_fds },
    { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
    { VFIO_USER_CAP_PGSIZES, check_pgsizes },
    { VFIO_USER_CAP_MAP_MAX, check_max_dma },
    { VFIO_USER_CAP_MIGR, check_migr },
    { NULL }
};

static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *qdict = qobject_to(QDict, qobj);

    if (qdict == NULL) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP);
        return false;
    }
    return caps_parse(proxy, qdict, caps_cap, errp);
}

static struct cap_entry ver_0_0[] = {
    { VFIO_USER_CAP, check_cap },
    { NULL }
};

static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
                       Error **errp)
{
    QObject *qobj;
    QDict *qdict;
    bool ret;

    qobj = qobject_from_json(caps, NULL);
    if (qobj == NULL) {
        error_setg(errp, "malformed capabilities %s", caps);
        return false;
    }
    qdict = qobject_to(QDict, qobj);
    if (qdict == NULL) {
        error_setg(errp, "capabilities %s not an object", caps);
        qobject_unref(qobj);
        return false;
    }
    ret = caps_parse(proxy, qdict, ver_0_0, errp);

    qobject_unref(qobj);
    return ret;
}
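
/*
 * The VFIO_USER_VERSION handshake below sends a VFIOUserVersion struct
 * whose flexible 'capabilities' member carries the NUL-terminated JSON
 * string built by caps_json(). Wire layout (sketch):
 *
 *   +-------------+-------+-------+--------------------------+
 *   | VFIOUserHdr | major | minor | capabilities JSON + '\0' |
 *   +-------------+-------+-------+--------------------------+
 */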

static GString *caps_json(void)
{
    QDict *dict = qdict_new();
    QDict *capdict = qdict_new();
    QDict *migdict = qdict_new();
    GString *str;

    qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
    qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict));

    qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
    qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
    qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);

    qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));

    str = qobject_to_json(QOBJECT(dict));
    qobject_unref(dict);
    return str;
}

bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
{
    g_autofree VFIOUserVersion *msgp = NULL;
    GString *caps;
    char *reply;
    int size, caplen;

    caps = caps_json();
    caplen = caps->len + 1;
    size = sizeof(*msgp) + caplen;
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
    msgp->major = VFIO_USER_MAJOR_VER;
    msgp->minor = VFIO_USER_MINOR_VER;
    memcpy(&msgp->capabilities, caps->str, caplen);
    g_string_free(caps, true);
    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
        return false;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
        return false;
    }

    if (msgp->major != VFIO_USER_MAJOR_VER ||
        msgp->minor > VFIO_USER_MINOR_VER) {
        error_setg(errp, "incompatible server version");
        return false;
    }

    reply = msgp->capabilities;
    if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
        error_setg(errp, "corrupt version reply");
        return false;
    }

    if (!caps_check(proxy, msgp->minor, reply, errp)) {
        return false;
    }

    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
    return true;
}