/*
 * vfio protocol over a UNIX socket.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-device.h"
#include "hw/vfio-user/proxy.h"
#include "hw/vfio-user/trace.h"
#include "qapi/error.h"
#include "qobject/qdict.h"
#include "qobject/qjson.h"
#include "qobject/qnum.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "system/iothread.h"

static int wait_time = 5000;   /* wait up to 5 sec for busy servers */
static IOThread *vfio_user_iothread;

static void vfio_user_shutdown(VFIOUserProxy *proxy);
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds);
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);

static void vfio_user_recv(void *opaque);
static void vfio_user_send(void *opaque);
static void vfio_user_cb(void *opaque);

static void vfio_user_request(void *opaque);

static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
{
    hdr->flags |= VFIO_USER_ERROR;
    hdr->error_reply = err;
}

/*
 * Functions called by main, CPU, or iothread threads
 */

static void vfio_user_shutdown(VFIOUserProxy *proxy)
{
    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL,
                                   proxy->ctx, NULL, NULL);
}

/*
 * Same return values as qio_channel_writev_full():
 *
 * QIO_CHANNEL_ERR_BLOCK: *errp not set
 * -1: *errp will be populated
 * otherwise: bytes written
 */
static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    VFIOUserFDs *fds = msg->fds;
    struct iovec iov = {
        .iov_base = msg->hdr,
        .iov_len = msg->hdr->size,
    };
    size_t numfds = 0;
    int *fdp = NULL;
    ssize_t ret;

    if (fds != NULL && fds->send_fds != 0) {
        numfds = fds->send_fds;
        fdp = fds->fds;
    }

    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp);

    if (ret == -1) {
        vfio_user_set_error(msg->hdr, EIO);
        vfio_user_shutdown(proxy);
    }
    trace_vfio_user_send_write(msg->hdr->id, ret);

    return ret;
}

static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds)
{
    VFIOUserMsg *msg;

    msg = QTAILQ_FIRST(&proxy->free);
    if (msg != NULL) {
        QTAILQ_REMOVE(&proxy->free, msg, next);
    } else {
        msg = g_malloc0(sizeof(*msg));
        qemu_cond_init(&msg->cv);
    }

    msg->hdr = hdr;
    msg->fds = fds;
    return msg;
}
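
/*
 * Message lifecycle summary (derived from the code below): a VFIOUserMsg
 * is taken from the proxy's 'free' list (or freshly allocated) by
 * vfio_user_getmsg(), queued on 'outgoing' until the socket accepts it,
 * moved to 'pending' if a reply is expected (VFIO_MSG_WAIT/NOWAIT), and
 * finally returned to 'free' by vfio_user_recycle().  Inbound requests
 * instead travel 'incoming' -> request BH -> 'free'.
 */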

/*
 * Recycle a message list entry to the free list.
 */
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
{
    if (msg->type == VFIO_MSG_NONE) {
        error_printf("vfio_user_recycle - freeing free msg\n");
        return;
    }

    /* free msg buffer if no one is waiting to consume the reply */
    if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
        g_free(msg->hdr);
        if (msg->fds != NULL) {
            g_free(msg->fds);
        }
    }

    msg->type = VFIO_MSG_NONE;
    msg->hdr = NULL;
    msg->fds = NULL;
    msg->complete = false;
    msg->pending = false;
    QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
}

VFIOUserFDs *vfio_user_getfds(int numfds)
{
    VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));

    fds->fds = (int *)((char *)fds + sizeof(*fds));

    return fds;
}

/*
 * Functions only called by iothread
 */

/*
 * Process a received message.
 */
static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                              bool isreply)
{

    /*
     * Replies signal a waiter; if no one is waiting, just check for
     * errors and free the message buffer.
     *
     * Requests get queued for the BH.
     */
    if (isreply) {
        msg->complete = true;
        if (msg->type == VFIO_MSG_WAIT) {
            qemu_cond_signal(&msg->cv);
        } else {
            if (msg->hdr->flags & VFIO_USER_ERROR) {
                error_printf("vfio_user_process: error reply on async "
                             "request command %x error %s\n",
                             msg->hdr->command,
                             strerror(msg->hdr->error_reply));
            }
            /* youngest nowait msg has been ack'd */
            if (proxy->last_nowait == msg) {
                proxy->last_nowait = NULL;
            }
            vfio_user_recycle(proxy, msg);
        }
    } else {
        QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
        qemu_bh_schedule(proxy->req_bh);
    }
}

/*
 * Complete a partial message read
 */
static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = proxy->part_recv;
    size_t msgleft = proxy->recv_left;
    bool isreply;
    char *data;
    int ret;

    data = (char *)msg->hdr + (msg->hdr->size - msgleft);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* error or would block */
        if (ret <= 0) {
            /* try for the rest on the next iteration */
            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                proxy->recv_left = msgleft;
            }
            return ret;
        }
        trace_vfio_user_recv_read(msg->hdr->id, ret);

        msgleft -= ret;
        data += ret;
    }

    /*
     * Read complete message, process it.
     */
    proxy->part_recv = NULL;
    proxy->recv_left = 0;
    isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
    vfio_user_process(proxy, msg, isreply);

    /* return positive value */
    return 1;
}
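
/*
 * For reference (fields as used in this file): VFIOUserHdr carries a
 * message 'id' for matching replies to requests, the 'command', the total
 * 'size' including the header, 'flags' (VFIO_USER_REQUEST or
 * VFIO_USER_REPLY in VFIO_USER_TYPE, plus VFIO_USER_ERROR and
 * VFIO_USER_NO_REPLY), and an errno-style 'error_reply'.
 */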

/*
 * Receive and process one incoming message.
 *
 * For replies, find matching outgoing request and wake any waiters.
 * For requests, queue in incoming list and run request BH.
 */
static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = NULL;
    g_autofree int *fdp = NULL;
    VFIOUserFDs *reqfds;
    VFIOUserHdr hdr;
    struct iovec iov = {
        .iov_base = &hdr,
        .iov_len = sizeof(hdr),
    };
    bool isreply = false;
    int i, ret;
    size_t msgleft, numfds = 0;
    char *data = NULL;
    char *buf = NULL;

    /*
     * Complete any partial reads
     */
    if (proxy->part_recv != NULL) {
        ret = vfio_user_complete(proxy, errp);

        /* still not complete, try later */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        /* else fall into reading another msg */
    }

    /*
     * Read header
     */
    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
                                 errp);
    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        return ret;
    }

    /* read error or other side closed connection */
    if (ret <= 0) {
        goto fatal;
    }

    if (ret < sizeof(hdr)) {
        error_setg(errp, "short read of header");
        goto fatal;
    }

    /*
     * Validate header
     */
    if (hdr.size < sizeof(VFIOUserHdr)) {
        error_setg(errp, "bad header size");
        goto fatal;
    }
    switch (hdr.flags & VFIO_USER_TYPE) {
    case VFIO_USER_REQUEST:
        isreply = false;
        break;
    case VFIO_USER_REPLY:
        isreply = true;
        break;
    default:
        error_setg(errp, "unknown message type");
        goto fatal;
    }
    trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
                             hdr.flags);

    /*
     * For replies, find the matching pending request.
     * For requests, reap incoming FDs.
     */
    if (isreply) {
        QTAILQ_FOREACH(msg, &proxy->pending, next) {
            if (hdr.id == msg->id) {
                break;
            }
        }
        if (msg == NULL) {
            error_setg(errp, "unexpected reply");
            goto err;
        }
        QTAILQ_REMOVE(&proxy->pending, msg, next);

        /*
         * Process any received FDs
         */
        if (numfds != 0) {
            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
                error_setg(errp, "unexpected FDs");
                goto err;
            }
            msg->fds->recv_fds = numfds;
            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
        }
    } else {
        if (numfds != 0) {
            reqfds = vfio_user_getfds(numfds);
            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
        } else {
            reqfds = NULL;
        }
    }

    /*
     * Put the whole message into a single buffer.
     */
    if (isreply) {
        if (hdr.size > msg->rsize) {
            error_setg(errp, "reply larger than recv buffer");
            goto err;
        }
        *msg->hdr = hdr;
        data = (char *)msg->hdr + sizeof(hdr);
    } else {
        if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
            error_setg(errp, "vfio_user_recv request larger than max");
            goto err;
        }
        buf = g_malloc0(hdr.size);
        memcpy(buf, &hdr, sizeof(hdr));
        data = buf + sizeof(hdr);
        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
        msg->type = VFIO_MSG_REQ;
    }
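
    /*
     * The socket is non-blocking, so the body may arrive in pieces; a
     * short read below parks the message in proxy->part_recv/recv_left
     * and vfio_user_complete() resumes it on the next POLLIN.
     */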
    /*
     * Read the rest of the message.
     */
    msgleft = hdr.size - sizeof(hdr);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* prepare to complete the read on the next iteration */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            proxy->part_recv = msg;
            proxy->recv_left = msgleft;
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        trace_vfio_user_recv_read(hdr.id, ret);

        msgleft -= ret;
        data += ret;
    }

    vfio_user_process(proxy, msg, isreply);
    return 0;

    /*
     * fatal means the other side closed or we don't trust the stream
     * err means this message is corrupt
     */
fatal:
    vfio_user_shutdown(proxy);
    proxy->state = VFIO_PROXY_ERROR;

    /* set error if server side closed */
    if (ret == 0) {
        error_setg(errp, "server closed socket");
    }

err:
    for (i = 0; i < numfds; i++) {
        close(fdp[i]);
    }
    if (isreply && msg != NULL) {
        /* force an error to keep sending thread from hanging */
        vfio_user_set_error(msg->hdr, EINVAL);
        msg->complete = true;
        qemu_cond_signal(&msg->cv);
    }
    return -1;
}

static void vfio_user_recv(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state == VFIO_PROXY_CONNECTED) {
        Error *local_err = NULL;

        while (vfio_user_recv_one(proxy, &local_err) == 0) {
            ;
        }

        if (local_err != NULL) {
            error_report_err(local_err);
        }
    }
}

/*
 * Send a single message, same return semantics as vfio_user_send_qio().
 *
 * Sent async messages are freed, others are moved to the pending queue.
 */
static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg;
    ssize_t ret;

    msg = QTAILQ_FIRST(&proxy->outgoing);
    ret = vfio_user_send_qio(proxy, msg, errp);
    if (ret < 0) {
        return ret;
    }

    QTAILQ_REMOVE(&proxy->outgoing, msg, next);
    if (msg->type == VFIO_MSG_ASYNC) {
        vfio_user_recycle(proxy, msg);
    } else {
        QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
        msg->pending = true;
    }

    return ret;
}

/*
 * Send messages from the outgoing queue when the socket buffer has space.
 * If we deplete 'outgoing', remove ourselves from the poll list.
 */
static void vfio_user_send(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state == VFIO_PROXY_CONNECTED) {
        while (!QTAILQ_EMPTY(&proxy->outgoing)) {
            Error *local_err = NULL;
            int ret;

            ret = vfio_user_send_one(proxy, &local_err);

            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                return;
            } else if (ret == -1) {
                error_report_err(local_err);
                return;
            }
        }
        qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                       vfio_user_recv, NULL, NULL, proxy);
    }
}

static void vfio_user_cb(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    proxy->state = VFIO_PROXY_CLOSED;
    qemu_cond_signal(&proxy->close_cv);
}
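
/*
 * Note on the fd handler registration above: the write handler
 * (vfio_user_send) is armed only while 'outgoing' is non-empty -- see
 * vfio_user_send_queued() below -- and disarms itself once the queue
 * drains, so an idle proxy polls for reads only.
 */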

/*
 * Functions called by main or CPU threads
 */

/*
 * Process incoming requests.
 *
 * The bus-specific callback has the form:
 *    request(opaque, msg)
 * where 'opaque' was specified in vfio_user_set_handler
 * and 'msg' is the inbound message.
 *
 * The callback is responsible for disposing of the message buffer,
 * usually by re-using it when calling vfio_user_send_reply or
 * vfio_user_send_error, both of which free their message buffer when
 * the reply is sent.
 *
 * If the callback uses a new buffer, it needs to free the old one.
 */
static void vfio_user_request(void *opaque)
{
    VFIOUserProxy *proxy = opaque;
    VFIOUserMsgQ new, free;
    VFIOUserMsg *msg, *m1;

    /* reap all incoming */
    QTAILQ_INIT(&new);
    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
        QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
            QTAILQ_REMOVE(&proxy->incoming, msg, next);
            QTAILQ_INSERT_TAIL(&new, msg, next);
        }
    }

    /* process list */
    QTAILQ_INIT(&free);
    QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
        QTAILQ_REMOVE(&new, msg, next);
        trace_vfio_user_recv_request(msg->hdr->command);
        proxy->request(proxy->req_arg, msg);
        QTAILQ_INSERT_HEAD(&free, msg, next);
    }

    /* free list */
    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
        QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
            vfio_user_recycle(proxy, msg);
        }
    }
}
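
/*
 * A minimal request callback, schematically (handler and state names
 * here are illustrative, not part of this file):
 *
 *     static void my_request(void *opaque, VFIOUserMsg *msg)
 *     {
 *         MyDev *dev = opaque;    (the 'req_arg' from vfio_user_set_handler)
 *
 *         switch (msg->hdr->command) {
 *         ...handle known commands, reusing msg->hdr for the reply...
 *             vfio_user_send_reply(dev->proxy, msg->hdr, reply_size);
 *             break;
 *         default:
 *             vfio_user_send_error(dev->proxy, msg->hdr, ENOSYS);
 *         }
 *     }
 */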

/*
 * Messages are queued onto the proxy's outgoing list.
 *
 * The send path handles three types of messages:
 *
 * async messages - replies and posted writes
 *
 *     There will be no reply from the server, so message
 *     buffers are freed after they're sent.
 *
 * nowait messages - map/unmap during address space transactions
 *
 *     These are also sent async, but a reply is expected so that
 *     vfio_wait_reqs() can wait for the youngest nowait request.
 *     They transition from the outgoing list to the pending list
 *     when sent, and are freed when the reply is received.
 *
 * wait messages - all other requests
 *
 *     The reply to these messages is waited for by their caller.
 *     They also transition from outgoing to pending when sent, but
 *     the message buffer is returned to the caller with the reply
 *     contents.  The caller is responsible for freeing these messages.
 *
 * As an optimization, if the outgoing list and the socket send
 * buffer are empty, the message is sent inline instead of being
 * added to the outgoing list.  The rest of the transitions are
 * unchanged.
 */
static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    int ret;

    /*
     * Unsent outgoing msgs - add to tail
     */
    if (!QTAILQ_EMPTY(&proxy->outgoing)) {
        QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
        return true;
    }

    /*
     * Try inline - if blocked, queue it and kick send poller
     */
    if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) {
        ret = QIO_CHANNEL_ERR_BLOCK;
    } else {
        ret = vfio_user_send_qio(proxy, msg, errp);
    }

    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
        qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                       vfio_user_recv, proxy->ctx,
                                       vfio_user_send, proxy);
        return true;
    }
    if (ret == -1) {
        return false;
    }

    /*
     * Sent - free async, add others to pending
     */
    if (msg->type == VFIO_MSG_ASYNC) {
        vfio_user_recycle(proxy, msg);
    } else {
        QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
        msg->pending = true;
    }

    return true;
}

/*
 * nowait send - vfio_wait_reqs() can wait for it later
 *
 * Returns false if the message could not be sent or queued, in which
 * case @errp will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                           VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->type = VFIO_MSG_NOWAIT;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        vfio_user_recycle(proxy, msg);
        return false;
    }

    if (!vfio_user_send_queued(proxy, msg, errp)) {
        vfio_user_recycle(proxy, msg);
        return false;
    }

    proxy->last_nowait = msg;

    return true;
}

/*
 * Returns false if we did not successfully receive a reply message, in which
 * case @errp will be populated.
 *
 * In either case, the caller must free @hdr and @fds if needed.
 */
bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                         VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;
    bool ok = false;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        return false;
    }

    qemu_mutex_lock(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->type = VFIO_MSG_WAIT;

    ok = vfio_user_send_queued(proxy, msg, errp);

    if (ok) {
        while (!msg->complete) {
            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
                VFIOUserMsgQ *list;

                list = msg->pending ? &proxy->pending : &proxy->outgoing;
                QTAILQ_REMOVE(list, msg, next);
                error_setg_errno(errp, ETIMEDOUT,
                                 "timed out waiting for reply");
                ok = false;
                break;
            }
        }
    }

    vfio_user_recycle(proxy, msg);

    qemu_mutex_unlock(&proxy->lock);

    return ok;
}
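
/*
 * Typical synchronous call pattern, as a sketch (see
 * vfio_user_validate_version() at the end of this file for a real
 * instance of it):
 *
 *     msgp = g_malloc0(size);
 *     vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
 *     ...fill in command-specific fields...
 *     if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
 *         return false;
 *     }
 *     if (msgp->hdr.flags & VFIO_USER_ERROR) {
 *         ...server failed the request; errno is in hdr.error_reply...
 *     }
 */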

/*
 * async send - msg can be queued, but will be freed when sent
 *
 * Returns false on failure, in which case @errp will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
static bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                 VFIOUserFDs *fds, Error **errp)
{
    VFIOUserMsg *msg;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->id = hdr->id;
    msg->rsize = 0;
    msg->type = VFIO_MSG_ASYNC;

    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
        error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
        vfio_user_recycle(proxy, msg);
        return false;
    }

    if (!vfio_user_send_queued(proxy, msg, errp)) {
        vfio_user_recycle(proxy, msg);
        return false;
    }

    return true;
}

void vfio_user_wait_reqs(VFIOUserProxy *proxy)
{
    VFIOUserMsg *msg;

    /*
     * Any DMA map/unmap requests sent in the middle
     * of a memory region transaction were sent nowait.
     * Wait for them here.
     */
    qemu_mutex_lock(&proxy->lock);
    if (proxy->last_nowait != NULL) {
        /*
         * Change type to WAIT to wait for reply
         */
        msg = proxy->last_nowait;
        msg->type = VFIO_MSG_WAIT;
        proxy->last_nowait = NULL;
        while (!msg->complete) {
            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
                VFIOUserMsgQ *list;

                list = msg->pending ? &proxy->pending : &proxy->outgoing;
                QTAILQ_REMOVE(list, msg, next);
                error_printf("vfio_user_wait_reqs - timed out\n");
                break;
            }
        }

        if (msg->hdr->flags & VFIO_USER_ERROR) {
            error_printf("vfio_user_wait_reqs - error reply on async "
                         "request: command %x error %s\n", msg->hdr->command,
                         strerror(msg->hdr->error_reply));
        }

        /*
         * Change type back to NOWAIT to free
         */
        msg->type = VFIO_MSG_NOWAIT;
        vfio_user_recycle(proxy, msg);
    }

    qemu_mutex_unlock(&proxy->lock);
}

/*
 * Reply to an incoming request.
 */
void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
{
    Error *local_err = NULL;

    if (size < sizeof(VFIOUserHdr)) {
        error_printf("%s: size too small\n", __func__);
        g_free(hdr);
        return;
    }

    /*
     * convert header to associated reply
     */
    hdr->flags = VFIO_USER_REPLY;
    hdr->size = size;

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}

/*
 * Send an error reply to an incoming request.
 */
void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
{
    Error *local_err = NULL;

    /*
     * convert header to associated reply
     */
    hdr->flags = VFIO_USER_REPLY | VFIO_USER_ERROR;
    hdr->error_reply = error;
    hdr->size = sizeof(*hdr);

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}
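
/*
 * On the wire, both reply helpers above reuse the request header in
 * place: hdr.id and hdr.command are preserved so the peer can match the
 * reply to its request, while the flags change from VFIO_USER_REQUEST to
 * VFIO_USER_REPLY (plus VFIO_USER_ERROR and an errno in error_reply for
 * the failure case).
 */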

/*
 * Close FDs erroneously received in an incoming request.
 */
void vfio_user_putfds(VFIOUserMsg *msg)
{
    VFIOUserFDs *fds = msg->fds;
    int i;

    for (i = 0; i < fds->recv_fds; i++) {
        close(fds->fds[i]);
    }
    g_free(fds);
    msg->fds = NULL;
}

static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
    QLIST_HEAD_INITIALIZER(vfio_user_sockets);

VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
{
    VFIOUserProxy *proxy;
    QIOChannelSocket *sioc;
    QIOChannel *ioc;
    char *sockname;

    if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "vfio_user_connect - bad address family");
        return NULL;
    }
    sockname = addr->u.q_unix.path;

    sioc = qio_channel_socket_new();
    ioc = QIO_CHANNEL(sioc);
    if (qio_channel_socket_connect_sync(sioc, addr, errp)) {
        object_unref(OBJECT(ioc));
        return NULL;
    }
    qio_channel_set_blocking(ioc, false, NULL);

    proxy = g_malloc0(sizeof(VFIOUserProxy));
    proxy->sockname = g_strdup_printf("unix:%s", sockname);
    proxy->ioc = ioc;

    /* init defaults */
    proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
    proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
    proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
    proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
    proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
    proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;

    proxy->flags = VFIO_PROXY_CLIENT;
    proxy->state = VFIO_PROXY_CONNECTED;

    qemu_mutex_init(&proxy->lock);
    qemu_cond_init(&proxy->close_cv);

    if (vfio_user_iothread == NULL) {
        vfio_user_iothread = iothread_create("VFIO user", errp);
    }

    proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
    proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);

    QTAILQ_INIT(&proxy->outgoing);
    QTAILQ_INIT(&proxy->incoming);
    QTAILQ_INIT(&proxy->free);
    QTAILQ_INIT(&proxy->pending);
    QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);

    return proxy;
}

void vfio_user_set_handler(VFIODevice *vbasedev,
                           void (*handler)(void *opaque, VFIOUserMsg *msg),
                           void *req_arg)
{
    VFIOUserProxy *proxy = vbasedev->proxy;

    proxy->request = handler;
    proxy->req_arg = req_arg;
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                   vfio_user_recv, NULL, NULL, proxy);
}
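
/*
 * Typical proxy lifecycle, schematically (callers live outside this
 * file; the handler and argument names are illustrative):
 *
 *     proxy = vfio_user_connect_dev(addr, errp);
 *     vbasedev->proxy = proxy;
 *     vfio_user_set_handler(vbasedev, my_request, my_state);
 *     if (!vfio_user_validate_version(proxy, errp)) { ...fail... }
 *     ...device runs, exchanging messages...
 *     vfio_user_disconnect(proxy);
 */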

void vfio_user_disconnect(VFIOUserProxy *proxy)
{
    VFIOUserMsg *r1, *r2;

    qemu_mutex_lock(&proxy->lock);

    /* our side is quitting */
    if (proxy->state == VFIO_PROXY_CONNECTED) {
        vfio_user_shutdown(proxy);
        if (!QTAILQ_EMPTY(&proxy->pending)) {
            error_printf("vfio_user_disconnect: outstanding requests\n");
        }
    }
    object_unref(OBJECT(proxy->ioc));
    proxy->ioc = NULL;
    qemu_bh_delete(proxy->req_bh);
    proxy->req_bh = NULL;

    proxy->state = VFIO_PROXY_CLOSING;
    QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->outgoing, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->incoming, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->pending, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->free, r1, next);
        g_free(r1);
    }

    /*
     * Make sure the iothread isn't blocking anywhere
     * with a ref to this proxy by waiting for a BH
     * handler to run after the proxy fd handlers were
     * deleted above.
     */
    aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy);
    qemu_cond_wait(&proxy->close_cv, &proxy->lock);

    /* we now hold the only ref to proxy */
    qemu_mutex_unlock(&proxy->lock);
    qemu_cond_destroy(&proxy->close_cv);
    qemu_mutex_destroy(&proxy->lock);

    QLIST_REMOVE(proxy, next);
    if (QLIST_EMPTY(&vfio_user_sockets)) {
        iothread_destroy(vfio_user_iothread);
        vfio_user_iothread = NULL;
    }

    g_free(proxy->sockname);
    g_free(proxy);
}

void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                           uint32_t size, uint32_t flags)
{
    static uint16_t next_id;

    hdr->id = qatomic_fetch_inc(&next_id);
    hdr->command = cmd;
    hdr->size = size;
    hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;
    hdr->error_reply = 0;
}

struct cap_entry {
    const char *name;
    bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
};

static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
                       struct cap_entry caps[], Error **errp)
{
    QObject *qobj;
    struct cap_entry *p;

    for (p = caps; p->name != NULL; p++) {
        qobj = qdict_get(qdict, p->name);
        if (qobj != NULL) {
            if (!p->check(proxy, qobj, errp)) {
                return false;
            }
            qdict_del(qdict, p->name);
        }
    }

    /* warning, for now */
    if (qdict_size(qdict) != 0) {
        warn_report("spurious capabilities");
    }
    return true;
}

static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t pgsize;

    if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
        return false;
    }

    /* must be a multiple of the default page size */
    if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
        return false;
    }

    proxy->migr_pgsize = pgsize;
    return true;
}
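
/*
 * Example of the mask check above, assuming VFIO_USER_DEF_PGSIZE is
 * 0x1000 (4 KiB): 0x2000 passes (0x2000 & 0xfff == 0), while 0x1800 is
 * rejected (0x1800 & 0xfff == 0x800), since only multiples of the
 * default page size are accepted.
 */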

static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t bitmap_size;

    if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    /* can only lower it */
    if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    proxy->max_bitmap = bitmap_size;
    return true;
}

static struct cap_entry caps_migr[] = {
    { VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
    { VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
    { NULL }
};

static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_send_fds;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) ||
        max_send_fds > VFIO_USER_MAX_MAX_FDS) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
        return false;
    }
    proxy->max_send_fds = max_send_fds;
    return true;
}

static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_xfer_size;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) ||
        max_xfer_size > VFIO_USER_MAX_MAX_XFER) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
        return false;
    }
    proxy->max_xfer_size = max_xfer_size;
    return true;
}

static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t pgsizes;

    if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
        return false;
    }

    /* must be a multiple of the default page size */
    if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
        return false;
    }

    proxy->dma_pgsizes = pgsizes;
    return true;
}

static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QNum *qn = qobject_to(QNum, qobj);
    uint64_t max_dma;

    if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    /* can only lower it */
    if (max_dma > VFIO_USER_DEF_MAP_MAX) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    proxy->max_dma = max_dma;
    return true;
}

static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *qdict = qobject_to(QDict, qobj);

    if (qdict == NULL) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MIGR);
        return false;
    }
    return caps_parse(proxy, qdict, caps_migr, errp);
}

static struct cap_entry caps_cap[] = {
    { VFIO_USER_CAP_MAX_FDS, check_max_fds },
    { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
    { VFIO_USER_CAP_PGSIZES, check_pgsizes },
    { VFIO_USER_CAP_MAP_MAX, check_max_dma },
    { VFIO_USER_CAP_MIGR, check_migr },
    { NULL }
};

static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *qdict = qobject_to(QDict, qobj);

    if (qdict == NULL) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP);
        return false;
    }
    return caps_parse(proxy, qdict, caps_cap, errp);
}

static struct cap_entry ver_0_0[] = {
    { VFIO_USER_CAP, check_cap },
    { NULL }
};

static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
                       Error **errp)
{
    QObject *qobj;
    QDict *qdict;
    bool ret;

    qobj = qobject_from_json(caps, NULL);
    if (qobj == NULL) {
        error_setg(errp, "malformed capabilities %s", caps);
        return false;
    }
    qdict = qobject_to(QDict, qobj);
    if (qdict == NULL) {
        error_setg(errp, "capabilities %s not an object", caps);
        qobject_unref(qobj);
        return false;
    }
    ret = caps_parse(proxy, qdict, ver_0_0, errp);

    qobject_unref(qobj);
    return ret;
}
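
/*
 * caps_json() below builds the client half of the capability exchange.
 * Schematically (keys shown by their macro names rather than their
 * string values), the JSON it produces is:
 *
 *     { VFIO_USER_CAP: {
 *           VFIO_USER_CAP_MAX_FDS:  VFIO_USER_MAX_MAX_FDS,
 *           VFIO_USER_CAP_MAX_XFER: VFIO_USER_DEF_MAX_XFER,
 *           VFIO_USER_CAP_PGSIZES:  VFIO_USER_DEF_PGSIZE,
 *           VFIO_USER_CAP_MAP_MAX:  VFIO_USER_DEF_MAP_MAX,
 *           VFIO_USER_CAP_MIGR: {
 *               VFIO_USER_CAP_PGSIZE:     VFIO_USER_DEF_PGSIZE,
 *               VFIO_USER_CAP_MAX_BITMAP: VFIO_USER_DEF_MAX_BITMAP } } }
 */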

static GString *caps_json(void)
{
    QDict *dict = qdict_new();
    QDict *capdict = qdict_new();
    QDict *migdict = qdict_new();
    GString *str;

    qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
    qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict));

    qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
    qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
    qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);

    qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));

    str = qobject_to_json(QOBJECT(dict));
    qobject_unref(dict);
    return str;
}

bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
{
    g_autofree VFIOUserVersion *msgp = NULL;
    GString *caps;
    char *reply;
    int size, caplen;

    caps = caps_json();
    caplen = caps->len + 1;
    size = sizeof(*msgp) + caplen;
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
    msgp->major = VFIO_USER_MAJOR_VER;
    msgp->minor = VFIO_USER_MINOR_VER;
    memcpy(&msgp->capabilities, caps->str, caplen);
    g_string_free(caps, true);
    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
        return false;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
        return false;
    }

    if (msgp->major != VFIO_USER_MAJOR_VER ||
        msgp->minor > VFIO_USER_MINOR_VER) {
        error_setg(errp, "incompatible server version");
        return false;
    }

    reply = msgp->capabilities;
    if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
        error_setg(errp, "corrupt version reply");
        return false;
    }

    if (!caps_check(proxy, msgp->minor, reply, errp)) {
        return false;
    }

    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
    return true;
}