/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/virtio-dmabuf.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-crypto.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "io/channel-socket.h"
#include "system/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/uuid.h"
#include "qemu/sockets.h"
#include "system/runstate.h"
#include "system/cryptodev.h"
#include "migration/postcopy-ram.h"
#include "trace.h"
#include "system/ramblock.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_BASELINE_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_BACKEND_MAX_FDS     8

#if defined(TARGET_PPC) || defined(TARGET_PPC64)
#include "hw/ppc/spapr.h"
#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS

#else
#define VHOST_USER_MAX_RAM_SLOTS 512
#endif

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)

typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_GET_SHARED_OBJECT = 41,
    VHOST_USER_SET_DEVICE_STATE_FD = 42,
    VHOST_USER_CHECK_DEVICE_STATE = 43,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserBackendRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_SHARED_OBJECT_ADD = 6,
    VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE = 7,
    VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP = 8,
    VHOST_USER_BACKEND_MAX
} VhostUserBackendRequest;

typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint64_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN        1024

typedef struct VhostUserCryptoSession {
    uint64_t op_code;
    union {
        struct {
            CryptoDevBackendSymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
            uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
        } sym;
        struct {
            CryptoDevBackendAsymSessionInfo session_setup_data;
            uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
        } asym;
    } u;

    /* session id for success, -1 on errors */
    int64_t session_id;
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;

typedef struct VhostUserShared {
    unsigned char uuid[16];
} VhostUserShared;

typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

/* Request payload of VHOST_USER_SET_DEVICE_STATE_FD */
typedef struct VhostUserTransferDeviceState {
    uint32_t direction;
    uint32_t phase;
} VhostUserTransferDeviceState;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserMemRegMsg mem_reg;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
        VhostUserInflight inflight;
        VhostUserShared object;
        VhostUserTransferDeviceState transfer_state;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    QIOChannel *backend_ioc;
    GSource *backend_src;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD  postcopy_fd;
    uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t             region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock         **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t        *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool               postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        int saved_errno = errno;
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return r < 0 ? -saved_errno : -EIO;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -EPROTO;
    }

    trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);

    return 0;
}

static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    r = vhost_user_read_header(dev, msg);
    if (r < 0) {
        return r;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -EPROTO;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            int saved_errno = errno;
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return r < 0 ? -saved_errno : -EIO;
        }
    }

    return 0;
}

static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    int ret;
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    ret = vhost_user_read(dev, &msg_reply);
    if (ret < 0) {
        return ret;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -EPROTO;
    }

    return msg_reply.payload.u64 ? -EIO : 0;
}

static bool vhost_user_per_device_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
    case VHOST_USER_RESET_DEVICE:
    case VHOST_USER_ADD_MEM_REG:
    case VHOST_USER_REM_MEM_REG:
    case VHOST_USER_SET_LOG_BASE:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
     * while others, like virtio-net, contain multiple vhost_devs. For
     * operations such as configuring device memory mappings or issuing device
     * resets, which affect the whole device instead of individual VQs,
     * vhost-user messages should only be sent once.
     *
     * Devices with multiple vhost_devs are given an associated dev->vq_index
     * so per_device requests are only sent if vq_index is 0.
     */
    if (vhost_user_per_device_request(msg->hdr.request)
        && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -EINVAL;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        int saved_errno = errno;
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return ret < 0 ? -saved_errno : -EIO;
    }

    trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);

    return 0;
}

int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    /* Send only once with first queue pair */
    if (dev->vq_index != 0) {
        return 0;
    }

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    ret = vhost_user_write(dev, &msg, fds, fd_num);
    if (ret < 0) {
        return ret;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        ret = vhost_user_read(dev, &msg);
        if (ret < 0) {
            return ret;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -EPROTO;
        }
    }

    return 0;
}

static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);
    *offset += mr->ram_block->fd_offset;

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src,
                                       uint64_t mmap_offset)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
    dst->mmap_offset = mmap_offset;
}

static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -ENOBUFS;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -EINVAL;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 0;
}

static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;
}

static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}

static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}

static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}

static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fds.
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_per_device_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
        return -EPROTO;
    }

    return 0;
}

/* Note: "msg->hdr.flags" may be modified. */
static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg,
                                 bool wait_for_reply)
{
    int ret;

    if (wait_for_reply) {
        bool reply_supported = virtio_has_feature(dev->protocol_features,
                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
        if (reply_supported) {
            msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
        }
    }

    ret = vhost_user_write(dev, msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    if (wait_for_reply) {
        uint64_t dummy;

        if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
            return process_message_reply(dev, msg);
        }

        /*
         * We need to wait for a reply but the backend does not
         * support replies for the command we just sent.
         * Send VHOST_USER_GET_FEATURES which makes all backends
         * send a reply.
         */
        return vhost_user_get_features(dev, &dummy);
    }

    return 0;
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring,
                           bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false);
}

static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
{
    if (n->unmap_addr) {
        munmap(n->unmap_addr, qemu_real_host_page_size());
        n->unmap_addr = NULL;
    }
    if (n->destroy) {
        memory_region_transaction_begin();
        object_unparent(OBJECT(&n->mr));
        memory_region_transaction_commit();
        g_free(n);
    }
}

/*
 * clean-up function for notifier, will finally free the structure
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev, bool destroy)
{
    /*
     * if destroy == false and n->addr == NULL, we have nothing to do.
     * so, just return.
     */
    if (!n || (!destroy && !n->addr)) {
        return;
    }

    if (n->addr) {
        if (vdev) {
            memory_region_transaction_begin();
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
            memory_region_transaction_commit();
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
    }
    n->destroy = destroy;
    call_rcu(n, vhost_user_host_notifier_free, rcu);
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -EINVAL;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        int ret;
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num   = enable,
        };

        /*
         * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend /
         * control plane thread via unix domain socket. Virtio requests travel
         * from guest to vhost-user backend / data plane thread via eventfd.
         * Even if the guest enables the ring first, and pushes its first
         * virtio request second (conforming to the virtio spec), the data
         * plane thread in the backend may see the virtio request before the
         * control plane thread sees the queue enablement. This causes (in
         * fact, requires) the data plane thread to discard the virtio
         * request (it arrived on a seemingly disabled queue). To prevent
         * this out-of-order delivery, don't let the guest proceed to pushing
         * the virtio request until the backend control plane acknowledges
         * enabling the queue -- IOW, pass wait_for_reply=true below.
         */
        ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true);
        if (ret < 0) {
            /*
             * Restoring the previous state is likely infeasible, as well as
             * proceeding regardless the error, so just bail out and hope for
             * the device-level recovery.
             */
            return ret;
        }
    }

    return 0;
}

static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
                                             int idx)
{
    if (idx >= u->notifiers->len) {
        return NULL;
    }
    return g_ptr_array_index(u->notifiers, idx);
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    vhost_user_host_notifier_remove(n, dev->vdev, false);

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}

static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    return vhost_user_write(dev, &msg, fds, fd_num);
}

static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_vring_err(struct vhost_dev *dev,
                                    struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
}

static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
                              bool wait_for_reply)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };

    return vhost_user_write_sync(dev, &msg, wait_for_reply);
}

static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
}

static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
{
    uint64_t value;
    int ret;

    ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
    if (ret < 0) {
        return ret;
    }
    *status = value;

    return 0;
}

static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    ret = vhost_user_get_status(dev, &s);
    if (ret < 0) {
        return ret;
    }

    if ((s & status) == status) {
        return 0;
    }
    s |= status;

    return vhost_user_set_status(dev, s);
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
                              false);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.request = VHOST_USER_RESET_DEVICE,
    };

    /*
     * Historically, reset was not implemented so only reset devices
     * that are expecting it.
     */
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_RESET_DEVICE)) {
        return -ENOSYS;
    }

    return vhost_user_write(dev, &msg, NULL, 0);
}

static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
{
    if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
        return -ENOSYS;
    }

    return dev->config_ops->vhost_dev_config_notifier(dev);
}

/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out-of-order,
         * make room for the current index.
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}

static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
                                                          VhostUserVringArea *area,
                                                          int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev, false);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}

static int
vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev,
                                            VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    return !virtio_add_vhost_device(&uuid, dev);
}

/*
 * Handle VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE backend requests.
 *
 * Return: 0 on success, 1 on error.
 */
static int
vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev,
                                               VhostUserShared *object)
{
    QemuUUID uuid;

    memcpy(uuid.data, object->uuid, sizeof(object->uuid));
    switch (virtio_object_type(&uuid)) {
    case TYPE_VHOST_DEV:
    {
        struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid);
        if (dev != owner) {
            /* Not allowed to remove non-owned entries */
            return 1;
        }
        break;
    }
    default:
        /* Not allowed to remove non-owned entries */
        return 1;
    }

    return !virtio_remove_resource(&uuid);
}

static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr,
                                 VhostUserPayload *payload, Error **errp)
{
    struct iovec iov[] = {
        { .iov_base = hdr,      .iov_len = VHOST_USER_HDR_SIZE },
        { .iov_base = payload,  .iov_len = hdr->size },
    };

    hdr->flags &= ~VHOST_USER_NEED_REPLY_MASK;
    hdr->flags |= VHOST_USER_REPLY_MASK;

    return !qio_channel_writev_all(ioc, iov, ARRAY_SIZE(iov), errp);
}

static bool
vhost_user_backend_send_dmabuf_fd(QIOChannel *ioc, VhostUserHeader *hdr,
                                  VhostUserPayload *payload, Error **errp)
{
    hdr->size = sizeof(payload->u64);
    return vhost_user_send_resp(ioc, hdr, payload, errp);
}

int vhost_user_get_shared_object(struct vhost_dev *dev, unsigned char *uuid,
                                 int *dmabuf_fd)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_SHARED_OBJECT,
        .hdr.flags = VHOST_USER_VERSION,
    };
    memcpy(msg.payload.object.uuid, uuid, sizeof(msg.payload.object.uuid));

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_SHARED_OBJECT) {
        error_report("Received unexpected msg type. "
" 1703 "Expected %d received %d", 1704 VHOST_USER_GET_SHARED_OBJECT, msg.hdr.request); 1705 return -EPROTO; 1706 } 1707 1708 *dmabuf_fd = qemu_chr_fe_get_msgfd(chr); 1709 if (*dmabuf_fd < 0) { 1710 error_report("Failed to get dmabuf fd"); 1711 return -EIO; 1712 } 1713 1714 return 0; 1715 } 1716 1717 static int 1718 vhost_user_backend_handle_shared_object_lookup(struct vhost_user *u, 1719 QIOChannel *ioc, 1720 VhostUserHeader *hdr, 1721 VhostUserPayload *payload) 1722 { 1723 QemuUUID uuid; 1724 CharBackend *chr = u->user->chr; 1725 Error *local_err = NULL; 1726 int dmabuf_fd = -1; 1727 int fd_num = 0; 1728 1729 memcpy(uuid.data, payload->object.uuid, sizeof(payload->object.uuid)); 1730 1731 payload->u64 = 0; 1732 switch (virtio_object_type(&uuid)) { 1733 case TYPE_DMABUF: 1734 dmabuf_fd = virtio_lookup_dmabuf(&uuid); 1735 break; 1736 case TYPE_VHOST_DEV: 1737 { 1738 struct vhost_dev *dev = virtio_lookup_vhost_device(&uuid); 1739 if (dev == NULL) { 1740 payload->u64 = -EINVAL; 1741 break; 1742 } 1743 int ret = vhost_user_get_shared_object(dev, uuid.data, &dmabuf_fd); 1744 if (ret < 0) { 1745 payload->u64 = ret; 1746 } 1747 break; 1748 } 1749 case TYPE_INVALID: 1750 payload->u64 = -EINVAL; 1751 break; 1752 } 1753 1754 if (dmabuf_fd != -1) { 1755 fd_num++; 1756 } 1757 1758 if (qemu_chr_fe_set_msgfds(chr, &dmabuf_fd, fd_num) < 0) { 1759 error_report("Failed to set msg fds."); 1760 payload->u64 = -EINVAL; 1761 } 1762 1763 if (!vhost_user_backend_send_dmabuf_fd(ioc, hdr, payload, &local_err)) { 1764 error_report_err(local_err); 1765 return -EINVAL; 1766 } 1767 1768 return 0; 1769 } 1770 1771 static void close_backend_channel(struct vhost_user *u) 1772 { 1773 g_source_destroy(u->backend_src); 1774 g_source_unref(u->backend_src); 1775 u->backend_src = NULL; 1776 object_unref(OBJECT(u->backend_ioc)); 1777 u->backend_ioc = NULL; 1778 } 1779 1780 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition, 1781 gpointer opaque) 1782 { 1783 struct vhost_dev *dev = opaque; 1784 struct vhost_user *u = dev->opaque; 1785 VhostUserHeader hdr = { 0, }; 1786 VhostUserPayload payload = { 0, }; 1787 Error *local_err = NULL; 1788 gboolean rc = G_SOURCE_CONTINUE; 1789 int ret = 0; 1790 struct iovec iov; 1791 g_autofree int *fd = NULL; 1792 size_t fdsize = 0; 1793 int i; 1794 1795 /* Read header */ 1796 iov.iov_base = &hdr; 1797 iov.iov_len = VHOST_USER_HDR_SIZE; 1798 1799 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) { 1800 error_report_err(local_err); 1801 goto err; 1802 } 1803 1804 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { 1805 error_report("Failed to read msg header." 1806 " Size %d exceeds the maximum %zu.", hdr.size, 1807 VHOST_USER_PAYLOAD_SIZE); 1808 goto err; 1809 } 1810 1811 /* Read payload */ 1812 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) { 1813 error_report_err(local_err); 1814 goto err; 1815 } 1816 1817 switch (hdr.request) { 1818 case VHOST_USER_BACKEND_IOTLB_MSG: 1819 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); 1820 break; 1821 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: 1822 ret = vhost_user_backend_handle_config_change(dev); 1823 break; 1824 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: 1825 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area, 1826 fd ? 
    case VHOST_USER_BACKEND_SHARED_OBJECT_ADD:
        ret = vhost_user_backend_handle_shared_object_add(dev, &payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE:
        ret = vhost_user_backend_handle_shared_object_remove(dev,
                                                             &payload.object);
        break;
    case VHOST_USER_BACKEND_SHARED_OBJECT_LOOKUP:
        ret = vhost_user_backend_handle_shared_object_lookup(dev->opaque, ioc,
                                                             &hdr, &payload);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        if (!vhost_user_send_resp(ioc, &hdr, &payload, &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_backend_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}

static int vhost_setup_backend_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->backend_ioc = ioc;
    u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
                                                  G_IO_IN | G_IO_HUP,
                                                  backend_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        close_backend_channel(u);
    }

    return ret;
}

#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}

static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif

/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
Expected %d received %d", 2029 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 2030 return -EPROTO; 2031 } 2032 2033 if (msg.hdr.size) { 2034 error_setg(errp, "Received bad msg size."); 2035 return -EPROTO; 2036 } 2037 ufd = qemu_chr_fe_get_msgfd(chr); 2038 if (ufd < 0) { 2039 error_setg(errp, "%s: Failed to get ufd", __func__); 2040 return -EIO; 2041 } 2042 qemu_socket_set_nonblock(ufd); 2043 2044 /* register ufd with userfault thread */ 2045 u->postcopy_fd.fd = ufd; 2046 u->postcopy_fd.data = dev; 2047 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 2048 u->postcopy_fd.waker = vhost_user_postcopy_waker; 2049 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 2050 postcopy_register_shared_ufd(&u->postcopy_fd); 2051 return 0; 2052 #else 2053 error_setg(errp, "Postcopy not supported on non-Linux systems"); 2054 return -ENOSYS; 2055 #endif 2056 } 2057 2058 /* 2059 * Called at the switch to postcopy on reception of the 'listen' command. 2060 */ 2061 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 2062 { 2063 struct vhost_user *u = dev->opaque; 2064 int ret; 2065 VhostUserMsg msg = { 2066 .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 2067 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2068 }; 2069 u->postcopy_listen = true; 2070 2071 trace_vhost_user_postcopy_listen(); 2072 2073 ret = vhost_user_write(dev, &msg, NULL, 0); 2074 if (ret < 0) { 2075 error_setg(errp, "Failed to send postcopy_listen to vhost"); 2076 return ret; 2077 } 2078 2079 ret = process_message_reply(dev, &msg); 2080 if (ret) { 2081 error_setg(errp, "Failed to receive reply to postcopy_listen"); 2082 return ret; 2083 } 2084 2085 return 0; 2086 } 2087 2088 /* 2089 * Called at the end of postcopy 2090 */ 2091 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 2092 { 2093 VhostUserMsg msg = { 2094 .hdr.request = VHOST_USER_POSTCOPY_END, 2095 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2096 }; 2097 int ret; 2098 struct vhost_user *u = dev->opaque; 2099 2100 trace_vhost_user_postcopy_end_entry(); 2101 2102 ret = vhost_user_write(dev, &msg, NULL, 0); 2103 if (ret < 0) { 2104 error_setg(errp, "Failed to send postcopy_end to vhost"); 2105 return ret; 2106 } 2107 2108 ret = process_message_reply(dev, &msg); 2109 if (ret) { 2110 error_setg(errp, "Failed to receive reply to postcopy_end"); 2111 return ret; 2112 } 2113 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2114 close(u->postcopy_fd.fd); 2115 u->postcopy_fd.handler = NULL; 2116 2117 trace_vhost_user_postcopy_end_exit(); 2118 2119 return 0; 2120 } 2121 2122 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 2123 void *opaque, Error **errp) 2124 { 2125 struct PostcopyNotifyData *pnd = opaque; 2126 struct vhost_user *u = container_of(notifier, struct vhost_user, 2127 postcopy_notifier); 2128 struct vhost_dev *dev = u->dev; 2129 2130 switch (pnd->reason) { 2131 case POSTCOPY_NOTIFY_PROBE: 2132 if (!virtio_has_feature(dev->protocol_features, 2133 VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 2134 /* TODO: Get the device name into this error somehow */ 2135 error_setg(errp, 2136 "vhost-user backend not capable of postcopy"); 2137 return -ENOENT; 2138 } 2139 break; 2140 2141 case POSTCOPY_NOTIFY_INBOUND_ADVISE: 2142 return vhost_user_postcopy_advise(dev, errp); 2143 2144 case POSTCOPY_NOTIFY_INBOUND_LISTEN: 2145 return vhost_user_postcopy_listen(dev, errp); 2146 2147 case POSTCOPY_NOTIFY_INBOUND_END: 2148 return vhost_user_postcopy_end(dev, errp); 2149 2150 default: 2151 /* 
We ignore notifications we don't know about */ 2152 break; 2153 } 2154 2155 return 0; 2156 } 2157 2158 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, 2159 Error **errp) 2160 { 2161 uint64_t features, ram_slots; 2162 struct vhost_user *u; 2163 VhostUserState *vus = (VhostUserState *) opaque; 2164 int err; 2165 2166 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2167 2168 u = g_new0(struct vhost_user, 1); 2169 u->user = vus; 2170 u->dev = dev; 2171 dev->opaque = u; 2172 2173 err = vhost_user_get_features(dev, &features); 2174 if (err < 0) { 2175 error_setg_errno(errp, -err, "vhost_backend_init failed"); 2176 return err; 2177 } 2178 2179 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { 2180 bool supports_f_config = vus->supports_config || 2181 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier); 2182 uint64_t protocol_features; 2183 2184 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; 2185 2186 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, 2187 &protocol_features); 2188 if (err < 0) { 2189 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2190 return -EPROTO; 2191 } 2192 2193 /* 2194 * We will use all the protocol features we support, although 2195 * we suppress F_CONFIG if we know QEMU's internal code cannot support 2196 * it. 2197 */ 2198 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK; 2199 2200 if (supports_f_config) { 2201 if (!virtio_has_feature(protocol_features, 2202 VHOST_USER_PROTOCOL_F_CONFIG)) { 2203 error_setg(errp, "vhost-user device expecting " 2204 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does " 2205 "not support it."); 2206 return -EPROTO; 2207 } 2208 } else { 2209 if (virtio_has_feature(protocol_features, 2210 VHOST_USER_PROTOCOL_F_CONFIG)) { 2211 warn_report("vhost-user backend supports " 2212 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not."); 2213 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); 2214 } 2215 } 2216 2217 /* final set of protocol features */ 2218 dev->protocol_features = protocol_features; 2219 err = vhost_user_set_protocol_features(dev, dev->protocol_features); 2220 if (err < 0) { 2221 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2222 return -EPROTO; 2223 } 2224 2225 /* query the max queues we support if backend supports Multiple Queue */ 2226 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { 2227 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, 2228 &dev->max_queues); 2229 if (err < 0) { 2230 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2231 return -EPROTO; 2232 } 2233 } else { 2234 dev->max_queues = 1; 2235 } 2236 2237 if (dev->num_queues && dev->max_queues < dev->num_queues) { 2238 error_setg(errp, "The maximum number of queues supported by the " 2239 "backend is %" PRIu64, dev->max_queues); 2240 return -EINVAL; 2241 } 2242 2243 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && 2244 !(virtio_has_feature(dev->protocol_features, 2245 VHOST_USER_PROTOCOL_F_BACKEND_REQ) && 2246 virtio_has_feature(dev->protocol_features, 2247 VHOST_USER_PROTOCOL_F_REPLY_ACK))) { 2248 error_setg(errp, "IOMMU support requires reply-ack and " 2249 "backend-req protocol features."); 2250 return -EINVAL; 2251 } 2252 2253 /* get max memory regions if backend supports configurable RAM slots */ 2254 if (!virtio_has_feature(dev->protocol_features, 2255 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { 2256 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; 2257 } else {
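/* The backend supports configurable RAM slots: query its limit, cap it at VHOST_USER_MAX_RAM_SLOTS, and reject any attempt to lower a previously validated limit. */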
2258 err = vhost_user_get_max_memslots(dev, &ram_slots); 2259 if (err < 0) { 2260 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2261 return -EPROTO; 2262 } 2263 2264 if (ram_slots < u->user->memory_slots) { 2265 error_setg(errp, "The backend specified a max ram slots limit " 2266 "of %" PRIu64", when the prior validated limit was " 2267 "%d. This limit should never decrease.", ram_slots, 2268 u->user->memory_slots); 2269 return -EINVAL; 2270 } 2271 2272 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); 2273 } 2274 } 2275 2276 if (dev->migration_blocker == NULL && 2277 !virtio_has_feature(dev->protocol_features, 2278 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { 2279 error_setg(&dev->migration_blocker, 2280 "Migration disabled: vhost-user backend lacks " 2281 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); 2282 } 2283 2284 if (dev->vq_index == 0) { 2285 err = vhost_setup_backend_channel(dev); 2286 if (err < 0) { 2287 error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); 2288 return -EPROTO; 2289 } 2290 } 2291 2292 u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 2293 postcopy_add_notifier(&u->postcopy_notifier); 2294 2295 return 0; 2296 } 2297 2298 static int vhost_user_backend_cleanup(struct vhost_dev *dev) 2299 { 2300 struct vhost_user *u; 2301 2302 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2303 2304 u = dev->opaque; 2305 if (u->postcopy_notifier.notify) { 2306 postcopy_remove_notifier(&u->postcopy_notifier); 2307 u->postcopy_notifier.notify = NULL; 2308 } 2309 u->postcopy_listen = false; 2310 if (u->postcopy_fd.handler) { 2311 postcopy_unregister_shared_ufd(&u->postcopy_fd); 2312 close(u->postcopy_fd.fd); 2313 u->postcopy_fd.handler = NULL; 2314 } 2315 if (u->backend_ioc) { 2316 close_backend_channel(u); 2317 } 2318 g_free(u->region_rb); 2319 u->region_rb = NULL; 2320 g_free(u->region_rb_offset); 2321 u->region_rb_offset = NULL; 2322 u->region_rb_len = 0; 2323 g_free(u); 2324 dev->opaque = 0; 2325 2326 return 0; 2327 } 2328 2329 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) 2330 { 2331 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 2332 2333 return idx; 2334 } 2335 2336 static int vhost_user_memslots_limit(struct vhost_dev *dev) 2337 { 2338 struct vhost_user *u = dev->opaque; 2339 2340 return u->user->memory_slots; 2341 } 2342 2343 static bool vhost_user_requires_shm_log(struct vhost_dev *dev) 2344 { 2345 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2346 2347 return virtio_has_feature(dev->protocol_features, 2348 VHOST_USER_PROTOCOL_F_LOG_SHMFD); 2349 } 2350 2351 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) 2352 { 2353 VhostUserMsg msg = { }; 2354 2355 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2356 2357 /* If guest supports GUEST_ANNOUNCE do nothing */ 2358 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { 2359 return 0; 2360 } 2361 2362 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ 2363 if (virtio_has_feature(dev->protocol_features, 2364 VHOST_USER_PROTOCOL_F_RARP)) { 2365 msg.hdr.request = VHOST_USER_SEND_RARP; 2366 msg.hdr.flags = VHOST_USER_VERSION; 2367 memcpy((char *)&msg.payload.u64, mac_addr, 6); 2368 msg.hdr.size = sizeof(msg.payload.u64); 2369 2370 return vhost_user_write(dev, &msg, NULL, 0); 2371 } 2372 return -ENOTSUP; 2373 } 2374 2375 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) 2376 { 2377 VhostUserMsg msg; 2378 bool 
reply_supported = virtio_has_feature(dev->protocol_features, 2379 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2380 int ret; 2381 2382 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) { 2383 return 0; 2384 } 2385 2386 msg.hdr.request = VHOST_USER_NET_SET_MTU; 2387 msg.payload.u64 = mtu; 2388 msg.hdr.size = sizeof(msg.payload.u64); 2389 msg.hdr.flags = VHOST_USER_VERSION; 2390 if (reply_supported) { 2391 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2392 } 2393 2394 ret = vhost_user_write(dev, &msg, NULL, 0); 2395 if (ret < 0) { 2396 return ret; 2397 } 2398 2399 /* If reply_ack supported, backend has to ack specified MTU is valid */ 2400 if (reply_supported) { 2401 return process_message_reply(dev, &msg); 2402 } 2403 2404 return 0; 2405 } 2406 2407 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, 2408 struct vhost_iotlb_msg *imsg) 2409 { 2410 int ret; 2411 VhostUserMsg msg = { 2412 .hdr.request = VHOST_USER_IOTLB_MSG, 2413 .hdr.size = sizeof(msg.payload.iotlb), 2414 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 2415 .payload.iotlb = *imsg, 2416 }; 2417 2418 ret = vhost_user_write(dev, &msg, NULL, 0); 2419 if (ret < 0) { 2420 return ret; 2421 } 2422 2423 return process_message_reply(dev, &msg); 2424 } 2425 2426 2427 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) 2428 { 2429 /* No-op as the receive channel is not dedicated to IOTLB messages. */ 2430 } 2431 2432 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, 2433 uint32_t config_len, Error **errp) 2434 { 2435 int ret; 2436 VhostUserMsg msg = { 2437 .hdr.request = VHOST_USER_GET_CONFIG, 2438 .hdr.flags = VHOST_USER_VERSION, 2439 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, 2440 }; 2441 2442 if (!virtio_has_feature(dev->protocol_features, 2443 VHOST_USER_PROTOCOL_F_CONFIG)) { 2444 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); 2445 return -EINVAL; 2446 } 2447 2448 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); 2449 2450 msg.payload.config.offset = 0; 2451 msg.payload.config.size = config_len; 2452 ret = vhost_user_write(dev, &msg, NULL, 0); 2453 if (ret < 0) { 2454 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2455 return ret; 2456 } 2457 2458 ret = vhost_user_read(dev, &msg); 2459 if (ret < 0) { 2460 error_setg_errno(errp, -ret, "vhost_get_config failed"); 2461 return ret; 2462 } 2463 2464 if (msg.hdr.request != VHOST_USER_GET_CONFIG) { 2465 error_setg(errp, 2466 "Received unexpected msg type. 
Expected %d received %d", 2467 VHOST_USER_GET_CONFIG, msg.hdr.request); 2468 return -EPROTO; 2469 } 2470 2471 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { 2472 error_setg(errp, "Received bad msg size."); 2473 return -EPROTO; 2474 } 2475 2476 memcpy(config, msg.payload.config.region, config_len); 2477 2478 return 0; 2479 } 2480 2481 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, 2482 uint32_t offset, uint32_t size, uint32_t flags) 2483 { 2484 int ret; 2485 uint8_t *p; 2486 bool reply_supported = virtio_has_feature(dev->protocol_features, 2487 VHOST_USER_PROTOCOL_F_REPLY_ACK); 2488 2489 VhostUserMsg msg = { 2490 .hdr.request = VHOST_USER_SET_CONFIG, 2491 .hdr.flags = VHOST_USER_VERSION, 2492 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, 2493 }; 2494 2495 if (!virtio_has_feature(dev->protocol_features, 2496 VHOST_USER_PROTOCOL_F_CONFIG)) { 2497 return -ENOTSUP; 2498 } 2499 2500 if (reply_supported) { 2501 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 2502 } 2503 2504 if (size > VHOST_USER_MAX_CONFIG_SIZE) { 2505 return -EINVAL; 2506 } 2507 2508 msg.payload.config.offset = offset, 2509 msg.payload.config.size = size, 2510 msg.payload.config.flags = flags, 2511 p = msg.payload.config.region; 2512 memcpy(p, data, size); 2513 2514 ret = vhost_user_write(dev, &msg, NULL, 0); 2515 if (ret < 0) { 2516 return ret; 2517 } 2518 2519 if (reply_supported) { 2520 return process_message_reply(dev, &msg); 2521 } 2522 2523 return 0; 2524 } 2525 2526 static int vhost_user_crypto_create_session(struct vhost_dev *dev, 2527 void *session_info, 2528 uint64_t *session_id) 2529 { 2530 int ret; 2531 bool crypto_session = virtio_has_feature(dev->protocol_features, 2532 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2533 CryptoDevBackendSessionInfo *backend_info = session_info; 2534 VhostUserMsg msg = { 2535 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION, 2536 .hdr.flags = VHOST_USER_VERSION, 2537 .hdr.size = sizeof(msg.payload.session), 2538 }; 2539 2540 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 2541 2542 if (!crypto_session) { 2543 error_report("vhost-user trying to send unhandled ioctl"); 2544 return -ENOTSUP; 2545 } 2546 2547 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) { 2548 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info; 2549 size_t keylen; 2550 2551 memcpy(&msg.payload.session.u.asym.session_setup_data, sess, 2552 sizeof(CryptoDevBackendAsymSessionInfo)); 2553 if (sess->keylen) { 2554 keylen = sizeof(msg.payload.session.u.asym.key); 2555 if (sess->keylen > keylen) { 2556 error_report("Unsupported asymmetric key size"); 2557 return -ENOTSUP; 2558 } 2559 2560 memcpy(&msg.payload.session.u.asym.key, sess->key, 2561 sess->keylen); 2562 } 2563 } else { 2564 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info; 2565 size_t keylen; 2566 2567 memcpy(&msg.payload.session.u.sym.session_setup_data, sess, 2568 sizeof(CryptoDevBackendSymSessionInfo)); 2569 if (sess->key_len) { 2570 keylen = sizeof(msg.payload.session.u.sym.key); 2571 if (sess->key_len > keylen) { 2572 error_report("Unsupported cipher key size"); 2573 return -ENOTSUP; 2574 } 2575 2576 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key, 2577 sess->key_len); 2578 } 2579 2580 if (sess->auth_key_len > 0) { 2581 keylen = sizeof(msg.payload.session.u.sym.auth_key); 2582 if (sess->auth_key_len > keylen) { 2583 error_report("Unsupported auth key size"); 2584 return -ENOTSUP; 2585 } 2586 2587 
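/* An HMAC auth key is present: copy it into the fixed-size message field; its length was validated against the buffer size just above. */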
memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key, 2588 sess->auth_key_len); 2589 } 2590 } 2591 2592 msg.payload.session.op_code = backend_info->op_code; 2593 msg.payload.session.session_id = backend_info->session_id; 2594 ret = vhost_user_write(dev, &msg, NULL, 0); 2595 if (ret < 0) { 2596 error_report("vhost_user_write() returned %d, create session failed", 2597 ret); 2598 return ret; 2599 } 2600 2601 ret = vhost_user_read(dev, &msg); 2602 if (ret < 0) { 2603 error_report("vhost_user_read() returned %d, create session failed", 2604 ret); 2605 return ret; 2606 } 2607 2608 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) { 2609 error_report("Received unexpected msg type. Expected %d received %d", 2610 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request); 2611 return -EPROTO; 2612 } 2613 2614 if (msg.hdr.size != sizeof(msg.payload.session)) { 2615 error_report("Received bad msg size."); 2616 return -EPROTO; 2617 } 2618 2619 if (msg.payload.session.session_id < 0) { 2620 error_report("Bad session id: %" PRId64 "", 2621 msg.payload.session.session_id); 2622 return -EINVAL; 2623 } 2624 *session_id = msg.payload.session.session_id; 2625 2626 return 0; 2627 } 2628 2629 static int 2630 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) 2631 { 2632 int ret; 2633 bool crypto_session = virtio_has_feature(dev->protocol_features, 2634 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION); 2635 VhostUserMsg msg = { 2636 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION, 2637 .hdr.flags = VHOST_USER_VERSION, 2638 .hdr.size = sizeof(msg.payload.u64), 2639 }; 2640 msg.payload.u64 = session_id; 2641 2642 if (!crypto_session) { 2643 error_report("vhost-user trying to send unhandled ioctl"); 2644 return -ENOTSUP; 2645 } 2646 2647 ret = vhost_user_write(dev, &msg, NULL, 0); 2648 if (ret < 0) { 2649 error_report("vhost_user_write() returned %d, close session failed", 2650 ret); 2651 return ret; 2652 } 2653 2654 return 0; 2655 } 2656 2657 static bool vhost_user_no_private_memslots(struct vhost_dev *dev) 2658 { 2659 return true; 2660 } 2661 2662 static int vhost_user_get_inflight_fd(struct vhost_dev *dev, 2663 uint16_t queue_size, 2664 struct vhost_inflight *inflight) 2665 { 2666 void *addr; 2667 int fd; 2668 int ret; 2669 struct vhost_user *u = dev->opaque; 2670 CharBackend *chr = u->user->chr; 2671 VhostUserMsg msg = { 2672 .hdr.request = VHOST_USER_GET_INFLIGHT_FD, 2673 .hdr.flags = VHOST_USER_VERSION, 2674 .payload.inflight.num_queues = dev->nvqs, 2675 .payload.inflight.queue_size = queue_size, 2676 .hdr.size = sizeof(msg.payload.inflight), 2677 }; 2678 2679 if (!virtio_has_feature(dev->protocol_features, 2680 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2681 return 0; 2682 } 2683 2684 ret = vhost_user_write(dev, &msg, NULL, 0); 2685 if (ret < 0) { 2686 return ret; 2687 } 2688 2689 ret = vhost_user_read(dev, &msg); 2690 if (ret < 0) { 2691 return ret; 2692 } 2693 2694 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) { 2695 error_report("Received unexpected msg type.
" 2696 "Expected %d received %d", 2697 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request); 2698 return -EPROTO; 2699 } 2700 2701 if (msg.hdr.size != sizeof(msg.payload.inflight)) { 2702 error_report("Received bad msg size."); 2703 return -EPROTO; 2704 } 2705 2706 if (!msg.payload.inflight.mmap_size) { 2707 return 0; 2708 } 2709 2710 fd = qemu_chr_fe_get_msgfd(chr); 2711 if (fd < 0) { 2712 error_report("Failed to get mem fd"); 2713 return -EIO; 2714 } 2715 2716 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE, 2717 MAP_SHARED, fd, msg.payload.inflight.mmap_offset); 2718 2719 if (addr == MAP_FAILED) { 2720 error_report("Failed to mmap mem fd"); 2721 close(fd); 2722 return -EFAULT; 2723 } 2724 2725 inflight->addr = addr; 2726 inflight->fd = fd; 2727 inflight->size = msg.payload.inflight.mmap_size; 2728 inflight->offset = msg.payload.inflight.mmap_offset; 2729 inflight->queue_size = queue_size; 2730 2731 return 0; 2732 } 2733 2734 static int vhost_user_set_inflight_fd(struct vhost_dev *dev, 2735 struct vhost_inflight *inflight) 2736 { 2737 VhostUserMsg msg = { 2738 .hdr.request = VHOST_USER_SET_INFLIGHT_FD, 2739 .hdr.flags = VHOST_USER_VERSION, 2740 .payload.inflight.mmap_size = inflight->size, 2741 .payload.inflight.mmap_offset = inflight->offset, 2742 .payload.inflight.num_queues = dev->nvqs, 2743 .payload.inflight.queue_size = inflight->queue_size, 2744 .hdr.size = sizeof(msg.payload.inflight), 2745 }; 2746 2747 if (!virtio_has_feature(dev->protocol_features, 2748 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) { 2749 return 0; 2750 } 2751 2752 return vhost_user_write(dev, &msg, &inflight->fd, 1); 2753 } 2754 2755 static void vhost_user_state_destroy(gpointer data) 2756 { 2757 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; 2758 vhost_user_host_notifier_remove(n, NULL, true); 2759 } 2760 2761 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) 2762 { 2763 if (user->chr) { 2764 error_setg(errp, "Cannot initialize vhost-user state"); 2765 return false; 2766 } 2767 user->chr = chr; 2768 user->memory_slots = 0; 2769 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4, 2770 &vhost_user_state_destroy); 2771 return true; 2772 } 2773 2774 void vhost_user_cleanup(VhostUserState *user) 2775 { 2776 if (!user->chr) { 2777 return; 2778 } 2779 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); 2780 user->chr = NULL; 2781 } 2782 2783 2784 typedef struct { 2785 vu_async_close_fn cb; 2786 DeviceState *dev; 2787 CharBackend *cd; 2788 struct vhost_dev *vhost; 2789 } VhostAsyncCallback; 2790 2791 static void vhost_user_async_close_bh(void *opaque) 2792 { 2793 VhostAsyncCallback *data = opaque; 2794 2795 data->cb(data->dev); 2796 2797 g_free(data); 2798 } 2799 2800 /* 2801 * We only schedule the work if the machine is running. If suspended 2802 * we want to keep all the in-flight data as is for migration 2803 * purposes. 2804 */ 2805 void vhost_user_async_close(DeviceState *d, 2806 CharBackend *chardev, struct vhost_dev *vhost, 2807 vu_async_close_fn cb) 2808 { 2809 if (!runstate_check(RUN_STATE_SHUTDOWN)) { 2810 /* 2811 * A close event may happen during a read/write, but vhost 2812 * code assumes the vhost_dev remains setup, so delay the 2813 * stop & clear. 
2814 */ 2815 AioContext *ctx = qemu_get_current_aio_context(); 2816 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1); 2817 2818 /* Save data for the callback */ 2819 data->cb = cb; 2820 data->dev = d; 2821 data->cd = chardev; 2822 data->vhost = vhost; 2823 2824 /* Disable any further notifications on the chardev */ 2825 qemu_chr_fe_set_handlers(chardev, 2826 NULL, NULL, NULL, NULL, NULL, NULL, 2827 false); 2828 2829 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data); 2830 2831 /* 2832 * Move vhost device to the stopped state. The vhost-user device 2833 * will be cleaned up and disconnected in the BH. This can be useful in 2834 * the vhost migration code. If a disconnect was caught, there is an 2835 * option for the general vhost code to get the dev state without 2836 * knowing its type (in this case vhost-user). 2837 * 2838 * Note that if the vhost device is fully cleared by the time we 2839 * execute the bottom half, we won't continue with the cleanup. 2840 */ 2841 vhost->started = false; 2842 } 2843 } 2844 2845 static int vhost_user_dev_start(struct vhost_dev *dev, bool started) 2846 { 2847 if (!virtio_has_feature(dev->protocol_features, 2848 VHOST_USER_PROTOCOL_F_STATUS)) { 2849 return 0; 2850 } 2851 2852 /* Set device status only for last queue pair */ 2853 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2854 return 0; 2855 } 2856 2857 if (started) { 2858 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 2859 VIRTIO_CONFIG_S_DRIVER | 2860 VIRTIO_CONFIG_S_DRIVER_OK); 2861 } else { 2862 return 0; 2863 } 2864 } 2865 2866 static void vhost_user_reset_status(struct vhost_dev *dev) 2867 { 2868 /* Set device status only for last queue pair */ 2869 if (dev->vq_index + dev->nvqs != dev->vq_index_end) { 2870 return; 2871 } 2872 2873 if (virtio_has_feature(dev->protocol_features, 2874 VHOST_USER_PROTOCOL_F_STATUS)) { 2875 vhost_user_set_status(dev, 0); 2876 } 2877 } 2878 2879 static bool vhost_user_supports_device_state(struct vhost_dev *dev) 2880 { 2881 return virtio_has_feature(dev->protocol_features, 2882 VHOST_USER_PROTOCOL_F_DEVICE_STATE); 2883 } 2884 2885 static int vhost_user_set_device_state_fd(struct vhost_dev *dev, 2886 VhostDeviceStateDirection direction, 2887 VhostDeviceStatePhase phase, 2888 int fd, 2889 int *reply_fd, 2890 Error **errp) 2891 { 2892 int ret; 2893 struct vhost_user *vu = dev->opaque; 2894 VhostUserMsg msg = { 2895 .hdr = { 2896 .request = VHOST_USER_SET_DEVICE_STATE_FD, 2897 .flags = VHOST_USER_VERSION, 2898 .size = sizeof(msg.payload.transfer_state), 2899 }, 2900 .payload.transfer_state = { 2901 .direction = direction, 2902 .phase = phase, 2903 }, 2904 }; 2905 2906 *reply_fd = -1; 2907 2908 if (!vhost_user_supports_device_state(dev)) { 2909 close(fd); 2910 error_setg(errp, "Back-end does not support migration state transfer"); 2911 return -ENOTSUP; 2912 } 2913 2914 ret = vhost_user_write(dev, &msg, &fd, 1); 2915 close(fd); 2916 if (ret < 0) { 2917 error_setg_errno(errp, -ret, 2918 "Failed to send SET_DEVICE_STATE_FD message"); 2919 return ret; 2920 } 2921 2922 ret = vhost_user_read(dev, &msg); 2923 if (ret < 0) { 2924 error_setg_errno(errp, -ret, 2925 "Failed to receive SET_DEVICE_STATE_FD reply"); 2926 return ret; 2927 } 2928 2929 if (msg.hdr.request != VHOST_USER_SET_DEVICE_STATE_FD) { 2930 error_setg(errp, 2931 "Received unexpected message type, expected %d, received %d", 2932 VHOST_USER_SET_DEVICE_STATE_FD, msg.hdr.request); 2933 return -EPROTO; 2934 } 2935 2936 if (msg.hdr.size != sizeof(msg.payload.u64)) { 2937 error_setg(errp, 2938
"Received bad message size, expected %zu, received %" PRIu32, 2939 sizeof(msg.payload.u64), msg.hdr.size); 2940 return -EPROTO; 2941 } 2942 2943 if ((msg.payload.u64 & 0xff) != 0) { 2944 error_setg(errp, "Back-end did not accept migration state transfer"); 2945 return -EIO; 2946 } 2947 2948 if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) { 2949 *reply_fd = qemu_chr_fe_get_msgfd(vu->user->chr); 2950 if (*reply_fd < 0) { 2951 error_setg(errp, 2952 "Failed to get back-end-provided transfer pipe FD"); 2953 *reply_fd = -1; 2954 return -EIO; 2955 } 2956 } 2957 2958 return 0; 2959 } 2960 2961 static int vhost_user_check_device_state(struct vhost_dev *dev, Error **errp) 2962 { 2963 int ret; 2964 VhostUserMsg msg = { 2965 .hdr = { 2966 .request = VHOST_USER_CHECK_DEVICE_STATE, 2967 .flags = VHOST_USER_VERSION, 2968 .size = 0, 2969 }, 2970 }; 2971 2972 if (!vhost_user_supports_device_state(dev)) { 2973 error_setg(errp, "Back-end does not support migration state transfer"); 2974 return -ENOTSUP; 2975 } 2976 2977 ret = vhost_user_write(dev, &msg, NULL, 0); 2978 if (ret < 0) { 2979 error_setg_errno(errp, -ret, 2980 "Failed to send CHECK_DEVICE_STATE message"); 2981 return ret; 2982 } 2983 2984 ret = vhost_user_read(dev, &msg); 2985 if (ret < 0) { 2986 error_setg_errno(errp, -ret, 2987 "Failed to receive CHECK_DEVICE_STATE reply"); 2988 return ret; 2989 } 2990 2991 if (msg.hdr.request != VHOST_USER_CHECK_DEVICE_STATE) { 2992 error_setg(errp, 2993 "Received unexpected message type, expected %d, received %d", 2994 VHOST_USER_CHECK_DEVICE_STATE, msg.hdr.request); 2995 return -EPROTO; 2996 } 2997 2998 if (msg.hdr.size != sizeof(msg.payload.u64)) { 2999 error_setg(errp, 3000 "Received bad message size, expected %zu, received %" PRIu32, 3001 sizeof(msg.payload.u64), msg.hdr.size); 3002 return -EPROTO; 3003 } 3004 3005 if (msg.payload.u64 != 0) { 3006 error_setg(errp, "Back-end failed to process its internal state"); 3007 return -EIO; 3008 } 3009 3010 return 0; 3011 } 3012 3013 const VhostOps user_ops = { 3014 .backend_type = VHOST_BACKEND_TYPE_USER, 3015 .vhost_backend_init = vhost_user_backend_init, 3016 .vhost_backend_cleanup = vhost_user_backend_cleanup, 3017 .vhost_backend_memslots_limit = vhost_user_memslots_limit, 3018 .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, 3019 .vhost_set_log_base = vhost_user_set_log_base, 3020 .vhost_set_mem_table = vhost_user_set_mem_table, 3021 .vhost_set_vring_addr = vhost_user_set_vring_addr, 3022 .vhost_set_vring_endian = vhost_user_set_vring_endian, 3023 .vhost_set_vring_num = vhost_user_set_vring_num, 3024 .vhost_set_vring_base = vhost_user_set_vring_base, 3025 .vhost_get_vring_base = vhost_user_get_vring_base, 3026 .vhost_set_vring_kick = vhost_user_set_vring_kick, 3027 .vhost_set_vring_call = vhost_user_set_vring_call, 3028 .vhost_set_vring_err = vhost_user_set_vring_err, 3029 .vhost_set_features = vhost_user_set_features, 3030 .vhost_get_features = vhost_user_get_features, 3031 .vhost_set_owner = vhost_user_set_owner, 3032 .vhost_reset_device = vhost_user_reset_device, 3033 .vhost_get_vq_index = vhost_user_get_vq_index, 3034 .vhost_set_vring_enable = vhost_user_set_vring_enable, 3035 .vhost_requires_shm_log = vhost_user_requires_shm_log, 3036 .vhost_migration_done = vhost_user_migration_done, 3037 .vhost_net_set_mtu = vhost_user_net_set_mtu, 3038 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, 3039 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, 3040 .vhost_get_config = vhost_user_get_config, 3041 
.vhost_set_config = vhost_user_set_config, 3042 .vhost_crypto_create_session = vhost_user_crypto_create_session, 3043 .vhost_crypto_close_session = vhost_user_crypto_close_session, 3044 .vhost_get_inflight_fd = vhost_user_get_inflight_fd, 3045 .vhost_set_inflight_fd = vhost_user_set_inflight_fd, 3046 .vhost_dev_start = vhost_user_dev_start, 3047 .vhost_reset_status = vhost_user_reset_status, 3048 .vhost_supports_device_state = vhost_user_supports_device_state, 3049 .vhost_set_device_state_fd = vhost_user_set_device_state_fd, 3050 .vhost_check_device_state = vhost_user_check_device_state, 3051 }; 3052
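/*
 * user_ops is the VhostOps dispatch table for vhost-user; the generic vhost
 * layer selects it when a device is initialized with backend type
 * VHOST_BACKEND_TYPE_USER.
 */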