/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qemu-common.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and are beyond the address width
            * of some IOMMU hardware.  TODO: VDPA should tell us the IOMMU
            * width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

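    /*
     * Flush the batch: with VHOST_BACKEND_F_IOTLB_BATCH negotiated, the
     * IOTLB updates queued since VHOST_IOTLB_BATCH_BEGIN are committed by
     * the kernel once it sees this VHOST_IOTLB_BATCH_END message.
     */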
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: DMA map failed");
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report("vhost-vdpa: failed to map RAM device region, ignoring");
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping.
 * So we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .begin = vhost_vdpa_listener_begin,
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    trace_vhost_vdpa_add_status(dev, status);
    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    int ret;
    uint8_t status = 0;

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
                 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return 0;
    }

    features &= f;
    r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
    if (r) {
        return 0;
    }

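    /*
     * Cache the acked backend features; the memory listener's begin/commit
     * callbacks test VHOST_BACKEND_F_IOTLB_BATCH here to decide whether
     * IOTLB updates may be batched.
     */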
    dev->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index);
    return idx - dev->vq_index;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    trace_vhost_vdpa_dev_start(dev, started);
    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
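        /*
         * Stop path: reset the device status, restore the ACKNOWLEDGE and
         * DRIVER bits that were set at init time, and stop listening for
         * guest memory map updates.
         */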
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};