1 /* 2 * vhost-vdpa 3 * 4 * Copyright(c) 2017-2018 Intel Corporation. 5 * Copyright(c) 2020 Red Hat, Inc. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2 or later. 8 * See the COPYING file in the top-level directory. 9 * 10 */ 11 12 #include "qemu/osdep.h" 13 #include <linux/vhost.h> 14 #include <linux/vfio.h> 15 #include <sys/eventfd.h> 16 #include <sys/ioctl.h> 17 #include "hw/virtio/vhost.h" 18 #include "hw/virtio/vhost-backend.h" 19 #include "hw/virtio/virtio-net.h" 20 #include "hw/virtio/vhost-vdpa.h" 21 #include "exec/address-spaces.h" 22 #include "qemu/main-loop.h" 23 #include "cpu.h" 24 #include "trace.h" 25 #include "qemu-common.h" 26 27 static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section) 28 { 29 return (!memory_region_is_ram(section->mr) && 30 !memory_region_is_iommu(section->mr)) || 31 /* vhost-vDPA doesn't allow MMIO to be mapped */ 32 memory_region_is_ram_device(section->mr) || 33 /* 34 * Sizing an enabled 64-bit BAR can cause spurious mappings to 35 * addresses in the upper part of the 64-bit address space. These 36 * are never accessed by the CPU and beyond the address width of 37 * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width. 38 */ 39 section->offset_within_address_space & (1ULL << 63); 40 } 41 42 static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, 43 void *vaddr, bool readonly) 44 { 45 struct vhost_msg_v2 msg = {}; 46 int fd = v->device_fd; 47 int ret = 0; 48 49 msg.type = v->msg_type; 50 msg.iotlb.iova = iova; 51 msg.iotlb.size = size; 52 msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; 53 msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; 54 msg.iotlb.type = VHOST_IOTLB_UPDATE; 55 56 trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, 57 msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); 58 59 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 60 error_report("failed to write, fd=%d, errno=%d (%s)", 61 fd, errno, strerror(errno)); 62 return -EIO ; 63 } 64 65 return ret; 66 } 67 68 static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, 69 hwaddr size) 70 { 71 struct vhost_msg_v2 msg = {}; 72 int fd = v->device_fd; 73 int ret = 0; 74 75 msg.type = v->msg_type; 76 msg.iotlb.iova = iova; 77 msg.iotlb.size = size; 78 msg.iotlb.type = VHOST_IOTLB_INVALIDATE; 79 80 trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, 81 msg.iotlb.size, msg.iotlb.type); 82 83 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 84 error_report("failed to write, fd=%d, errno=%d (%s)", 85 fd, errno, strerror(errno)); 86 return -EIO ; 87 } 88 89 return ret; 90 } 91 92 static void vhost_vdpa_listener_begin(MemoryListener *listener) 93 { 94 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 95 struct vhost_dev *dev = v->dev; 96 struct vhost_msg_v2 msg = {}; 97 int fd = v->device_fd; 98 99 if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 100 return; 101 } 102 103 msg.type = v->msg_type; 104 msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN; 105 106 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 107 error_report("failed to write, fd=%d, errno=%d (%s)", 108 fd, errno, strerror(errno)); 109 } 110 } 111 112 static void vhost_vdpa_listener_commit(MemoryListener *listener) 113 { 114 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 115 struct vhost_dev *dev = v->dev; 116 struct vhost_msg_v2 msg = {}; 117 int fd = v->device_fd; 118 119 if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { 120 return; 121 } 122 123 msg.type = v->msg_type; 124 msg.iotlb.type = VHOST_IOTLB_BATCH_END; 125 126 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { 127 error_report("failed to write, fd=%d, errno=%d (%s)", 128 fd, errno, strerror(errno)); 129 } 130 } 131 132 static void vhost_vdpa_listener_region_add(MemoryListener *listener, 133 MemoryRegionSection *section) 134 { 135 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 136 hwaddr iova; 137 Int128 llend, llsize; 138 void *vaddr; 139 int ret; 140 141 if (vhost_vdpa_listener_skipped_section(section)) { 142 return; 143 } 144 145 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 146 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 147 error_report("%s received unaligned region", __func__); 148 return; 149 } 150 151 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 152 llend = int128_make64(section->offset_within_address_space); 153 llend = int128_add(llend, section->size); 154 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 155 156 if (int128_ge(int128_make64(iova), llend)) { 157 return; 158 } 159 160 memory_region_ref(section->mr); 161 162 /* Here we assume that memory_region_is_ram(section->mr)==true */ 163 164 vaddr = memory_region_get_ram_ptr(section->mr) + 165 section->offset_within_region + 166 (iova - section->offset_within_address_space); 167 168 trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), 169 vaddr, section->readonly); 170 171 llsize = int128_sub(llend, int128_make64(iova)); 172 173 ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), 174 vaddr, section->readonly); 175 if (ret) { 176 error_report("vhost vdpa map fail!"); 177 goto fail; 178 } 179 180 return; 181 182 fail: 183 /* 184 * On the initfn path, store the first error in the container so we 185 * can gracefully fail. Runtime, there's not much we can do other 186 * than throw a hardware error. 187 */ 188 error_report("vhost-vdpa: DMA mapping failed, unable to continue"); 189 return; 190 191 } 192 193 static void vhost_vdpa_listener_region_del(MemoryListener *listener, 194 MemoryRegionSection *section) 195 { 196 struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); 197 hwaddr iova; 198 Int128 llend, llsize; 199 int ret; 200 201 if (vhost_vdpa_listener_skipped_section(section)) { 202 return; 203 } 204 205 if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != 206 (section->offset_within_region & ~TARGET_PAGE_MASK))) { 207 error_report("%s received unaligned region", __func__); 208 return; 209 } 210 211 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); 212 llend = int128_make64(section->offset_within_address_space); 213 llend = int128_add(llend, section->size); 214 llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); 215 216 trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); 217 218 if (int128_ge(int128_make64(iova), llend)) { 219 return; 220 } 221 222 llsize = int128_sub(llend, int128_make64(iova)); 223 224 ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); 225 if (ret) { 226 error_report("vhost_vdpa dma unmap error!"); 227 } 228 229 memory_region_unref(section->mr); 230 } 231 /* 232 * IOTLB API is used by vhost-vpda which requires incremental updating 233 * of the mapping. So we can not use generic vhost memory listener which 234 * depends on the addnop(). 235 */ 236 static const MemoryListener vhost_vdpa_memory_listener = { 237 .begin = vhost_vdpa_listener_begin, 238 .commit = vhost_vdpa_listener_commit, 239 .region_add = vhost_vdpa_listener_region_add, 240 .region_del = vhost_vdpa_listener_region_del, 241 }; 242 243 static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, 244 void *arg) 245 { 246 struct vhost_vdpa *v = dev->opaque; 247 int fd = v->device_fd; 248 249 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 250 251 return ioctl(fd, request, arg); 252 } 253 254 static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) 255 { 256 uint8_t s; 257 258 trace_vhost_vdpa_add_status(dev, status); 259 if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) { 260 return; 261 } 262 263 s |= status; 264 265 vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); 266 } 267 268 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) 269 { 270 struct vhost_vdpa *v; 271 uint64_t features; 272 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 273 trace_vhost_vdpa_init(dev, opaque); 274 275 v = opaque; 276 v->dev = dev; 277 dev->opaque = opaque ; 278 vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features); 279 dev->backend_features = features; 280 v->listener = vhost_vdpa_memory_listener; 281 v->msg_type = VHOST_IOTLB_MSG_V2; 282 283 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 284 VIRTIO_CONFIG_S_DRIVER); 285 286 return 0; 287 } 288 289 static int vhost_vdpa_cleanup(struct vhost_dev *dev) 290 { 291 struct vhost_vdpa *v; 292 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 293 v = dev->opaque; 294 trace_vhost_vdpa_cleanup(dev, v); 295 memory_listener_unregister(&v->listener); 296 297 dev->opaque = NULL; 298 return 0; 299 } 300 301 static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) 302 { 303 trace_vhost_vdpa_memslots_limit(dev, INT_MAX); 304 return INT_MAX; 305 } 306 307 static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, 308 struct vhost_memory *mem) 309 { 310 trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); 311 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && 312 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { 313 int i; 314 for (i = 0; i < mem->nregions; i++) { 315 trace_vhost_vdpa_dump_regions(dev, i, 316 mem->regions[i].guest_phys_addr, 317 mem->regions[i].memory_size, 318 mem->regions[i].userspace_addr, 319 mem->regions[i].flags_padding); 320 } 321 } 322 if (mem->padding) { 323 return -1; 324 } 325 326 return 0; 327 } 328 329 static int vhost_vdpa_set_features(struct vhost_dev *dev, 330 uint64_t features) 331 { 332 int ret; 333 trace_vhost_vdpa_set_features(dev, features); 334 ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); 335 uint8_t status = 0; 336 if (ret) { 337 return ret; 338 } 339 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); 340 vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 341 342 return !(status & VIRTIO_CONFIG_S_FEATURES_OK); 343 } 344 345 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) 346 { 347 uint64_t features; 348 uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 349 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; 350 int r; 351 352 if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { 353 return 0; 354 } 355 356 features &= f; 357 r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); 358 if (r) { 359 return 0; 360 } 361 362 dev->backend_cap = features; 363 364 return 0; 365 } 366 367 static int vhost_vdpa_get_device_id(struct vhost_dev *dev, 368 uint32_t *device_id) 369 { 370 int ret; 371 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); 372 trace_vhost_vdpa_get_device_id(dev, *device_id); 373 return ret; 374 } 375 376 static int vhost_vdpa_reset_device(struct vhost_dev *dev) 377 { 378 int ret; 379 uint8_t status = 0; 380 381 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); 382 trace_vhost_vdpa_reset_device(dev, status); 383 return ret; 384 } 385 386 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) 387 { 388 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); 389 390 trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index); 391 return idx - dev->vq_index; 392 } 393 394 static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) 395 { 396 int i; 397 trace_vhost_vdpa_set_vring_ready(dev); 398 for (i = 0; i < dev->nvqs; ++i) { 399 struct vhost_vring_state state = { 400 .index = dev->vq_index + i, 401 .num = 1, 402 }; 403 vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); 404 } 405 return 0; 406 } 407 408 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, 409 uint32_t config_len) 410 { 411 int b, len; 412 char line[QEMU_HEXDUMP_LINE_LEN]; 413 414 for (b = 0; b < config_len; b += 16) { 415 len = config_len - b; 416 qemu_hexdump_line(line, b, config, len, false); 417 trace_vhost_vdpa_dump_config(dev, line); 418 } 419 } 420 421 static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, 422 uint32_t offset, uint32_t size, 423 uint32_t flags) 424 { 425 struct vhost_vdpa_config *config; 426 int ret; 427 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 428 429 trace_vhost_vdpa_set_config(dev, offset, size, flags); 430 config = g_malloc(size + config_size); 431 config->off = offset; 432 config->len = size; 433 memcpy(config->buf, data, size); 434 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) && 435 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 436 vhost_vdpa_dump_config(dev, data, size); 437 } 438 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config); 439 g_free(config); 440 return ret; 441 } 442 443 static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, 444 uint32_t config_len) 445 { 446 struct vhost_vdpa_config *v_config; 447 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); 448 int ret; 449 450 trace_vhost_vdpa_get_config(dev, config, config_len); 451 v_config = g_malloc(config_len + config_size); 452 v_config->len = config_len; 453 v_config->off = 0; 454 ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config); 455 memcpy(config, v_config->buf, config_len); 456 g_free(v_config); 457 if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) && 458 trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { 459 vhost_vdpa_dump_config(dev, config, config_len); 460 } 461 return ret; 462 } 463 464 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) 465 { 466 struct vhost_vdpa *v = dev->opaque; 467 trace_vhost_vdpa_dev_start(dev, started); 468 if (started) { 469 uint8_t status = 0; 470 memory_listener_register(&v->listener, &address_space_memory); 471 vhost_vdpa_set_vring_ready(dev); 472 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 473 vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); 474 475 return !(status & VIRTIO_CONFIG_S_DRIVER_OK); 476 } else { 477 vhost_vdpa_reset_device(dev); 478 vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | 479 VIRTIO_CONFIG_S_DRIVER); 480 memory_listener_unregister(&v->listener); 481 482 return 0; 483 } 484 } 485 486 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, 487 struct vhost_log *log) 488 { 489 trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, 490 log->log); 491 return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); 492 } 493 494 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, 495 struct vhost_vring_addr *addr) 496 { 497 trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, 498 addr->desc_user_addr, addr->used_user_addr, 499 addr->avail_user_addr, 500 addr->log_guest_addr); 501 return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); 502 } 503 504 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, 505 struct vhost_vring_state *ring) 506 { 507 trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num); 508 return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); 509 } 510 511 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, 512 struct vhost_vring_state *ring) 513 { 514 trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); 515 return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); 516 } 517 518 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, 519 struct vhost_vring_state *ring) 520 { 521 int ret; 522 523 ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring); 524 trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num); 525 return ret; 526 } 527 528 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, 529 struct vhost_vring_file *file) 530 { 531 trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); 532 return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); 533 } 534 535 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, 536 struct vhost_vring_file *file) 537 { 538 trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); 539 return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); 540 } 541 542 static int vhost_vdpa_get_features(struct vhost_dev *dev, 543 uint64_t *features) 544 { 545 int ret; 546 547 ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); 548 trace_vhost_vdpa_get_features(dev, *features); 549 return ret; 550 } 551 552 static int vhost_vdpa_set_owner(struct vhost_dev *dev) 553 { 554 trace_vhost_vdpa_set_owner(dev); 555 return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); 556 } 557 558 static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, 559 struct vhost_vring_addr *addr, struct vhost_virtqueue *vq) 560 { 561 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); 562 addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys; 563 addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys; 564 addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys; 565 trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr, 566 addr->avail_user_addr, addr->used_user_addr); 567 return 0; 568 } 569 570 static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) 571 { 572 return true; 573 } 574 575 const VhostOps vdpa_ops = { 576 .backend_type = VHOST_BACKEND_TYPE_VDPA, 577 .vhost_backend_init = vhost_vdpa_init, 578 .vhost_backend_cleanup = vhost_vdpa_cleanup, 579 .vhost_set_log_base = vhost_vdpa_set_log_base, 580 .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, 581 .vhost_set_vring_num = vhost_vdpa_set_vring_num, 582 .vhost_set_vring_base = vhost_vdpa_set_vring_base, 583 .vhost_get_vring_base = vhost_vdpa_get_vring_base, 584 .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, 585 .vhost_set_vring_call = vhost_vdpa_set_vring_call, 586 .vhost_get_features = vhost_vdpa_get_features, 587 .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, 588 .vhost_set_owner = vhost_vdpa_set_owner, 589 .vhost_set_vring_endian = NULL, 590 .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, 591 .vhost_set_mem_table = vhost_vdpa_set_mem_table, 592 .vhost_set_features = vhost_vdpa_set_features, 593 .vhost_reset_device = vhost_vdpa_reset_device, 594 .vhost_get_vq_index = vhost_vdpa_get_vq_index, 595 .vhost_get_config = vhost_vdpa_get_config, 596 .vhost_set_config = vhost_vdpa_set_config, 597 .vhost_requires_shm_log = NULL, 598 .vhost_migration_done = NULL, 599 .vhost_backend_can_merge = NULL, 600 .vhost_net_set_mtu = NULL, 601 .vhost_set_iotlb_callback = NULL, 602 .vhost_send_device_iotlb_msg = NULL, 603 .vhost_dev_start = vhost_vdpa_dev_start, 604 .vhost_get_device_id = vhost_vdpa_get_device_id, 605 .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, 606 .vhost_force_iommu = vhost_vdpa_force_iommu, 607 }; 608