/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "cpu.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space.  These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware.  TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}
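/*
 * The MemoryListener callbacks below mirror guest RAM into the device
 * IOTLB: when a RAM section is added to the guest address space, its guest
 * physical address is used directly as the IOVA and mapped to the backing
 * host virtual address with vhost_vdpa_dma_map(); when the section goes
 * away, the range is torn down again with vhost_vdpa_dma_unmap().
 */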
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        /* Allow mapping failures not to be fatal for RAM devices */
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail.  Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost-vdpa: DMA unmap failed");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updates
 * of the mapping, so we cannot use the generic vhost memory listener that
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};
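/*
 * Most of the backend operations below are thin wrappers around this
 * helper, which issues the corresponding vhost ioctl on the vhost-vdpa
 * character device fd that the caller stored in struct vhost_vdpa.
 */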
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    /*
     * Guest memory is mapped through the IOTLB memory listener, so the
     * vhost memory table is not used here beyond a sanity check.
     */
    if (mem->padding) {
        return -1;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };

        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}
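/*
 * Device config space is accessed through variable-length
 * struct vhost_vdpa_config requests: the header carries the offset and
 * length of the access and is followed by the payload bytes.
 */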
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
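/*
 * Backend ops table for VHOST_BACKEND_TYPE_VDPA.  Callbacks that are left
 * NULL are operations this backend does not implement; the generic vhost
 * layer checks for NULL before calling them.
 */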
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};
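/*
 * Usage sketch (assumptions, not defined in this file): the vhost-vdpa
 * character device is opened by the netdev glue (net/vhost-vdpa.c), which
 * fills in struct vhost_vdpa::device_fd before vhost_dev_init() reaches
 * vhost_vdpa_init() above.  A guest is then typically started with
 * something like:
 *
 *     -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0
 *     -device virtio-net-pci,netdev=vdpa0
 */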