1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <assert.h> 15 #include <string.h> 16 17 static u16 virtio_pci__port_addr(struct virtio_pci *vpci) 18 { 19 return pci__bar_address(&vpci->pci_hdr, 0); 20 } 21 22 static u32 virtio_pci__mmio_addr(struct virtio_pci *vpci) 23 { 24 return pci__bar_address(&vpci->pci_hdr, 1); 25 } 26 27 static u32 virtio_pci__msix_io_addr(struct virtio_pci *vpci) 28 { 29 return pci__bar_address(&vpci->pci_hdr, 2); 30 } 31 32 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 33 { 34 struct virtio_pci_ioevent_param *ioeventfd = param; 35 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 36 37 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 38 } 39 40 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 41 { 42 struct ioevent ioevent; 43 struct virtio_pci *vpci = vdev->virtio; 44 u32 mmio_addr = virtio_pci__mmio_addr(vpci); 45 u16 port_addr = virtio_pci__port_addr(vpci); 46 int r, flags = 0; 47 int fd; 48 49 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 50 .vdev = vdev, 51 .vq = vq, 52 }; 53 54 ioevent = (struct ioevent) { 55 .fn = virtio_pci__ioevent_callback, 56 .fn_ptr = &vpci->ioeventfds[vq], 57 .datamatch = vq, 58 .fn_kvm = kvm, 59 }; 60 61 /* 62 * Vhost will poll the eventfd in host kernel side, otherwise we 63 * need to poll in userspace. 64 */ 65 if (!vdev->use_vhost) 66 flags |= IOEVENTFD_FLAG_USER_POLL; 67 68 /* ioport */ 69 ioevent.io_addr = port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 70 ioevent.io_len = sizeof(u16); 71 ioevent.fd = fd = eventfd(0, 0); 72 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 73 if (r) 74 return r; 75 76 /* mmio */ 77 ioevent.io_addr = mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 78 ioevent.io_len = sizeof(u16); 79 ioevent.fd = eventfd(0, 0); 80 r = ioeventfd__add_event(&ioevent, flags); 81 if (r) 82 goto free_ioport_evt; 83 84 if (vdev->ops->notify_vq_eventfd) 85 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd); 86 return 0; 87 88 free_ioport_evt: 89 ioeventfd__del_event(port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 90 return r; 91 } 92 93 static void virtio_pci_exit_vq(struct kvm *kvm, struct virtio_device *vdev, 94 int vq) 95 { 96 struct virtio_pci *vpci = vdev->virtio; 97 u32 mmio_addr = virtio_pci__mmio_addr(vpci); 98 u16 port_addr = virtio_pci__port_addr(vpci); 99 100 ioeventfd__del_event(mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 101 ioeventfd__del_event(port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 102 virtio_exit_vq(kvm, vdev, vpci->dev, vq); 103 } 104 105 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 106 { 107 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 108 } 109 110 static bool virtio_pci__specific_data_in(struct kvm *kvm, struct virtio_device *vdev, 111 void *data, int size, unsigned long offset) 112 { 113 u32 config_offset; 114 struct virtio_pci *vpci = vdev->virtio; 115 int type = virtio__get_dev_specific_field(offset - 20, 116 virtio_pci__msix_enabled(vpci), 117 &config_offset); 118 if (type == VIRTIO_PCI_O_MSIX) { 119 switch (offset) { 120 case VIRTIO_MSI_CONFIG_VECTOR: 121 ioport__write16(data, vpci->config_vector); 122 break; 123 case VIRTIO_MSI_QUEUE_VECTOR: 124 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 125 break; 126 }; 127 128 return true; 129 } else if (type == VIRTIO_PCI_O_CONFIG) { 130 u8 cfg; 131 132 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 133 ioport__write8(data, cfg); 134 return true; 135 } 136 137 return false; 138 } 139 140 static bool virtio_pci__data_in(struct kvm_cpu *vcpu, struct virtio_device *vdev, 141 unsigned long offset, void *data, int size) 142 { 143 bool ret = true; 144 struct virtio_pci *vpci; 145 struct virt_queue *vq; 146 struct kvm *kvm; 147 u32 val; 148 149 kvm = vcpu->kvm; 150 vpci = vdev->virtio; 151 152 switch (offset) { 153 case VIRTIO_PCI_HOST_FEATURES: 154 val = vdev->ops->get_host_features(kvm, vpci->dev); 155 ioport__write32(data, val); 156 break; 157 case VIRTIO_PCI_QUEUE_PFN: 158 vq = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector); 159 ioport__write32(data, vq->pfn); 160 break; 161 case VIRTIO_PCI_QUEUE_NUM: 162 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 163 ioport__write16(data, val); 164 break; 165 case VIRTIO_PCI_STATUS: 166 ioport__write8(data, vpci->status); 167 break; 168 case VIRTIO_PCI_ISR: 169 ioport__write8(data, vpci->isr); 170 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 171 vpci->isr = VIRTIO_IRQ_LOW; 172 break; 173 default: 174 ret = virtio_pci__specific_data_in(kvm, vdev, data, size, offset); 175 break; 176 }; 177 178 return ret; 179 } 180 181 static void update_msix_map(struct virtio_pci *vpci, 182 struct msix_table *msix_entry, u32 vecnum) 183 { 184 u32 gsi, i; 185 186 /* Find the GSI number used for that vector */ 187 if (vecnum == vpci->config_vector) { 188 gsi = vpci->config_gsi; 189 } else { 190 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 191 if (vpci->vq_vector[i] == vecnum) 192 break; 193 if (i == VIRTIO_PCI_MAX_VQ) 194 return; 195 gsi = vpci->gsis[i]; 196 } 197 198 if (gsi == 0) 199 return; 200 201 msix_entry = &msix_entry[vecnum]; 202 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 203 } 204 205 static bool virtio_pci__specific_data_out(struct kvm *kvm, struct virtio_device *vdev, 206 void *data, int size, unsigned long offset) 207 { 208 struct virtio_pci *vpci = vdev->virtio; 209 u32 config_offset, vec; 210 int gsi; 211 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 212 &config_offset); 213 if (type == VIRTIO_PCI_O_MSIX) { 214 switch (offset) { 215 case VIRTIO_MSI_CONFIG_VECTOR: 216 vec = vpci->config_vector = ioport__read16(data); 217 if (vec == VIRTIO_MSI_NO_VECTOR) 218 break; 219 220 gsi = irq__add_msix_route(kvm, 221 &vpci->msix_table[vec].msg, 222 vpci->dev_hdr.dev_num << 3); 223 /* 224 * We don't need IRQ routing if we can use 225 * MSI injection via the KVM_SIGNAL_MSI ioctl. 226 */ 227 if (gsi == -ENXIO && 228 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 229 break; 230 231 if (gsi < 0) { 232 die("failed to configure MSIs"); 233 break; 234 } 235 236 vpci->config_gsi = gsi; 237 break; 238 case VIRTIO_MSI_QUEUE_VECTOR: 239 vec = ioport__read16(data); 240 vpci->vq_vector[vpci->queue_selector] = vec; 241 242 if (vec == VIRTIO_MSI_NO_VECTOR) 243 break; 244 245 gsi = irq__add_msix_route(kvm, 246 &vpci->msix_table[vec].msg, 247 vpci->dev_hdr.dev_num << 3); 248 /* 249 * We don't need IRQ routing if we can use 250 * MSI injection via the KVM_SIGNAL_MSI ioctl. 251 */ 252 if (gsi == -ENXIO && 253 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 254 break; 255 256 if (gsi < 0) { 257 die("failed to configure MSIs"); 258 break; 259 } 260 261 vpci->gsis[vpci->queue_selector] = gsi; 262 if (vdev->ops->notify_vq_gsi) 263 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 264 vpci->queue_selector, 265 gsi); 266 break; 267 }; 268 269 return true; 270 } else if (type == VIRTIO_PCI_O_CONFIG) { 271 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 272 273 return true; 274 } 275 276 return false; 277 } 278 279 static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device *vdev, 280 unsigned long offset, void *data, int size) 281 { 282 bool ret = true; 283 struct virtio_pci *vpci; 284 struct kvm *kvm; 285 u32 val; 286 287 kvm = vcpu->kvm; 288 vpci = vdev->virtio; 289 290 switch (offset) { 291 case VIRTIO_PCI_GUEST_FEATURES: 292 val = ioport__read32(data); 293 virtio_set_guest_features(kvm, vdev, vpci->dev, val); 294 break; 295 case VIRTIO_PCI_QUEUE_PFN: 296 val = ioport__read32(data); 297 if (val) { 298 virtio_pci__init_ioeventfd(kvm, vdev, 299 vpci->queue_selector); 300 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 301 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 302 VIRTIO_PCI_VRING_ALIGN, val); 303 } else { 304 virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector); 305 } 306 break; 307 case VIRTIO_PCI_QUEUE_SEL: 308 vpci->queue_selector = ioport__read16(data); 309 break; 310 case VIRTIO_PCI_QUEUE_NOTIFY: 311 val = ioport__read16(data); 312 vdev->ops->notify_vq(kvm, vpci->dev, val); 313 break; 314 case VIRTIO_PCI_STATUS: 315 vpci->status = ioport__read8(data); 316 if (!vpci->status) /* Sample endianness on reset */ 317 vdev->endian = kvm_cpu__get_endianness(vcpu); 318 virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); 319 break; 320 default: 321 ret = virtio_pci__specific_data_out(kvm, vdev, data, size, offset); 322 break; 323 }; 324 325 return ret; 326 } 327 328 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 329 u64 addr, u8 *data, u32 len, 330 u8 is_write, void *ptr) 331 { 332 struct virtio_device *vdev = ptr; 333 struct virtio_pci *vpci = vdev->virtio; 334 struct msix_table *table; 335 u32 msix_io_addr = virtio_pci__msix_io_addr(vpci); 336 int vecnum; 337 size_t offset; 338 339 if (addr > msix_io_addr + PCI_IO_SIZE) { 340 if (is_write) 341 return; 342 table = (struct msix_table *)&vpci->msix_pba; 343 offset = addr - (msix_io_addr + PCI_IO_SIZE); 344 } else { 345 table = vpci->msix_table; 346 offset = addr - msix_io_addr; 347 } 348 vecnum = offset / sizeof(struct msix_table); 349 offset = offset % sizeof(struct msix_table); 350 351 if (!is_write) { 352 memcpy(data, (void *)&table[vecnum] + offset, len); 353 return; 354 } 355 356 memcpy((void *)&table[vecnum] + offset, data, len); 357 358 /* Did we just update the address or payload? */ 359 if (offset < offsetof(struct msix_table, ctrl)) 360 update_msix_map(vpci, table, vecnum); 361 } 362 363 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 364 int vec) 365 { 366 struct kvm_msi msi = { 367 .address_lo = vpci->msix_table[vec].msg.address_lo, 368 .address_hi = vpci->msix_table[vec].msg.address_hi, 369 .data = vpci->msix_table[vec].msg.data, 370 }; 371 372 if (kvm->msix_needs_devid) { 373 msi.flags = KVM_MSI_VALID_DEVID; 374 msi.devid = vpci->dev_hdr.dev_num << 3; 375 } 376 377 irq__signal_msi(kvm, &msi); 378 } 379 380 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 381 { 382 struct virtio_pci *vpci = vdev->virtio; 383 int tbl = vpci->vq_vector[vq]; 384 385 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 386 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 387 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 388 389 vpci->msix_pba |= 1 << tbl; 390 return 0; 391 } 392 393 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 394 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 395 else 396 kvm__irq_trigger(kvm, vpci->gsis[vq]); 397 } else { 398 vpci->isr = VIRTIO_IRQ_HIGH; 399 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 400 } 401 return 0; 402 } 403 404 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 405 { 406 struct virtio_pci *vpci = vdev->virtio; 407 int tbl = vpci->config_vector; 408 409 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 410 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 411 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 412 413 vpci->msix_pba |= 1 << tbl; 414 return 0; 415 } 416 417 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 418 virtio_pci__signal_msi(kvm, vpci, tbl); 419 else 420 kvm__irq_trigger(kvm, vpci->config_gsi); 421 } else { 422 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 423 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 424 } 425 426 return 0; 427 } 428 429 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 430 u64 addr, u8 *data, u32 len, 431 u8 is_write, void *ptr) 432 { 433 struct virtio_device *vdev = ptr; 434 struct virtio_pci *vpci = vdev->virtio; 435 u32 ioport_addr = virtio_pci__port_addr(vpci); 436 u32 base_addr; 437 438 if (addr >= ioport_addr && 439 addr < ioport_addr + pci__bar_size(&vpci->pci_hdr, 0)) 440 base_addr = ioport_addr; 441 else 442 base_addr = virtio_pci__mmio_addr(vpci); 443 444 if (!is_write) 445 virtio_pci__data_in(vcpu, vdev, addr - base_addr, data, len); 446 else 447 virtio_pci__data_out(vcpu, vdev, addr - base_addr, data, len); 448 } 449 450 static int virtio_pci__bar_activate(struct kvm *kvm, 451 struct pci_device_header *pci_hdr, 452 int bar_num, void *data) 453 { 454 struct virtio_device *vdev = data; 455 u32 bar_addr, bar_size; 456 int r = -EINVAL; 457 458 assert(bar_num <= 2); 459 460 bar_addr = pci__bar_address(pci_hdr, bar_num); 461 bar_size = pci__bar_size(pci_hdr, bar_num); 462 463 switch (bar_num) { 464 case 0: 465 r = kvm__register_pio(kvm, bar_addr, bar_size, 466 virtio_pci__io_mmio_callback, vdev); 467 break; 468 case 1: 469 r = kvm__register_mmio(kvm, bar_addr, bar_size, false, 470 virtio_pci__io_mmio_callback, vdev); 471 break; 472 case 2: 473 r = kvm__register_mmio(kvm, bar_addr, bar_size, false, 474 virtio_pci__msix_mmio_callback, vdev); 475 break; 476 } 477 478 return r; 479 } 480 481 static int virtio_pci__bar_deactivate(struct kvm *kvm, 482 struct pci_device_header *pci_hdr, 483 int bar_num, void *data) 484 { 485 u32 bar_addr; 486 bool success; 487 int r = -EINVAL; 488 489 assert(bar_num <= 2); 490 491 bar_addr = pci__bar_address(pci_hdr, bar_num); 492 493 switch (bar_num) { 494 case 0: 495 r = kvm__deregister_pio(kvm, bar_addr); 496 break; 497 case 1: 498 case 2: 499 success = kvm__deregister_mmio(kvm, bar_addr); 500 /* kvm__deregister_mmio fails when the region is not found. */ 501 r = (success ? 0 : -ENOENT); 502 break; 503 } 504 505 return r; 506 } 507 508 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 509 int device_id, int subsys_id, int class) 510 { 511 struct virtio_pci *vpci = vdev->virtio; 512 u32 mmio_addr, msix_io_block; 513 u16 port_addr; 514 int r; 515 516 vpci->kvm = kvm; 517 vpci->dev = dev; 518 519 BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE)); 520 521 port_addr = pci_get_io_port_block(PCI_IO_SIZE); 522 mmio_addr = pci_get_mmio_block(PCI_IO_SIZE); 523 msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2); 524 525 vpci->pci_hdr = (struct pci_device_header) { 526 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 527 .device_id = cpu_to_le16(device_id), 528 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 529 .header_type = PCI_HEADER_TYPE_NORMAL, 530 .revision_id = 0, 531 .class[0] = class & 0xff, 532 .class[1] = (class >> 8) & 0xff, 533 .class[2] = (class >> 16) & 0xff, 534 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 535 .subsys_id = cpu_to_le16(subsys_id), 536 .bar[0] = cpu_to_le32(port_addr 537 | PCI_BASE_ADDRESS_SPACE_IO), 538 .bar[1] = cpu_to_le32(mmio_addr 539 | PCI_BASE_ADDRESS_SPACE_MEMORY), 540 .bar[2] = cpu_to_le32(msix_io_block 541 | PCI_BASE_ADDRESS_SPACE_MEMORY), 542 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 543 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 544 .bar_size[0] = cpu_to_le32(PCI_IO_SIZE), 545 .bar_size[1] = cpu_to_le32(PCI_IO_SIZE), 546 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 547 }; 548 549 r = pci__register_bar_regions(kvm, &vpci->pci_hdr, 550 virtio_pci__bar_activate, 551 virtio_pci__bar_deactivate, vdev); 552 if (r < 0) 553 return r; 554 555 vpci->dev_hdr = (struct device_header) { 556 .bus_type = DEVICE_BUS_PCI, 557 .data = &vpci->pci_hdr, 558 }; 559 560 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 561 vpci->pci_hdr.msix.next = 0; 562 /* 563 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 564 * VIRTIO_PCI_MAX_CONFIG entries for config. 565 * 566 * To quote the PCI spec: 567 * 568 * System software reads this field to determine the 569 * MSI-X Table Size N, which is encoded as N-1. 570 * For example, a returned value of "00000000011" 571 * indicates a table size of 4. 572 */ 573 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 574 575 /* Both table and PBA are mapped to the same BAR (2) */ 576 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 577 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 578 vpci->config_vector = 0; 579 580 if (irq__can_signal_msi(kvm)) 581 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 582 583 vpci->legacy_irq_line = pci__assign_irq(&vpci->pci_hdr); 584 585 r = device__register(&vpci->dev_hdr); 586 if (r < 0) 587 return r; 588 589 return 0; 590 } 591 592 int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev) 593 { 594 int vq; 595 struct virtio_pci *vpci = vdev->virtio; 596 597 for (vq = 0; vq < vdev->ops->get_vq_count(kvm, vpci->dev); vq++) 598 virtio_pci_exit_vq(kvm, vdev, vq); 599 600 return 0; 601 } 602 603 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 604 { 605 struct virtio_pci *vpci = vdev->virtio; 606 607 virtio_pci__reset(kvm, vdev); 608 kvm__deregister_mmio(kvm, virtio_pci__mmio_addr(vpci)); 609 kvm__deregister_mmio(kvm, virtio_pci__msix_io_addr(vpci)); 610 kvm__deregister_pio(kvm, virtio_pci__port_addr(vpci)); 611 612 return 0; 613 } 614