1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <string.h> 15 16 static u16 virtio_pci__port_addr(struct virtio_pci *vpci) 17 { 18 return pci__bar_address(&vpci->pci_hdr, 0); 19 } 20 21 static u32 virtio_pci__mmio_addr(struct virtio_pci *vpci) 22 { 23 return pci__bar_address(&vpci->pci_hdr, 1); 24 } 25 26 static u32 virtio_pci__msix_io_addr(struct virtio_pci *vpci) 27 { 28 return pci__bar_address(&vpci->pci_hdr, 2); 29 } 30 31 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 32 { 33 struct virtio_pci_ioevent_param *ioeventfd = param; 34 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 35 36 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 37 } 38 39 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 40 { 41 struct ioevent ioevent; 42 struct virtio_pci *vpci = vdev->virtio; 43 u32 mmio_addr = virtio_pci__mmio_addr(vpci); 44 u16 port_addr = virtio_pci__port_addr(vpci); 45 int r, flags = 0; 46 int fd; 47 48 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 49 .vdev = vdev, 50 .vq = vq, 51 }; 52 53 ioevent = (struct ioevent) { 54 .fn = virtio_pci__ioevent_callback, 55 .fn_ptr = &vpci->ioeventfds[vq], 56 .datamatch = vq, 57 .fn_kvm = kvm, 58 }; 59 60 /* 61 * Vhost will poll the eventfd in host kernel side, otherwise we 62 * need to poll in userspace. 63 */ 64 if (!vdev->use_vhost) 65 flags |= IOEVENTFD_FLAG_USER_POLL; 66 67 /* ioport */ 68 ioevent.io_addr = port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 69 ioevent.io_len = sizeof(u16); 70 ioevent.fd = fd = eventfd(0, 0); 71 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 72 if (r) 73 return r; 74 75 /* mmio */ 76 ioevent.io_addr = mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 77 ioevent.io_len = sizeof(u16); 78 ioevent.fd = eventfd(0, 0); 79 r = ioeventfd__add_event(&ioevent, flags); 80 if (r) 81 goto free_ioport_evt; 82 83 if (vdev->ops->notify_vq_eventfd) 84 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd); 85 return 0; 86 87 free_ioport_evt: 88 ioeventfd__del_event(port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 89 return r; 90 } 91 92 static void virtio_pci_exit_vq(struct kvm *kvm, struct virtio_device *vdev, 93 int vq) 94 { 95 struct virtio_pci *vpci = vdev->virtio; 96 u32 mmio_addr = virtio_pci__mmio_addr(vpci); 97 u16 port_addr = virtio_pci__port_addr(vpci); 98 99 ioeventfd__del_event(mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 100 ioeventfd__del_event(port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 101 virtio_exit_vq(kvm, vdev, vpci->dev, vq); 102 } 103 104 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 105 { 106 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 107 } 108 109 static bool virtio_pci__specific_data_in(struct kvm *kvm, struct virtio_device *vdev, 110 void *data, int size, unsigned long offset) 111 { 112 u32 config_offset; 113 struct virtio_pci *vpci = vdev->virtio; 114 int type = virtio__get_dev_specific_field(offset - 20, 115 virtio_pci__msix_enabled(vpci), 116 &config_offset); 117 if (type == VIRTIO_PCI_O_MSIX) { 118 switch (offset) { 119 case VIRTIO_MSI_CONFIG_VECTOR: 120 ioport__write16(data, vpci->config_vector); 121 break; 122 case VIRTIO_MSI_QUEUE_VECTOR: 123 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 124 break; 125 }; 126 127 return true; 128 } else if (type == VIRTIO_PCI_O_CONFIG) { 129 u8 cfg; 130 131 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 132 ioport__write8(data, cfg); 133 return true; 134 } 135 136 return false; 137 } 138 139 static bool virtio_pci__data_in(struct kvm_cpu *vcpu, struct virtio_device *vdev, 140 unsigned long offset, void *data, int size) 141 { 142 bool ret = true; 143 struct virtio_pci *vpci; 144 struct virt_queue *vq; 145 struct kvm *kvm; 146 u32 val; 147 148 kvm = vcpu->kvm; 149 vpci = vdev->virtio; 150 151 switch (offset) { 152 case VIRTIO_PCI_HOST_FEATURES: 153 val = vdev->ops->get_host_features(kvm, vpci->dev); 154 ioport__write32(data, val); 155 break; 156 case VIRTIO_PCI_QUEUE_PFN: 157 vq = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector); 158 ioport__write32(data, vq->pfn); 159 break; 160 case VIRTIO_PCI_QUEUE_NUM: 161 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 162 ioport__write16(data, val); 163 break; 164 case VIRTIO_PCI_STATUS: 165 ioport__write8(data, vpci->status); 166 break; 167 case VIRTIO_PCI_ISR: 168 ioport__write8(data, vpci->isr); 169 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 170 vpci->isr = VIRTIO_IRQ_LOW; 171 break; 172 default: 173 ret = virtio_pci__specific_data_in(kvm, vdev, data, size, offset); 174 break; 175 }; 176 177 return ret; 178 } 179 180 static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 181 { 182 struct virtio_device *vdev = ioport->priv; 183 struct virtio_pci *vpci = vdev->virtio; 184 unsigned long offset = port - virtio_pci__port_addr(vpci); 185 186 return virtio_pci__data_in(vcpu, vdev, offset, data, size); 187 } 188 189 static void update_msix_map(struct virtio_pci *vpci, 190 struct msix_table *msix_entry, u32 vecnum) 191 { 192 u32 gsi, i; 193 194 /* Find the GSI number used for that vector */ 195 if (vecnum == vpci->config_vector) { 196 gsi = vpci->config_gsi; 197 } else { 198 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 199 if (vpci->vq_vector[i] == vecnum) 200 break; 201 if (i == VIRTIO_PCI_MAX_VQ) 202 return; 203 gsi = vpci->gsis[i]; 204 } 205 206 if (gsi == 0) 207 return; 208 209 msix_entry = &msix_entry[vecnum]; 210 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 211 } 212 213 static bool virtio_pci__specific_data_out(struct kvm *kvm, struct virtio_device *vdev, 214 void *data, int size, unsigned long offset) 215 { 216 struct virtio_pci *vpci = vdev->virtio; 217 u32 config_offset, vec; 218 int gsi; 219 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 220 &config_offset); 221 if (type == VIRTIO_PCI_O_MSIX) { 222 switch (offset) { 223 case VIRTIO_MSI_CONFIG_VECTOR: 224 vec = vpci->config_vector = ioport__read16(data); 225 if (vec == VIRTIO_MSI_NO_VECTOR) 226 break; 227 228 gsi = irq__add_msix_route(kvm, 229 &vpci->msix_table[vec].msg, 230 vpci->dev_hdr.dev_num << 3); 231 /* 232 * We don't need IRQ routing if we can use 233 * MSI injection via the KVM_SIGNAL_MSI ioctl. 234 */ 235 if (gsi == -ENXIO && 236 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 237 break; 238 239 if (gsi < 0) { 240 die("failed to configure MSIs"); 241 break; 242 } 243 244 vpci->config_gsi = gsi; 245 break; 246 case VIRTIO_MSI_QUEUE_VECTOR: 247 vec = ioport__read16(data); 248 vpci->vq_vector[vpci->queue_selector] = vec; 249 250 if (vec == VIRTIO_MSI_NO_VECTOR) 251 break; 252 253 gsi = irq__add_msix_route(kvm, 254 &vpci->msix_table[vec].msg, 255 vpci->dev_hdr.dev_num << 3); 256 /* 257 * We don't need IRQ routing if we can use 258 * MSI injection via the KVM_SIGNAL_MSI ioctl. 259 */ 260 if (gsi == -ENXIO && 261 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 262 break; 263 264 if (gsi < 0) { 265 die("failed to configure MSIs"); 266 break; 267 } 268 269 vpci->gsis[vpci->queue_selector] = gsi; 270 if (vdev->ops->notify_vq_gsi) 271 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 272 vpci->queue_selector, 273 gsi); 274 break; 275 }; 276 277 return true; 278 } else if (type == VIRTIO_PCI_O_CONFIG) { 279 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 280 281 return true; 282 } 283 284 return false; 285 } 286 287 static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device *vdev, 288 unsigned long offset, void *data, int size) 289 { 290 bool ret = true; 291 struct virtio_pci *vpci; 292 struct kvm *kvm; 293 u32 val; 294 295 kvm = vcpu->kvm; 296 vpci = vdev->virtio; 297 298 switch (offset) { 299 case VIRTIO_PCI_GUEST_FEATURES: 300 val = ioport__read32(data); 301 virtio_set_guest_features(kvm, vdev, vpci->dev, val); 302 break; 303 case VIRTIO_PCI_QUEUE_PFN: 304 val = ioport__read32(data); 305 if (val) { 306 virtio_pci__init_ioeventfd(kvm, vdev, 307 vpci->queue_selector); 308 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 309 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 310 VIRTIO_PCI_VRING_ALIGN, val); 311 } else { 312 virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector); 313 } 314 break; 315 case VIRTIO_PCI_QUEUE_SEL: 316 vpci->queue_selector = ioport__read16(data); 317 break; 318 case VIRTIO_PCI_QUEUE_NOTIFY: 319 val = ioport__read16(data); 320 vdev->ops->notify_vq(kvm, vpci->dev, val); 321 break; 322 case VIRTIO_PCI_STATUS: 323 vpci->status = ioport__read8(data); 324 if (!vpci->status) /* Sample endianness on reset */ 325 vdev->endian = kvm_cpu__get_endianness(vcpu); 326 virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); 327 break; 328 default: 329 ret = virtio_pci__specific_data_out(kvm, vdev, data, size, offset); 330 break; 331 }; 332 333 return ret; 334 } 335 336 static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 337 { 338 struct virtio_device *vdev = ioport->priv; 339 struct virtio_pci *vpci = vdev->virtio; 340 unsigned long offset = port - virtio_pci__port_addr(vpci); 341 342 return virtio_pci__data_out(vcpu, vdev, offset, data, size); 343 } 344 345 static struct ioport_operations virtio_pci__io_ops = { 346 .io_in = virtio_pci__io_in, 347 .io_out = virtio_pci__io_out, 348 }; 349 350 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 351 u64 addr, u8 *data, u32 len, 352 u8 is_write, void *ptr) 353 { 354 struct virtio_device *vdev = ptr; 355 struct virtio_pci *vpci = vdev->virtio; 356 struct msix_table *table; 357 u32 msix_io_addr = virtio_pci__msix_io_addr(vpci); 358 int vecnum; 359 size_t offset; 360 361 if (addr > msix_io_addr + PCI_IO_SIZE) { 362 if (is_write) 363 return; 364 table = (struct msix_table *)&vpci->msix_pba; 365 offset = addr - (msix_io_addr + PCI_IO_SIZE); 366 } else { 367 table = vpci->msix_table; 368 offset = addr - msix_io_addr; 369 } 370 vecnum = offset / sizeof(struct msix_table); 371 offset = offset % sizeof(struct msix_table); 372 373 if (!is_write) { 374 memcpy(data, (void *)&table[vecnum] + offset, len); 375 return; 376 } 377 378 memcpy((void *)&table[vecnum] + offset, data, len); 379 380 /* Did we just update the address or payload? */ 381 if (offset < offsetof(struct msix_table, ctrl)) 382 update_msix_map(vpci, table, vecnum); 383 } 384 385 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 386 int vec) 387 { 388 struct kvm_msi msi = { 389 .address_lo = vpci->msix_table[vec].msg.address_lo, 390 .address_hi = vpci->msix_table[vec].msg.address_hi, 391 .data = vpci->msix_table[vec].msg.data, 392 }; 393 394 if (kvm->msix_needs_devid) { 395 msi.flags = KVM_MSI_VALID_DEVID; 396 msi.devid = vpci->dev_hdr.dev_num << 3; 397 } 398 399 irq__signal_msi(kvm, &msi); 400 } 401 402 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 403 { 404 struct virtio_pci *vpci = vdev->virtio; 405 int tbl = vpci->vq_vector[vq]; 406 407 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 408 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 409 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 410 411 vpci->msix_pba |= 1 << tbl; 412 return 0; 413 } 414 415 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 416 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 417 else 418 kvm__irq_trigger(kvm, vpci->gsis[vq]); 419 } else { 420 vpci->isr = VIRTIO_IRQ_HIGH; 421 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 422 } 423 return 0; 424 } 425 426 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 427 { 428 struct virtio_pci *vpci = vdev->virtio; 429 int tbl = vpci->config_vector; 430 431 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 432 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 433 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 434 435 vpci->msix_pba |= 1 << tbl; 436 return 0; 437 } 438 439 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 440 virtio_pci__signal_msi(kvm, vpci, tbl); 441 else 442 kvm__irq_trigger(kvm, vpci->config_gsi); 443 } else { 444 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 445 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 446 } 447 448 return 0; 449 } 450 451 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 452 u64 addr, u8 *data, u32 len, 453 u8 is_write, void *ptr) 454 { 455 struct virtio_device *vdev = ptr; 456 struct virtio_pci *vpci = vdev->virtio; 457 u32 mmio_addr = virtio_pci__mmio_addr(vpci); 458 459 if (!is_write) 460 virtio_pci__data_in(vcpu, vdev, addr - mmio_addr, data, len); 461 else 462 virtio_pci__data_out(vcpu, vdev, addr - mmio_addr, data, len); 463 } 464 465 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 466 int device_id, int subsys_id, int class) 467 { 468 struct virtio_pci *vpci = vdev->virtio; 469 u32 mmio_addr, msix_io_block; 470 u16 port_addr; 471 int r; 472 473 vpci->kvm = kvm; 474 vpci->dev = dev; 475 476 BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE)); 477 478 port_addr = pci_get_io_port_block(PCI_IO_SIZE); 479 r = ioport__register(kvm, port_addr, &virtio_pci__io_ops, PCI_IO_SIZE, 480 vdev); 481 if (r < 0) 482 return r; 483 port_addr = (u16)r; 484 485 mmio_addr = pci_get_mmio_block(PCI_IO_SIZE); 486 r = kvm__register_mmio(kvm, mmio_addr, PCI_IO_SIZE, false, 487 virtio_pci__io_mmio_callback, vdev); 488 if (r < 0) 489 goto free_ioport; 490 491 msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2); 492 r = kvm__register_mmio(kvm, msix_io_block, PCI_IO_SIZE * 2, false, 493 virtio_pci__msix_mmio_callback, vdev); 494 if (r < 0) 495 goto free_mmio; 496 497 vpci->pci_hdr = (struct pci_device_header) { 498 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 499 .device_id = cpu_to_le16(device_id), 500 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 501 .header_type = PCI_HEADER_TYPE_NORMAL, 502 .revision_id = 0, 503 .class[0] = class & 0xff, 504 .class[1] = (class >> 8) & 0xff, 505 .class[2] = (class >> 16) & 0xff, 506 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 507 .subsys_id = cpu_to_le16(subsys_id), 508 .bar[0] = cpu_to_le32(port_addr 509 | PCI_BASE_ADDRESS_SPACE_IO), 510 .bar[1] = cpu_to_le32(mmio_addr 511 | PCI_BASE_ADDRESS_SPACE_MEMORY), 512 .bar[2] = cpu_to_le32(msix_io_block 513 | PCI_BASE_ADDRESS_SPACE_MEMORY), 514 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 515 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 516 .bar_size[0] = cpu_to_le32(PCI_IO_SIZE), 517 .bar_size[1] = cpu_to_le32(PCI_IO_SIZE), 518 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 519 }; 520 521 vpci->dev_hdr = (struct device_header) { 522 .bus_type = DEVICE_BUS_PCI, 523 .data = &vpci->pci_hdr, 524 }; 525 526 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 527 vpci->pci_hdr.msix.next = 0; 528 /* 529 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 530 * VIRTIO_PCI_MAX_CONFIG entries for config. 531 * 532 * To quote the PCI spec: 533 * 534 * System software reads this field to determine the 535 * MSI-X Table Size N, which is encoded as N-1. 536 * For example, a returned value of "00000000011" 537 * indicates a table size of 4. 538 */ 539 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 540 541 /* Both table and PBA are mapped to the same BAR (2) */ 542 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 543 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 544 vpci->config_vector = 0; 545 546 if (irq__can_signal_msi(kvm)) 547 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 548 549 vpci->legacy_irq_line = pci__assign_irq(&vpci->pci_hdr); 550 551 r = device__register(&vpci->dev_hdr); 552 if (r < 0) 553 goto free_msix_mmio; 554 555 return 0; 556 557 free_msix_mmio: 558 kvm__deregister_mmio(kvm, msix_io_block); 559 free_mmio: 560 kvm__deregister_mmio(kvm, mmio_addr); 561 free_ioport: 562 ioport__unregister(kvm, port_addr); 563 return r; 564 } 565 566 int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev) 567 { 568 int vq; 569 struct virtio_pci *vpci = vdev->virtio; 570 571 for (vq = 0; vq < vdev->ops->get_vq_count(kvm, vpci->dev); vq++) 572 virtio_pci_exit_vq(kvm, vdev, vq); 573 574 return 0; 575 } 576 577 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 578 { 579 struct virtio_pci *vpci = vdev->virtio; 580 581 virtio_pci__reset(kvm, vdev); 582 kvm__deregister_mmio(kvm, virtio_pci__mmio_addr(vpci)); 583 kvm__deregister_mmio(kvm, virtio_pci__msix_io_addr(vpci)); 584 ioport__unregister(kvm, virtio_pci__port_addr(vpci)); 585 586 return 0; 587 } 588