1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <string.h> 15 16 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 17 { 18 struct virtio_pci_ioevent_param *ioeventfd = param; 19 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 20 21 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 22 } 23 24 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 25 { 26 struct ioevent ioevent; 27 struct virtio_pci *vpci = vdev->virtio; 28 int r, flags = 0; 29 int fd; 30 31 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 32 .vdev = vdev, 33 .vq = vq, 34 }; 35 36 ioevent = (struct ioevent) { 37 .fn = virtio_pci__ioevent_callback, 38 .fn_ptr = &vpci->ioeventfds[vq], 39 .datamatch = vq, 40 .fn_kvm = kvm, 41 }; 42 43 /* 44 * Vhost will poll the eventfd in host kernel side, otherwise we 45 * need to poll in userspace. 46 */ 47 if (!vdev->use_vhost) 48 flags |= IOEVENTFD_FLAG_USER_POLL; 49 50 /* ioport */ 51 ioevent.io_addr = vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 52 ioevent.io_len = sizeof(u16); 53 ioevent.fd = fd = eventfd(0, 0); 54 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 55 if (r) 56 return r; 57 58 /* mmio */ 59 ioevent.io_addr = vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 60 ioevent.io_len = sizeof(u16); 61 ioevent.fd = eventfd(0, 0); 62 r = ioeventfd__add_event(&ioevent, flags); 63 if (r) 64 goto free_ioport_evt; 65 66 if (vdev->ops->notify_vq_eventfd) 67 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd); 68 return 0; 69 70 free_ioport_evt: 71 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 72 return r; 73 } 74 75 static void virtio_pci_exit_vq(struct kvm *kvm, struct virtio_device *vdev, 76 int vq) 77 { 78 struct virtio_pci *vpci = vdev->virtio; 79 80 ioeventfd__del_event(vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 81 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 82 virtio_exit_vq(kvm, vdev, vpci->dev, vq); 83 } 84 85 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 86 { 87 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 88 } 89 90 static bool virtio_pci__specific_data_in(struct kvm *kvm, struct virtio_device *vdev, 91 void *data, int size, unsigned long offset) 92 { 93 u32 config_offset; 94 struct virtio_pci *vpci = vdev->virtio; 95 int type = virtio__get_dev_specific_field(offset - 20, 96 virtio_pci__msix_enabled(vpci), 97 &config_offset); 98 if (type == VIRTIO_PCI_O_MSIX) { 99 switch (offset) { 100 case VIRTIO_MSI_CONFIG_VECTOR: 101 ioport__write16(data, vpci->config_vector); 102 break; 103 case VIRTIO_MSI_QUEUE_VECTOR: 104 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 105 break; 106 }; 107 108 return true; 109 } else if (type == VIRTIO_PCI_O_CONFIG) { 110 u8 cfg; 111 112 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 113 ioport__write8(data, cfg); 114 return true; 115 } 116 117 return false; 118 } 119 120 static bool virtio_pci__data_in(struct kvm_cpu *vcpu, struct virtio_device *vdev, 121 unsigned long offset, void *data, int size) 122 { 123 bool ret = true; 124 struct virtio_pci *vpci; 125 struct virt_queue *vq; 126 struct kvm *kvm; 127 u32 val; 128 129 kvm = vcpu->kvm; 130 vpci = vdev->virtio; 131 132 switch (offset) { 133 case VIRTIO_PCI_HOST_FEATURES: 134 val = vdev->ops->get_host_features(kvm, vpci->dev); 135 ioport__write32(data, val); 136 break; 137 case VIRTIO_PCI_QUEUE_PFN: 138 vq = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector); 139 ioport__write32(data, vq->pfn); 140 break; 141 case VIRTIO_PCI_QUEUE_NUM: 142 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 143 ioport__write16(data, val); 144 break; 145 case VIRTIO_PCI_STATUS: 146 ioport__write8(data, vpci->status); 147 break; 148 case VIRTIO_PCI_ISR: 149 ioport__write8(data, vpci->isr); 150 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 151 vpci->isr = VIRTIO_IRQ_LOW; 152 break; 153 default: 154 ret = virtio_pci__specific_data_in(kvm, vdev, data, size, offset); 155 break; 156 }; 157 158 return ret; 159 } 160 161 static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 162 { 163 struct virtio_device *vdev = ioport->priv; 164 struct virtio_pci *vpci = vdev->virtio; 165 unsigned long offset = port - vpci->port_addr; 166 167 return virtio_pci__data_in(vcpu, vdev, offset, data, size); 168 } 169 170 static void update_msix_map(struct virtio_pci *vpci, 171 struct msix_table *msix_entry, u32 vecnum) 172 { 173 u32 gsi, i; 174 175 /* Find the GSI number used for that vector */ 176 if (vecnum == vpci->config_vector) { 177 gsi = vpci->config_gsi; 178 } else { 179 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 180 if (vpci->vq_vector[i] == vecnum) 181 break; 182 if (i == VIRTIO_PCI_MAX_VQ) 183 return; 184 gsi = vpci->gsis[i]; 185 } 186 187 if (gsi == 0) 188 return; 189 190 msix_entry = &msix_entry[vecnum]; 191 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 192 } 193 194 static bool virtio_pci__specific_data_out(struct kvm *kvm, struct virtio_device *vdev, 195 void *data, int size, unsigned long offset) 196 { 197 struct virtio_pci *vpci = vdev->virtio; 198 u32 config_offset, vec; 199 int gsi; 200 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 201 &config_offset); 202 if (type == VIRTIO_PCI_O_MSIX) { 203 switch (offset) { 204 case VIRTIO_MSI_CONFIG_VECTOR: 205 vec = vpci->config_vector = ioport__read16(data); 206 if (vec == VIRTIO_MSI_NO_VECTOR) 207 break; 208 209 gsi = irq__add_msix_route(kvm, 210 &vpci->msix_table[vec].msg, 211 vpci->dev_hdr.dev_num << 3); 212 /* 213 * We don't need IRQ routing if we can use 214 * MSI injection via the KVM_SIGNAL_MSI ioctl. 215 */ 216 if (gsi == -ENXIO && 217 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 218 break; 219 220 if (gsi < 0) { 221 die("failed to configure MSIs"); 222 break; 223 } 224 225 vpci->config_gsi = gsi; 226 break; 227 case VIRTIO_MSI_QUEUE_VECTOR: 228 vec = ioport__read16(data); 229 vpci->vq_vector[vpci->queue_selector] = vec; 230 231 if (vec == VIRTIO_MSI_NO_VECTOR) 232 break; 233 234 gsi = irq__add_msix_route(kvm, 235 &vpci->msix_table[vec].msg, 236 vpci->dev_hdr.dev_num << 3); 237 /* 238 * We don't need IRQ routing if we can use 239 * MSI injection via the KVM_SIGNAL_MSI ioctl. 240 */ 241 if (gsi == -ENXIO && 242 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 243 break; 244 245 if (gsi < 0) { 246 die("failed to configure MSIs"); 247 break; 248 } 249 250 vpci->gsis[vpci->queue_selector] = gsi; 251 if (vdev->ops->notify_vq_gsi) 252 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 253 vpci->queue_selector, 254 gsi); 255 break; 256 }; 257 258 return true; 259 } else if (type == VIRTIO_PCI_O_CONFIG) { 260 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 261 262 return true; 263 } 264 265 return false; 266 } 267 268 static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device *vdev, 269 unsigned long offset, void *data, int size) 270 { 271 bool ret = true; 272 struct virtio_pci *vpci; 273 struct kvm *kvm; 274 u32 val; 275 276 kvm = vcpu->kvm; 277 vpci = vdev->virtio; 278 279 switch (offset) { 280 case VIRTIO_PCI_GUEST_FEATURES: 281 val = ioport__read32(data); 282 virtio_set_guest_features(kvm, vdev, vpci->dev, val); 283 break; 284 case VIRTIO_PCI_QUEUE_PFN: 285 val = ioport__read32(data); 286 if (val) { 287 virtio_pci__init_ioeventfd(kvm, vdev, 288 vpci->queue_selector); 289 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 290 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 291 VIRTIO_PCI_VRING_ALIGN, val); 292 } else { 293 virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector); 294 } 295 break; 296 case VIRTIO_PCI_QUEUE_SEL: 297 vpci->queue_selector = ioport__read16(data); 298 break; 299 case VIRTIO_PCI_QUEUE_NOTIFY: 300 val = ioport__read16(data); 301 vdev->ops->notify_vq(kvm, vpci->dev, val); 302 break; 303 case VIRTIO_PCI_STATUS: 304 vpci->status = ioport__read8(data); 305 if (!vpci->status) /* Sample endianness on reset */ 306 vdev->endian = kvm_cpu__get_endianness(vcpu); 307 virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); 308 break; 309 default: 310 ret = virtio_pci__specific_data_out(kvm, vdev, data, size, offset); 311 break; 312 }; 313 314 return ret; 315 } 316 317 static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 318 { 319 struct virtio_device *vdev = ioport->priv; 320 struct virtio_pci *vpci = vdev->virtio; 321 unsigned long offset = port - vpci->port_addr; 322 323 return virtio_pci__data_out(vcpu, vdev, offset, data, size); 324 } 325 326 static struct ioport_operations virtio_pci__io_ops = { 327 .io_in = virtio_pci__io_in, 328 .io_out = virtio_pci__io_out, 329 }; 330 331 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 332 u64 addr, u8 *data, u32 len, 333 u8 is_write, void *ptr) 334 { 335 struct virtio_device *vdev = ptr; 336 struct virtio_pci *vpci = vdev->virtio; 337 struct msix_table *table; 338 int vecnum; 339 size_t offset; 340 341 if (addr > vpci->msix_io_block + PCI_IO_SIZE) { 342 if (is_write) 343 return; 344 table = (struct msix_table *)&vpci->msix_pba; 345 offset = addr - (vpci->msix_io_block + PCI_IO_SIZE); 346 } else { 347 table = vpci->msix_table; 348 offset = addr - vpci->msix_io_block; 349 } 350 vecnum = offset / sizeof(struct msix_table); 351 offset = offset % sizeof(struct msix_table); 352 353 if (!is_write) { 354 memcpy(data, (void *)&table[vecnum] + offset, len); 355 return; 356 } 357 358 memcpy((void *)&table[vecnum] + offset, data, len); 359 360 /* Did we just update the address or payload? */ 361 if (offset < offsetof(struct msix_table, ctrl)) 362 update_msix_map(vpci, table, vecnum); 363 } 364 365 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 366 int vec) 367 { 368 struct kvm_msi msi = { 369 .address_lo = vpci->msix_table[vec].msg.address_lo, 370 .address_hi = vpci->msix_table[vec].msg.address_hi, 371 .data = vpci->msix_table[vec].msg.data, 372 }; 373 374 if (kvm->msix_needs_devid) { 375 msi.flags = KVM_MSI_VALID_DEVID; 376 msi.devid = vpci->dev_hdr.dev_num << 3; 377 } 378 379 irq__signal_msi(kvm, &msi); 380 } 381 382 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 383 { 384 struct virtio_pci *vpci = vdev->virtio; 385 int tbl = vpci->vq_vector[vq]; 386 387 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 388 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 389 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 390 391 vpci->msix_pba |= 1 << tbl; 392 return 0; 393 } 394 395 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 396 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 397 else 398 kvm__irq_trigger(kvm, vpci->gsis[vq]); 399 } else { 400 vpci->isr = VIRTIO_IRQ_HIGH; 401 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 402 } 403 return 0; 404 } 405 406 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 407 { 408 struct virtio_pci *vpci = vdev->virtio; 409 int tbl = vpci->config_vector; 410 411 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 412 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 413 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 414 415 vpci->msix_pba |= 1 << tbl; 416 return 0; 417 } 418 419 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 420 virtio_pci__signal_msi(kvm, vpci, tbl); 421 else 422 kvm__irq_trigger(kvm, vpci->config_gsi); 423 } else { 424 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 425 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 426 } 427 428 return 0; 429 } 430 431 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 432 u64 addr, u8 *data, u32 len, 433 u8 is_write, void *ptr) 434 { 435 struct virtio_device *vdev = ptr; 436 struct virtio_pci *vpci = vdev->virtio; 437 438 if (!is_write) 439 virtio_pci__data_in(vcpu, vdev, addr - vpci->mmio_addr, 440 data, len); 441 else 442 virtio_pci__data_out(vcpu, vdev, addr - vpci->mmio_addr, 443 data, len); 444 } 445 446 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 447 int device_id, int subsys_id, int class) 448 { 449 struct virtio_pci *vpci = vdev->virtio; 450 int r; 451 452 vpci->kvm = kvm; 453 vpci->dev = dev; 454 455 BUILD_BUG_ON(!is_power_of_two(PCI_IO_SIZE)); 456 457 r = pci_get_io_port_block(PCI_IO_SIZE); 458 r = ioport__register(kvm, r, &virtio_pci__io_ops, PCI_IO_SIZE, vdev); 459 if (r < 0) 460 return r; 461 vpci->port_addr = (u16)r; 462 463 vpci->mmio_addr = pci_get_mmio_block(PCI_IO_SIZE); 464 r = kvm__register_mmio(kvm, vpci->mmio_addr, PCI_IO_SIZE, false, 465 virtio_pci__io_mmio_callback, vdev); 466 if (r < 0) 467 goto free_ioport; 468 469 vpci->msix_io_block = pci_get_mmio_block(PCI_IO_SIZE * 2); 470 r = kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE * 2, false, 471 virtio_pci__msix_mmio_callback, vdev); 472 if (r < 0) 473 goto free_mmio; 474 475 vpci->pci_hdr = (struct pci_device_header) { 476 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 477 .device_id = cpu_to_le16(device_id), 478 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 479 .header_type = PCI_HEADER_TYPE_NORMAL, 480 .revision_id = 0, 481 .class[0] = class & 0xff, 482 .class[1] = (class >> 8) & 0xff, 483 .class[2] = (class >> 16) & 0xff, 484 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 485 .subsys_id = cpu_to_le16(subsys_id), 486 .bar[0] = cpu_to_le32(vpci->port_addr 487 | PCI_BASE_ADDRESS_SPACE_IO), 488 .bar[1] = cpu_to_le32(vpci->mmio_addr 489 | PCI_BASE_ADDRESS_SPACE_MEMORY), 490 .bar[2] = cpu_to_le32(vpci->msix_io_block 491 | PCI_BASE_ADDRESS_SPACE_MEMORY), 492 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 493 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 494 .bar_size[0] = cpu_to_le32(PCI_IO_SIZE), 495 .bar_size[1] = cpu_to_le32(PCI_IO_SIZE), 496 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 497 }; 498 499 vpci->dev_hdr = (struct device_header) { 500 .bus_type = DEVICE_BUS_PCI, 501 .data = &vpci->pci_hdr, 502 }; 503 504 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 505 vpci->pci_hdr.msix.next = 0; 506 /* 507 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 508 * VIRTIO_PCI_MAX_CONFIG entries for config. 509 * 510 * To quote the PCI spec: 511 * 512 * System software reads this field to determine the 513 * MSI-X Table Size N, which is encoded as N-1. 514 * For example, a returned value of "00000000011" 515 * indicates a table size of 4. 516 */ 517 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 518 519 /* Both table and PBA are mapped to the same BAR (2) */ 520 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 521 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 522 vpci->config_vector = 0; 523 524 if (irq__can_signal_msi(kvm)) 525 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 526 527 r = device__register(&vpci->dev_hdr); 528 if (r < 0) 529 goto free_msix_mmio; 530 531 /* save the IRQ that device__register() has allocated */ 532 vpci->legacy_irq_line = vpci->pci_hdr.irq_line; 533 534 return 0; 535 536 free_msix_mmio: 537 kvm__deregister_mmio(kvm, vpci->msix_io_block); 538 free_mmio: 539 kvm__deregister_mmio(kvm, vpci->mmio_addr); 540 free_ioport: 541 ioport__unregister(kvm, vpci->port_addr); 542 return r; 543 } 544 545 int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev) 546 { 547 int vq; 548 struct virtio_pci *vpci = vdev->virtio; 549 550 for (vq = 0; vq < vdev->ops->get_vq_count(kvm, vpci->dev); vq++) 551 virtio_pci_exit_vq(kvm, vdev, vq); 552 553 return 0; 554 } 555 556 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 557 { 558 struct virtio_pci *vpci = vdev->virtio; 559 560 virtio_pci__reset(kvm, vdev); 561 kvm__deregister_mmio(kvm, vpci->mmio_addr); 562 kvm__deregister_mmio(kvm, vpci->msix_io_block); 563 ioport__unregister(kvm, vpci->port_addr); 564 565 return 0; 566 } 567