1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <string.h> 15 16 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 17 { 18 struct virtio_pci_ioevent_param *ioeventfd = param; 19 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 20 21 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 22 } 23 24 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 25 { 26 struct ioevent ioevent; 27 struct virtio_pci *vpci = vdev->virtio; 28 int r, flags = 0; 29 int fd; 30 31 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 32 .vdev = vdev, 33 .vq = vq, 34 }; 35 36 ioevent = (struct ioevent) { 37 .fn = virtio_pci__ioevent_callback, 38 .fn_ptr = &vpci->ioeventfds[vq], 39 .datamatch = vq, 40 .fn_kvm = kvm, 41 }; 42 43 /* 44 * Vhost will poll the eventfd in host kernel side, otherwise we 45 * need to poll in userspace. 46 */ 47 if (!vdev->use_vhost) 48 flags |= IOEVENTFD_FLAG_USER_POLL; 49 50 /* ioport */ 51 ioevent.io_addr = vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 52 ioevent.io_len = sizeof(u16); 53 ioevent.fd = fd = eventfd(0, 0); 54 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 55 if (r) 56 return r; 57 58 /* mmio */ 59 ioevent.io_addr = vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 60 ioevent.io_len = sizeof(u16); 61 ioevent.fd = eventfd(0, 0); 62 r = ioeventfd__add_event(&ioevent, flags); 63 if (r) 64 goto free_ioport_evt; 65 66 if (vdev->ops->notify_vq_eventfd) 67 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd); 68 return 0; 69 70 free_ioport_evt: 71 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 72 return r; 73 } 74 75 static void virtio_pci_exit_vq(struct kvm *kvm, struct virtio_device *vdev, 76 int vq) 77 { 78 struct virtio_pci *vpci = vdev->virtio; 79 80 ioeventfd__del_event(vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 81 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 82 virtio_exit_vq(kvm, vdev, vpci->dev, vq); 83 } 84 85 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 86 { 87 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 88 } 89 90 static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device *vdev, u16 port, 91 void *data, int size, int offset) 92 { 93 u32 config_offset; 94 struct virtio_pci *vpci = vdev->virtio; 95 int type = virtio__get_dev_specific_field(offset - 20, 96 virtio_pci__msix_enabled(vpci), 97 &config_offset); 98 if (type == VIRTIO_PCI_O_MSIX) { 99 switch (offset) { 100 case VIRTIO_MSI_CONFIG_VECTOR: 101 ioport__write16(data, vpci->config_vector); 102 break; 103 case VIRTIO_MSI_QUEUE_VECTOR: 104 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 105 break; 106 }; 107 108 return true; 109 } else if (type == VIRTIO_PCI_O_CONFIG) { 110 u8 cfg; 111 112 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 113 ioport__write8(data, cfg); 114 return true; 115 } 116 117 return false; 118 } 119 120 static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 121 { 122 unsigned long offset; 123 bool ret = true; 124 struct virtio_device *vdev; 125 struct virtio_pci *vpci; 126 struct virt_queue *vq; 127 struct kvm *kvm; 128 u32 val; 129 130 kvm = vcpu->kvm; 131 vdev = ioport->priv; 132 vpci = vdev->virtio; 133 offset = port - vpci->port_addr; 134 135 switch (offset) { 136 case VIRTIO_PCI_HOST_FEATURES: 137 val = vdev->ops->get_host_features(kvm, vpci->dev); 138 ioport__write32(data, val); 139 break; 140 case VIRTIO_PCI_QUEUE_PFN: 141 vq = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector); 142 ioport__write32(data, vq->pfn); 143 break; 144 case VIRTIO_PCI_QUEUE_NUM: 145 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 146 ioport__write16(data, val); 147 break; 148 case VIRTIO_PCI_STATUS: 149 ioport__write8(data, vpci->status); 150 break; 151 case VIRTIO_PCI_ISR: 152 ioport__write8(data, vpci->isr); 153 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 154 vpci->isr = VIRTIO_IRQ_LOW; 155 break; 156 default: 157 ret = virtio_pci__specific_io_in(kvm, vdev, port, data, size, offset); 158 break; 159 }; 160 161 return ret; 162 } 163 164 static void update_msix_map(struct virtio_pci *vpci, 165 struct msix_table *msix_entry, u32 vecnum) 166 { 167 u32 gsi, i; 168 169 /* Find the GSI number used for that vector */ 170 if (vecnum == vpci->config_vector) { 171 gsi = vpci->config_gsi; 172 } else { 173 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 174 if (vpci->vq_vector[i] == vecnum) 175 break; 176 if (i == VIRTIO_PCI_MAX_VQ) 177 return; 178 gsi = vpci->gsis[i]; 179 } 180 181 if (gsi == 0) 182 return; 183 184 msix_entry = &msix_entry[vecnum]; 185 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 186 } 187 188 static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device *vdev, u16 port, 189 void *data, int size, int offset) 190 { 191 struct virtio_pci *vpci = vdev->virtio; 192 u32 config_offset, vec; 193 int gsi; 194 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 195 &config_offset); 196 if (type == VIRTIO_PCI_O_MSIX) { 197 switch (offset) { 198 case VIRTIO_MSI_CONFIG_VECTOR: 199 vec = vpci->config_vector = ioport__read16(data); 200 if (vec == VIRTIO_MSI_NO_VECTOR) 201 break; 202 203 gsi = irq__add_msix_route(kvm, 204 &vpci->msix_table[vec].msg, 205 vpci->dev_hdr.dev_num << 3); 206 /* 207 * We don't need IRQ routing if we can use 208 * MSI injection via the KVM_SIGNAL_MSI ioctl. 209 */ 210 if (gsi == -ENXIO && 211 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 212 break; 213 214 if (gsi < 0) { 215 die("failed to configure MSIs"); 216 break; 217 } 218 219 vpci->config_gsi = gsi; 220 break; 221 case VIRTIO_MSI_QUEUE_VECTOR: 222 vec = ioport__read16(data); 223 vpci->vq_vector[vpci->queue_selector] = vec; 224 225 if (vec == VIRTIO_MSI_NO_VECTOR) 226 break; 227 228 gsi = irq__add_msix_route(kvm, 229 &vpci->msix_table[vec].msg, 230 vpci->dev_hdr.dev_num << 3); 231 /* 232 * We don't need IRQ routing if we can use 233 * MSI injection via the KVM_SIGNAL_MSI ioctl. 234 */ 235 if (gsi == -ENXIO && 236 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 237 break; 238 239 if (gsi < 0) { 240 die("failed to configure MSIs"); 241 break; 242 } 243 244 vpci->gsis[vpci->queue_selector] = gsi; 245 if (vdev->ops->notify_vq_gsi) 246 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 247 vpci->queue_selector, 248 gsi); 249 break; 250 }; 251 252 return true; 253 } else if (type == VIRTIO_PCI_O_CONFIG) { 254 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 255 256 return true; 257 } 258 259 return false; 260 } 261 262 static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 263 { 264 unsigned long offset; 265 bool ret = true; 266 struct virtio_device *vdev; 267 struct virtio_pci *vpci; 268 struct kvm *kvm; 269 u32 val; 270 271 kvm = vcpu->kvm; 272 vdev = ioport->priv; 273 vpci = vdev->virtio; 274 offset = port - vpci->port_addr; 275 276 switch (offset) { 277 case VIRTIO_PCI_GUEST_FEATURES: 278 val = ioport__read32(data); 279 virtio_set_guest_features(kvm, vdev, vpci->dev, val); 280 break; 281 case VIRTIO_PCI_QUEUE_PFN: 282 val = ioport__read32(data); 283 if (val) { 284 virtio_pci__init_ioeventfd(kvm, vdev, 285 vpci->queue_selector); 286 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 287 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 288 VIRTIO_PCI_VRING_ALIGN, val); 289 } else { 290 virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector); 291 } 292 break; 293 case VIRTIO_PCI_QUEUE_SEL: 294 vpci->queue_selector = ioport__read16(data); 295 break; 296 case VIRTIO_PCI_QUEUE_NOTIFY: 297 val = ioport__read16(data); 298 vdev->ops->notify_vq(kvm, vpci->dev, val); 299 break; 300 case VIRTIO_PCI_STATUS: 301 vpci->status = ioport__read8(data); 302 if (!vpci->status) /* Sample endianness on reset */ 303 vdev->endian = kvm_cpu__get_endianness(vcpu); 304 virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); 305 break; 306 default: 307 ret = virtio_pci__specific_io_out(kvm, vdev, port, data, size, offset); 308 break; 309 }; 310 311 return ret; 312 } 313 314 static struct ioport_operations virtio_pci__io_ops = { 315 .io_in = virtio_pci__io_in, 316 .io_out = virtio_pci__io_out, 317 }; 318 319 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 320 u64 addr, u8 *data, u32 len, 321 u8 is_write, void *ptr) 322 { 323 struct virtio_pci *vpci = ptr; 324 struct msix_table *table; 325 int vecnum; 326 size_t offset; 327 328 if (addr > vpci->msix_io_block + PCI_IO_SIZE) { 329 if (is_write) 330 return; 331 table = (struct msix_table *)&vpci->msix_pba; 332 offset = addr - (vpci->msix_io_block + PCI_IO_SIZE); 333 } else { 334 table = vpci->msix_table; 335 offset = addr - vpci->msix_io_block; 336 } 337 vecnum = offset / sizeof(struct msix_table); 338 offset = offset % sizeof(struct msix_table); 339 340 if (!is_write) { 341 memcpy(data, (void *)&table[vecnum] + offset, len); 342 return; 343 } 344 345 memcpy((void *)&table[vecnum] + offset, data, len); 346 347 /* Did we just update the address or payload? */ 348 if (offset < offsetof(struct msix_table, ctrl)) 349 update_msix_map(vpci, table, vecnum); 350 } 351 352 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 353 int vec) 354 { 355 struct kvm_msi msi = { 356 .address_lo = vpci->msix_table[vec].msg.address_lo, 357 .address_hi = vpci->msix_table[vec].msg.address_hi, 358 .data = vpci->msix_table[vec].msg.data, 359 }; 360 361 if (kvm->msix_needs_devid) { 362 msi.flags = KVM_MSI_VALID_DEVID; 363 msi.devid = vpci->dev_hdr.dev_num << 3; 364 } 365 366 irq__signal_msi(kvm, &msi); 367 } 368 369 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 370 { 371 struct virtio_pci *vpci = vdev->virtio; 372 int tbl = vpci->vq_vector[vq]; 373 374 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 375 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 376 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 377 378 vpci->msix_pba |= 1 << tbl; 379 return 0; 380 } 381 382 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 383 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 384 else 385 kvm__irq_trigger(kvm, vpci->gsis[vq]); 386 } else { 387 vpci->isr = VIRTIO_IRQ_HIGH; 388 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 389 } 390 return 0; 391 } 392 393 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 394 { 395 struct virtio_pci *vpci = vdev->virtio; 396 int tbl = vpci->config_vector; 397 398 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 399 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 400 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 401 402 vpci->msix_pba |= 1 << tbl; 403 return 0; 404 } 405 406 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 407 virtio_pci__signal_msi(kvm, vpci, tbl); 408 else 409 kvm__irq_trigger(kvm, vpci->config_gsi); 410 } else { 411 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 412 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 413 } 414 415 return 0; 416 } 417 418 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 419 u64 addr, u8 *data, u32 len, 420 u8 is_write, void *ptr) 421 { 422 struct virtio_pci *vpci = ptr; 423 int direction = is_write ? KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN; 424 u16 port = vpci->port_addr + (addr & (IOPORT_SIZE - 1)); 425 426 kvm__emulate_io(vcpu, port, data, direction, len, 1); 427 } 428 429 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 430 int device_id, int subsys_id, int class) 431 { 432 struct virtio_pci *vpci = vdev->virtio; 433 int r; 434 435 vpci->kvm = kvm; 436 vpci->dev = dev; 437 438 r = ioport__register(kvm, IOPORT_EMPTY, &virtio_pci__io_ops, IOPORT_SIZE, vdev); 439 if (r < 0) 440 return r; 441 vpci->port_addr = (u16)r; 442 443 vpci->mmio_addr = pci_get_io_space_block(IOPORT_SIZE); 444 r = kvm__register_mmio(kvm, vpci->mmio_addr, IOPORT_SIZE, false, 445 virtio_pci__io_mmio_callback, vpci); 446 if (r < 0) 447 goto free_ioport; 448 449 vpci->msix_io_block = pci_get_io_space_block(PCI_IO_SIZE * 2); 450 r = kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE * 2, false, 451 virtio_pci__msix_mmio_callback, vpci); 452 if (r < 0) 453 goto free_mmio; 454 455 vpci->pci_hdr = (struct pci_device_header) { 456 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 457 .device_id = cpu_to_le16(device_id), 458 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 459 .header_type = PCI_HEADER_TYPE_NORMAL, 460 .revision_id = 0, 461 .class[0] = class & 0xff, 462 .class[1] = (class >> 8) & 0xff, 463 .class[2] = (class >> 16) & 0xff, 464 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 465 .subsys_id = cpu_to_le16(subsys_id), 466 .bar[0] = cpu_to_le32(vpci->port_addr 467 | PCI_BASE_ADDRESS_SPACE_IO), 468 .bar[1] = cpu_to_le32(vpci->mmio_addr 469 | PCI_BASE_ADDRESS_SPACE_MEMORY), 470 .bar[2] = cpu_to_le32(vpci->msix_io_block 471 | PCI_BASE_ADDRESS_SPACE_MEMORY), 472 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 473 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 474 .bar_size[0] = cpu_to_le32(IOPORT_SIZE), 475 .bar_size[1] = cpu_to_le32(IOPORT_SIZE), 476 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 477 }; 478 479 vpci->dev_hdr = (struct device_header) { 480 .bus_type = DEVICE_BUS_PCI, 481 .data = &vpci->pci_hdr, 482 }; 483 484 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 485 vpci->pci_hdr.msix.next = 0; 486 /* 487 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 488 * VIRTIO_PCI_MAX_CONFIG entries for config. 489 * 490 * To quote the PCI spec: 491 * 492 * System software reads this field to determine the 493 * MSI-X Table Size N, which is encoded as N-1. 494 * For example, a returned value of "00000000011" 495 * indicates a table size of 4. 496 */ 497 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 498 499 /* Both table and PBA are mapped to the same BAR (2) */ 500 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 501 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 502 vpci->config_vector = 0; 503 504 if (irq__can_signal_msi(kvm)) 505 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 506 507 r = device__register(&vpci->dev_hdr); 508 if (r < 0) 509 goto free_msix_mmio; 510 511 /* save the IRQ that device__register() has allocated */ 512 vpci->legacy_irq_line = vpci->pci_hdr.irq_line; 513 514 return 0; 515 516 free_msix_mmio: 517 kvm__deregister_mmio(kvm, vpci->msix_io_block); 518 free_mmio: 519 kvm__deregister_mmio(kvm, vpci->mmio_addr); 520 free_ioport: 521 ioport__unregister(kvm, vpci->port_addr); 522 return r; 523 } 524 525 int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev) 526 { 527 int vq; 528 struct virtio_pci *vpci = vdev->virtio; 529 530 for (vq = 0; vq < vdev->ops->get_vq_count(kvm, vpci->dev); vq++) 531 virtio_pci_exit_vq(kvm, vdev, vq); 532 533 return 0; 534 } 535 536 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 537 { 538 struct virtio_pci *vpci = vdev->virtio; 539 540 virtio_pci__reset(kvm, vdev); 541 kvm__deregister_mmio(kvm, vpci->mmio_addr); 542 kvm__deregister_mmio(kvm, vpci->msix_io_block); 543 ioport__unregister(kvm, vpci->port_addr); 544 545 return 0; 546 } 547