1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <string.h> 15 16 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 17 { 18 struct virtio_pci_ioevent_param *ioeventfd = param; 19 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 20 21 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 22 } 23 24 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 25 { 26 struct ioevent ioevent; 27 struct virtio_pci *vpci = vdev->virtio; 28 int r, flags = 0; 29 int fd; 30 31 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 32 .vdev = vdev, 33 .vq = vq, 34 }; 35 36 ioevent = (struct ioevent) { 37 .fn = virtio_pci__ioevent_callback, 38 .fn_ptr = &vpci->ioeventfds[vq], 39 .datamatch = vq, 40 .fn_kvm = kvm, 41 }; 42 43 /* 44 * Vhost will poll the eventfd in host kernel side, otherwise we 45 * need to poll in userspace. 46 */ 47 if (!vdev->use_vhost) 48 flags |= IOEVENTFD_FLAG_USER_POLL; 49 50 /* ioport */ 51 ioevent.io_addr = vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 52 ioevent.io_len = sizeof(u16); 53 ioevent.fd = fd = eventfd(0, 0); 54 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 55 if (r) 56 return r; 57 58 /* mmio */ 59 ioevent.io_addr = vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 60 ioevent.io_len = sizeof(u16); 61 ioevent.fd = eventfd(0, 0); 62 r = ioeventfd__add_event(&ioevent, flags); 63 if (r) 64 goto free_ioport_evt; 65 66 if (vdev->ops->notify_vq_eventfd) 67 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd); 68 return 0; 69 70 free_ioport_evt: 71 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 72 return r; 73 } 74 75 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 76 { 77 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 78 } 79 80 static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device *vdev, u16 port, 81 void *data, int size, int offset) 82 { 83 u32 config_offset; 84 struct virtio_pci *vpci = vdev->virtio; 85 int type = virtio__get_dev_specific_field(offset - 20, 86 virtio_pci__msix_enabled(vpci), 87 &config_offset); 88 if (type == VIRTIO_PCI_O_MSIX) { 89 switch (offset) { 90 case VIRTIO_MSI_CONFIG_VECTOR: 91 ioport__write16(data, vpci->config_vector); 92 break; 93 case VIRTIO_MSI_QUEUE_VECTOR: 94 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 95 break; 96 }; 97 98 return true; 99 } else if (type == VIRTIO_PCI_O_CONFIG) { 100 u8 cfg; 101 102 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 103 ioport__write8(data, cfg); 104 return true; 105 } 106 107 return false; 108 } 109 110 static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 111 { 112 unsigned long offset; 113 bool ret = true; 114 struct virtio_device *vdev; 115 struct virtio_pci *vpci; 116 struct virt_queue *vq; 117 struct kvm *kvm; 118 u32 val; 119 120 kvm = vcpu->kvm; 121 vdev = ioport->priv; 122 vpci = vdev->virtio; 123 offset = port - vpci->port_addr; 124 125 switch (offset) { 126 case VIRTIO_PCI_HOST_FEATURES: 127 val = vdev->ops->get_host_features(kvm, vpci->dev); 128 ioport__write32(data, val); 129 break; 130 case VIRTIO_PCI_QUEUE_PFN: 131 vq = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector); 132 ioport__write32(data, vq->pfn); 133 break; 134 case VIRTIO_PCI_QUEUE_NUM: 135 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 136 ioport__write16(data, val); 137 break; 138 case VIRTIO_PCI_STATUS: 139 ioport__write8(data, vpci->status); 140 break; 141 case VIRTIO_PCI_ISR: 142 ioport__write8(data, vpci->isr); 143 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 144 vpci->isr = VIRTIO_IRQ_LOW; 145 break; 146 default: 147 ret = virtio_pci__specific_io_in(kvm, vdev, port, data, size, offset); 148 break; 149 }; 150 151 return ret; 152 } 153 154 static void update_msix_map(struct virtio_pci *vpci, 155 struct msix_table *msix_entry, u32 vecnum) 156 { 157 u32 gsi, i; 158 159 /* Find the GSI number used for that vector */ 160 if (vecnum == vpci->config_vector) { 161 gsi = vpci->config_gsi; 162 } else { 163 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 164 if (vpci->vq_vector[i] == vecnum) 165 break; 166 if (i == VIRTIO_PCI_MAX_VQ) 167 return; 168 gsi = vpci->gsis[i]; 169 } 170 171 if (gsi == 0) 172 return; 173 174 msix_entry = &msix_entry[vecnum]; 175 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 176 } 177 178 static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device *vdev, u16 port, 179 void *data, int size, int offset) 180 { 181 struct virtio_pci *vpci = vdev->virtio; 182 u32 config_offset, vec; 183 int gsi; 184 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 185 &config_offset); 186 if (type == VIRTIO_PCI_O_MSIX) { 187 switch (offset) { 188 case VIRTIO_MSI_CONFIG_VECTOR: 189 vec = vpci->config_vector = ioport__read16(data); 190 if (vec == VIRTIO_MSI_NO_VECTOR) 191 break; 192 193 gsi = irq__add_msix_route(kvm, 194 &vpci->msix_table[vec].msg, 195 vpci->dev_hdr.dev_num << 3); 196 /* 197 * We don't need IRQ routing if we can use 198 * MSI injection via the KVM_SIGNAL_MSI ioctl. 199 */ 200 if (gsi == -ENXIO && 201 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 202 break; 203 204 if (gsi < 0) { 205 die("failed to configure MSIs"); 206 break; 207 } 208 209 vpci->config_gsi = gsi; 210 break; 211 case VIRTIO_MSI_QUEUE_VECTOR: 212 vec = ioport__read16(data); 213 vpci->vq_vector[vpci->queue_selector] = vec; 214 215 if (vec == VIRTIO_MSI_NO_VECTOR) 216 break; 217 218 gsi = irq__add_msix_route(kvm, 219 &vpci->msix_table[vec].msg, 220 vpci->dev_hdr.dev_num << 3); 221 /* 222 * We don't need IRQ routing if we can use 223 * MSI injection via the KVM_SIGNAL_MSI ioctl. 224 */ 225 if (gsi == -ENXIO && 226 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 227 break; 228 229 if (gsi < 0) { 230 die("failed to configure MSIs"); 231 break; 232 } 233 234 vpci->gsis[vpci->queue_selector] = gsi; 235 if (vdev->ops->notify_vq_gsi) 236 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 237 vpci->queue_selector, 238 gsi); 239 break; 240 }; 241 242 return true; 243 } else if (type == VIRTIO_PCI_O_CONFIG) { 244 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 245 246 return true; 247 } 248 249 return false; 250 } 251 252 static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 253 { 254 unsigned long offset; 255 bool ret = true; 256 struct virtio_device *vdev; 257 struct virtio_pci *vpci; 258 struct kvm *kvm; 259 u32 val; 260 261 kvm = vcpu->kvm; 262 vdev = ioport->priv; 263 vpci = vdev->virtio; 264 offset = port - vpci->port_addr; 265 266 switch (offset) { 267 case VIRTIO_PCI_GUEST_FEATURES: 268 val = ioport__read32(data); 269 virtio_set_guest_features(kvm, vdev, vpci->dev, val); 270 break; 271 case VIRTIO_PCI_QUEUE_PFN: 272 val = ioport__read32(data); 273 virtio_pci__init_ioeventfd(kvm, vdev, vpci->queue_selector); 274 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 275 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 276 VIRTIO_PCI_VRING_ALIGN, val); 277 break; 278 case VIRTIO_PCI_QUEUE_SEL: 279 vpci->queue_selector = ioport__read16(data); 280 break; 281 case VIRTIO_PCI_QUEUE_NOTIFY: 282 val = ioport__read16(data); 283 vdev->ops->notify_vq(kvm, vpci->dev, val); 284 break; 285 case VIRTIO_PCI_STATUS: 286 vpci->status = ioport__read8(data); 287 if (!vpci->status) /* Sample endianness on reset */ 288 vdev->endian = kvm_cpu__get_endianness(vcpu); 289 virtio_notify_status(kvm, vdev, vpci->dev, vpci->status); 290 break; 291 default: 292 ret = virtio_pci__specific_io_out(kvm, vdev, port, data, size, offset); 293 break; 294 }; 295 296 return ret; 297 } 298 299 static struct ioport_operations virtio_pci__io_ops = { 300 .io_in = virtio_pci__io_in, 301 .io_out = virtio_pci__io_out, 302 }; 303 304 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 305 u64 addr, u8 *data, u32 len, 306 u8 is_write, void *ptr) 307 { 308 struct virtio_pci *vpci = ptr; 309 struct msix_table *table; 310 int vecnum; 311 size_t offset; 312 313 if (addr > vpci->msix_io_block + PCI_IO_SIZE) { 314 if (is_write) 315 return; 316 table = (struct msix_table *)&vpci->msix_pba; 317 offset = addr - (vpci->msix_io_block + PCI_IO_SIZE); 318 } else { 319 table = vpci->msix_table; 320 offset = addr - vpci->msix_io_block; 321 } 322 vecnum = offset / sizeof(struct msix_table); 323 offset = offset % sizeof(struct msix_table); 324 325 if (!is_write) { 326 memcpy(data, (void *)&table[vecnum] + offset, len); 327 return; 328 } 329 330 memcpy((void *)&table[vecnum] + offset, data, len); 331 332 /* Did we just update the address or payload? */ 333 if (offset < offsetof(struct msix_table, ctrl)) 334 update_msix_map(vpci, table, vecnum); 335 } 336 337 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 338 int vec) 339 { 340 struct kvm_msi msi = { 341 .address_lo = vpci->msix_table[vec].msg.address_lo, 342 .address_hi = vpci->msix_table[vec].msg.address_hi, 343 .data = vpci->msix_table[vec].msg.data, 344 }; 345 346 if (kvm->msix_needs_devid) { 347 msi.flags = KVM_MSI_VALID_DEVID; 348 msi.devid = vpci->dev_hdr.dev_num << 3; 349 } 350 351 irq__signal_msi(kvm, &msi); 352 } 353 354 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 355 { 356 struct virtio_pci *vpci = vdev->virtio; 357 int tbl = vpci->vq_vector[vq]; 358 359 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 360 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 361 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 362 363 vpci->msix_pba |= 1 << tbl; 364 return 0; 365 } 366 367 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 368 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 369 else 370 kvm__irq_trigger(kvm, vpci->gsis[vq]); 371 } else { 372 vpci->isr = VIRTIO_IRQ_HIGH; 373 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 374 } 375 return 0; 376 } 377 378 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 379 { 380 struct virtio_pci *vpci = vdev->virtio; 381 int tbl = vpci->config_vector; 382 383 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 384 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 385 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 386 387 vpci->msix_pba |= 1 << tbl; 388 return 0; 389 } 390 391 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 392 virtio_pci__signal_msi(kvm, vpci, tbl); 393 else 394 kvm__irq_trigger(kvm, vpci->config_gsi); 395 } else { 396 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 397 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 398 } 399 400 return 0; 401 } 402 403 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 404 u64 addr, u8 *data, u32 len, 405 u8 is_write, void *ptr) 406 { 407 struct virtio_pci *vpci = ptr; 408 int direction = is_write ? KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN; 409 u16 port = vpci->port_addr + (addr & (IOPORT_SIZE - 1)); 410 411 kvm__emulate_io(vcpu, port, data, direction, len, 1); 412 } 413 414 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 415 int device_id, int subsys_id, int class) 416 { 417 struct virtio_pci *vpci = vdev->virtio; 418 int r; 419 420 vpci->kvm = kvm; 421 vpci->dev = dev; 422 423 r = ioport__register(kvm, IOPORT_EMPTY, &virtio_pci__io_ops, IOPORT_SIZE, vdev); 424 if (r < 0) 425 return r; 426 vpci->port_addr = (u16)r; 427 428 vpci->mmio_addr = pci_get_io_space_block(IOPORT_SIZE); 429 r = kvm__register_mmio(kvm, vpci->mmio_addr, IOPORT_SIZE, false, 430 virtio_pci__io_mmio_callback, vpci); 431 if (r < 0) 432 goto free_ioport; 433 434 vpci->msix_io_block = pci_get_io_space_block(PCI_IO_SIZE * 2); 435 r = kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE * 2, false, 436 virtio_pci__msix_mmio_callback, vpci); 437 if (r < 0) 438 goto free_mmio; 439 440 vpci->pci_hdr = (struct pci_device_header) { 441 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 442 .device_id = cpu_to_le16(device_id), 443 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 444 .header_type = PCI_HEADER_TYPE_NORMAL, 445 .revision_id = 0, 446 .class[0] = class & 0xff, 447 .class[1] = (class >> 8) & 0xff, 448 .class[2] = (class >> 16) & 0xff, 449 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 450 .subsys_id = cpu_to_le16(subsys_id), 451 .bar[0] = cpu_to_le32(vpci->port_addr 452 | PCI_BASE_ADDRESS_SPACE_IO), 453 .bar[1] = cpu_to_le32(vpci->mmio_addr 454 | PCI_BASE_ADDRESS_SPACE_MEMORY), 455 .bar[2] = cpu_to_le32(vpci->msix_io_block 456 | PCI_BASE_ADDRESS_SPACE_MEMORY), 457 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 458 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 459 .bar_size[0] = cpu_to_le32(IOPORT_SIZE), 460 .bar_size[1] = cpu_to_le32(IOPORT_SIZE), 461 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 462 }; 463 464 vpci->dev_hdr = (struct device_header) { 465 .bus_type = DEVICE_BUS_PCI, 466 .data = &vpci->pci_hdr, 467 }; 468 469 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 470 vpci->pci_hdr.msix.next = 0; 471 /* 472 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 473 * VIRTIO_PCI_MAX_CONFIG entries for config. 474 * 475 * To quote the PCI spec: 476 * 477 * System software reads this field to determine the 478 * MSI-X Table Size N, which is encoded as N-1. 479 * For example, a returned value of "00000000011" 480 * indicates a table size of 4. 481 */ 482 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 483 484 /* Both table and PBA are mapped to the same BAR (2) */ 485 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 486 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 487 vpci->config_vector = 0; 488 489 if (irq__can_signal_msi(kvm)) 490 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 491 492 r = device__register(&vpci->dev_hdr); 493 if (r < 0) 494 goto free_msix_mmio; 495 496 /* save the IRQ that device__register() has allocated */ 497 vpci->legacy_irq_line = vpci->pci_hdr.irq_line; 498 499 return 0; 500 501 free_msix_mmio: 502 kvm__deregister_mmio(kvm, vpci->msix_io_block); 503 free_mmio: 504 kvm__deregister_mmio(kvm, vpci->mmio_addr); 505 free_ioport: 506 ioport__unregister(kvm, vpci->port_addr); 507 return r; 508 } 509 510 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 511 { 512 struct virtio_pci *vpci = vdev->virtio; 513 int i; 514 515 kvm__deregister_mmio(kvm, vpci->mmio_addr); 516 kvm__deregister_mmio(kvm, vpci->msix_io_block); 517 ioport__unregister(kvm, vpci->port_addr); 518 519 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) { 520 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, i); 521 ioeventfd__del_event(vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, i); 522 } 523 524 return 0; 525 } 526