1 #include "kvm/virtio-pci.h" 2 3 #include "kvm/ioport.h" 4 #include "kvm/kvm.h" 5 #include "kvm/kvm-cpu.h" 6 #include "kvm/virtio-pci-dev.h" 7 #include "kvm/irq.h" 8 #include "kvm/virtio.h" 9 #include "kvm/ioeventfd.h" 10 11 #include <sys/ioctl.h> 12 #include <linux/virtio_pci.h> 13 #include <linux/byteorder.h> 14 #include <string.h> 15 16 static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param) 17 { 18 struct virtio_pci_ioevent_param *ioeventfd = param; 19 struct virtio_pci *vpci = ioeventfd->vdev->virtio; 20 21 ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq); 22 } 23 24 static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 25 { 26 struct ioevent ioevent; 27 struct virtio_pci *vpci = vdev->virtio; 28 int i, r, flags = 0; 29 int fds[2]; 30 31 vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) { 32 .vdev = vdev, 33 .vq = vq, 34 }; 35 36 ioevent = (struct ioevent) { 37 .fn = virtio_pci__ioevent_callback, 38 .fn_ptr = &vpci->ioeventfds[vq], 39 .datamatch = vq, 40 .fn_kvm = kvm, 41 }; 42 43 /* 44 * Vhost will poll the eventfd in host kernel side, otherwise we 45 * need to poll in userspace. 46 */ 47 if (!vdev->use_vhost) 48 flags |= IOEVENTFD_FLAG_USER_POLL; 49 50 /* ioport */ 51 ioevent.io_addr = vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY; 52 ioevent.io_len = sizeof(u16); 53 ioevent.fd = fds[0] = eventfd(0, 0); 54 r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO); 55 if (r) 56 return r; 57 58 /* mmio */ 59 ioevent.io_addr = vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY; 60 ioevent.io_len = sizeof(u16); 61 ioevent.fd = fds[1] = eventfd(0, 0); 62 r = ioeventfd__add_event(&ioevent, flags); 63 if (r) 64 goto free_ioport_evt; 65 66 if (vdev->ops->notify_vq_eventfd) 67 for (i = 0; i < 2; ++i) 68 vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, 69 fds[i]); 70 return 0; 71 72 free_ioport_evt: 73 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq); 74 return r; 75 } 76 77 static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci) 78 { 79 return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE); 80 } 81 82 static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device *vdev, u16 port, 83 void *data, int size, int offset) 84 { 85 u32 config_offset; 86 struct virtio_pci *vpci = vdev->virtio; 87 int type = virtio__get_dev_specific_field(offset - 20, 88 virtio_pci__msix_enabled(vpci), 89 &config_offset); 90 if (type == VIRTIO_PCI_O_MSIX) { 91 switch (offset) { 92 case VIRTIO_MSI_CONFIG_VECTOR: 93 ioport__write16(data, vpci->config_vector); 94 break; 95 case VIRTIO_MSI_QUEUE_VECTOR: 96 ioport__write16(data, vpci->vq_vector[vpci->queue_selector]); 97 break; 98 }; 99 100 return true; 101 } else if (type == VIRTIO_PCI_O_CONFIG) { 102 u8 cfg; 103 104 cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset]; 105 ioport__write8(data, cfg); 106 return true; 107 } 108 109 return false; 110 } 111 112 static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 113 { 114 unsigned long offset; 115 bool ret = true; 116 struct virtio_device *vdev; 117 struct virtio_pci *vpci; 118 struct kvm *kvm; 119 u32 val; 120 121 kvm = vcpu->kvm; 122 vdev = ioport->priv; 123 vpci = vdev->virtio; 124 offset = port - vpci->port_addr; 125 126 switch (offset) { 127 case VIRTIO_PCI_HOST_FEATURES: 128 val = vdev->ops->get_host_features(kvm, vpci->dev); 129 ioport__write32(data, val); 130 break; 131 case VIRTIO_PCI_QUEUE_PFN: 132 val = vdev->ops->get_pfn_vq(kvm, vpci->dev, vpci->queue_selector); 133 ioport__write32(data, val); 134 break; 135 case VIRTIO_PCI_QUEUE_NUM: 136 val = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector); 137 ioport__write16(data, val); 138 break; 139 case VIRTIO_PCI_STATUS: 140 ioport__write8(data, vpci->status); 141 break; 142 case VIRTIO_PCI_ISR: 143 ioport__write8(data, vpci->isr); 144 kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW); 145 vpci->isr = VIRTIO_IRQ_LOW; 146 break; 147 default: 148 ret = virtio_pci__specific_io_in(kvm, vdev, port, data, size, offset); 149 break; 150 }; 151 152 return ret; 153 } 154 155 static void update_msix_map(struct virtio_pci *vpci, 156 struct msix_table *msix_entry, u32 vecnum) 157 { 158 u32 gsi, i; 159 160 /* Find the GSI number used for that vector */ 161 if (vecnum == vpci->config_vector) { 162 gsi = vpci->config_gsi; 163 } else { 164 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) 165 if (vpci->vq_vector[i] == vecnum) 166 break; 167 if (i == VIRTIO_PCI_MAX_VQ) 168 return; 169 gsi = vpci->gsis[i]; 170 } 171 172 if (gsi == 0) 173 return; 174 175 msix_entry = &msix_entry[vecnum]; 176 irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg); 177 } 178 179 static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device *vdev, u16 port, 180 void *data, int size, int offset) 181 { 182 struct virtio_pci *vpci = vdev->virtio; 183 u32 config_offset, vec; 184 int gsi; 185 int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci), 186 &config_offset); 187 if (type == VIRTIO_PCI_O_MSIX) { 188 switch (offset) { 189 case VIRTIO_MSI_CONFIG_VECTOR: 190 vec = vpci->config_vector = ioport__read16(data); 191 if (vec == VIRTIO_MSI_NO_VECTOR) 192 break; 193 194 gsi = irq__add_msix_route(kvm, 195 &vpci->msix_table[vec].msg, 196 vpci->dev_hdr.dev_num << 3); 197 /* 198 * We don't need IRQ routing if we can use 199 * MSI injection via the KVM_SIGNAL_MSI ioctl. 200 */ 201 if (gsi == -ENXIO && 202 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 203 break; 204 205 if (gsi < 0) { 206 die("failed to configure MSIs"); 207 break; 208 } 209 210 vpci->config_gsi = gsi; 211 break; 212 case VIRTIO_MSI_QUEUE_VECTOR: 213 vec = ioport__read16(data); 214 vpci->vq_vector[vpci->queue_selector] = vec; 215 216 if (vec == VIRTIO_MSI_NO_VECTOR) 217 break; 218 219 gsi = irq__add_msix_route(kvm, 220 &vpci->msix_table[vec].msg, 221 vpci->dev_hdr.dev_num << 3); 222 /* 223 * We don't need IRQ routing if we can use 224 * MSI injection via the KVM_SIGNAL_MSI ioctl. 225 */ 226 if (gsi == -ENXIO && 227 vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 228 break; 229 230 if (gsi < 0) { 231 die("failed to configure MSIs"); 232 break; 233 } 234 235 vpci->gsis[vpci->queue_selector] = gsi; 236 if (vdev->ops->notify_vq_gsi) 237 vdev->ops->notify_vq_gsi(kvm, vpci->dev, 238 vpci->queue_selector, 239 gsi); 240 break; 241 }; 242 243 return true; 244 } else if (type == VIRTIO_PCI_O_CONFIG) { 245 vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data; 246 247 return true; 248 } 249 250 return false; 251 } 252 253 static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size) 254 { 255 unsigned long offset; 256 bool ret = true; 257 struct virtio_device *vdev; 258 struct virtio_pci *vpci; 259 struct kvm *kvm; 260 u32 val; 261 262 kvm = vcpu->kvm; 263 vdev = ioport->priv; 264 vpci = vdev->virtio; 265 offset = port - vpci->port_addr; 266 267 switch (offset) { 268 case VIRTIO_PCI_GUEST_FEATURES: 269 val = ioport__read32(data); 270 vdev->ops->set_guest_features(kvm, vpci->dev, val); 271 break; 272 case VIRTIO_PCI_QUEUE_PFN: 273 val = ioport__read32(data); 274 virtio_pci__init_ioeventfd(kvm, vdev, vpci->queue_selector); 275 vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector, 276 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT, 277 VIRTIO_PCI_VRING_ALIGN, val); 278 break; 279 case VIRTIO_PCI_QUEUE_SEL: 280 vpci->queue_selector = ioport__read16(data); 281 break; 282 case VIRTIO_PCI_QUEUE_NOTIFY: 283 val = ioport__read16(data); 284 vdev->ops->notify_vq(kvm, vpci->dev, val); 285 break; 286 case VIRTIO_PCI_STATUS: 287 vpci->status = ioport__read8(data); 288 if (!vpci->status) /* Sample endianness on reset */ 289 vdev->endian = kvm_cpu__get_endianness(vcpu); 290 if (vdev->ops->notify_status) 291 vdev->ops->notify_status(kvm, vpci->dev, vpci->status); 292 break; 293 default: 294 ret = virtio_pci__specific_io_out(kvm, vdev, port, data, size, offset); 295 break; 296 }; 297 298 return ret; 299 } 300 301 static struct ioport_operations virtio_pci__io_ops = { 302 .io_in = virtio_pci__io_in, 303 .io_out = virtio_pci__io_out, 304 }; 305 306 static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu, 307 u64 addr, u8 *data, u32 len, 308 u8 is_write, void *ptr) 309 { 310 struct virtio_pci *vpci = ptr; 311 struct msix_table *table; 312 int vecnum; 313 size_t offset; 314 315 if (addr > vpci->msix_io_block + PCI_IO_SIZE) { 316 if (is_write) 317 return; 318 table = (struct msix_table *)&vpci->msix_pba; 319 offset = addr - (vpci->msix_io_block + PCI_IO_SIZE); 320 } else { 321 table = vpci->msix_table; 322 offset = addr - vpci->msix_io_block; 323 } 324 vecnum = offset / sizeof(struct msix_table); 325 offset = offset % sizeof(struct msix_table); 326 327 if (!is_write) { 328 memcpy(data, (void *)&table[vecnum] + offset, len); 329 return; 330 } 331 332 memcpy((void *)&table[vecnum] + offset, data, len); 333 334 /* Did we just update the address or payload? */ 335 if (offset < offsetof(struct msix_table, ctrl)) 336 update_msix_map(vpci, table, vecnum); 337 } 338 339 static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci, 340 int vec) 341 { 342 struct kvm_msi msi = { 343 .address_lo = vpci->msix_table[vec].msg.address_lo, 344 .address_hi = vpci->msix_table[vec].msg.address_hi, 345 .data = vpci->msix_table[vec].msg.data, 346 }; 347 348 if (kvm->msix_needs_devid) { 349 msi.flags = KVM_MSI_VALID_DEVID; 350 msi.devid = vpci->dev_hdr.dev_num << 3; 351 } 352 353 irq__signal_msi(kvm, &msi); 354 } 355 356 int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq) 357 { 358 struct virtio_pci *vpci = vdev->virtio; 359 int tbl = vpci->vq_vector[vq]; 360 361 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 362 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 363 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 364 365 vpci->msix_pba |= 1 << tbl; 366 return 0; 367 } 368 369 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 370 virtio_pci__signal_msi(kvm, vpci, vpci->vq_vector[vq]); 371 else 372 kvm__irq_trigger(kvm, vpci->gsis[vq]); 373 } else { 374 vpci->isr = VIRTIO_IRQ_HIGH; 375 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 376 } 377 return 0; 378 } 379 380 int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev) 381 { 382 struct virtio_pci *vpci = vdev->virtio; 383 int tbl = vpci->config_vector; 384 385 if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) { 386 if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) || 387 vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) { 388 389 vpci->msix_pba |= 1 << tbl; 390 return 0; 391 } 392 393 if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI) 394 virtio_pci__signal_msi(kvm, vpci, tbl); 395 else 396 kvm__irq_trigger(kvm, vpci->config_gsi); 397 } else { 398 vpci->isr = VIRTIO_PCI_ISR_CONFIG; 399 kvm__irq_trigger(kvm, vpci->legacy_irq_line); 400 } 401 402 return 0; 403 } 404 405 static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, 406 u64 addr, u8 *data, u32 len, 407 u8 is_write, void *ptr) 408 { 409 struct virtio_pci *vpci = ptr; 410 int direction = is_write ? KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN; 411 u16 port = vpci->port_addr + (addr & (IOPORT_SIZE - 1)); 412 413 kvm__emulate_io(vcpu, port, data, direction, len, 1); 414 } 415 416 int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev, 417 int device_id, int subsys_id, int class) 418 { 419 struct virtio_pci *vpci = vdev->virtio; 420 int r; 421 422 vpci->kvm = kvm; 423 vpci->dev = dev; 424 425 r = ioport__register(kvm, IOPORT_EMPTY, &virtio_pci__io_ops, IOPORT_SIZE, vdev); 426 if (r < 0) 427 return r; 428 vpci->port_addr = (u16)r; 429 430 vpci->mmio_addr = pci_get_io_space_block(IOPORT_SIZE); 431 r = kvm__register_mmio(kvm, vpci->mmio_addr, IOPORT_SIZE, false, 432 virtio_pci__io_mmio_callback, vpci); 433 if (r < 0) 434 goto free_ioport; 435 436 vpci->msix_io_block = pci_get_io_space_block(PCI_IO_SIZE * 2); 437 r = kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE * 2, false, 438 virtio_pci__msix_mmio_callback, vpci); 439 if (r < 0) 440 goto free_mmio; 441 442 vpci->pci_hdr = (struct pci_device_header) { 443 .vendor_id = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET), 444 .device_id = cpu_to_le16(device_id), 445 .command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 446 .header_type = PCI_HEADER_TYPE_NORMAL, 447 .revision_id = 0, 448 .class[0] = class & 0xff, 449 .class[1] = (class >> 8) & 0xff, 450 .class[2] = (class >> 16) & 0xff, 451 .subsys_vendor_id = cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET), 452 .subsys_id = cpu_to_le16(subsys_id), 453 .bar[0] = cpu_to_le32(vpci->mmio_addr 454 | PCI_BASE_ADDRESS_SPACE_MEMORY), 455 .bar[1] = cpu_to_le32(vpci->port_addr 456 | PCI_BASE_ADDRESS_SPACE_IO), 457 .bar[2] = cpu_to_le32(vpci->msix_io_block 458 | PCI_BASE_ADDRESS_SPACE_MEMORY), 459 .status = cpu_to_le16(PCI_STATUS_CAP_LIST), 460 .capabilities = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr, 461 .bar_size[0] = cpu_to_le32(IOPORT_SIZE), 462 .bar_size[1] = cpu_to_le32(IOPORT_SIZE), 463 .bar_size[2] = cpu_to_le32(PCI_IO_SIZE*2), 464 }; 465 466 vpci->dev_hdr = (struct device_header) { 467 .bus_type = DEVICE_BUS_PCI, 468 .data = &vpci->pci_hdr, 469 }; 470 471 vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX; 472 vpci->pci_hdr.msix.next = 0; 473 /* 474 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue, 475 * VIRTIO_PCI_MAX_CONFIG entries for config. 476 * 477 * To quote the PCI spec: 478 * 479 * System software reads this field to determine the 480 * MSI-X Table Size N, which is encoded as N-1. 481 * For example, a returned value of "00000000011" 482 * indicates a table size of 4. 483 */ 484 vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1); 485 486 /* Both table and PBA are mapped to the same BAR (2) */ 487 vpci->pci_hdr.msix.table_offset = cpu_to_le32(2); 488 vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE); 489 vpci->config_vector = 0; 490 491 if (irq__can_signal_msi(kvm)) 492 vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI; 493 494 r = device__register(&vpci->dev_hdr); 495 if (r < 0) 496 goto free_msix_mmio; 497 498 /* save the IRQ that device__register() has allocated */ 499 vpci->legacy_irq_line = vpci->pci_hdr.irq_line; 500 501 return 0; 502 503 free_msix_mmio: 504 kvm__deregister_mmio(kvm, vpci->msix_io_block); 505 free_mmio: 506 kvm__deregister_mmio(kvm, vpci->mmio_addr); 507 free_ioport: 508 ioport__unregister(kvm, vpci->port_addr); 509 return r; 510 } 511 512 int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev) 513 { 514 struct virtio_pci *vpci = vdev->virtio; 515 int i; 516 517 kvm__deregister_mmio(kvm, vpci->mmio_addr); 518 kvm__deregister_mmio(kvm, vpci->msix_io_block); 519 ioport__unregister(kvm, vpci->port_addr); 520 521 for (i = 0; i < VIRTIO_PCI_MAX_VQ; i++) { 522 ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, i); 523 ioeventfd__del_event(vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, i); 524 } 525 526 return 0; 527 } 528