1416b2c2dSAsias He #include "kvm/virtio-blk.h" 2b30d05adSPekka Enberg 331638bcaSCyrill Gorcunov #include "kvm/virtio-pci-dev.h" 42449f6e3SSasha Levin #include "kvm/irq.h" 55a24a9f2SPekka Enberg #include "kvm/disk-image.h" 639d6af07SAsias He #include "kvm/virtio.h" 7b30d05adSPekka Enberg #include "kvm/ioport.h" 84ef0f4d6SPekka Enberg #include "kvm/mutex.h" 9fe99fd4eSPekka Enberg #include "kvm/util.h" 108b1ff07eSPekka Enberg #include "kvm/kvm.h" 11b30d05adSPekka Enberg #include "kvm/pci.h" 12fb0957f2SSasha Levin #include "kvm/threadpool.h" 13ec75b82fSSasha Levin #include "kvm/ioeventfd.h" 14*404d164bSSasha Levin #include "kvm/guest_compat.h" 15b30d05adSPekka Enberg 1620c64ecaSPekka Enberg #include <linux/virtio_ring.h> 1720c64ecaSPekka Enberg #include <linux/virtio_blk.h> 180528c2a7SPekka Enberg 19ebe9ac19SSasha Levin #include <linux/list.h> 203fdf659dSSasha Levin #include <linux/types.h> 210528c2a7SPekka Enberg #include <pthread.h> 224155ba8cSPekka Enberg 234749e795SSasha Levin #define VIRTIO_BLK_MAX_DEV 4 2410eca11dSPekka Enberg #define NUM_VIRT_QUEUES 1 2510eca11dSPekka Enberg 2603110ff3SAsias He #define VIRTIO_BLK_QUEUE_SIZE 128 273d7831a1SAsias He /* 283d7831a1SAsias He * the header and status consume too entries 293d7831a1SAsias He */ 303d7831a1SAsias He #define DISK_SEG_MAX (VIRTIO_BLK_QUEUE_SIZE - 2) 3110eca11dSPekka Enberg 32fe2a70d1SSasha Levin struct blk_dev_job { 334749e795SSasha Levin struct virt_queue *vq; 34fe2a70d1SSasha Levin struct blk_dev *bdev; 3569971b13SSasha Levin struct iovec iov[VIRTIO_BLK_QUEUE_SIZE]; 3669971b13SSasha Levin u16 out, in, head; 37df0c7f57SSasha Levin struct thread_pool__job job_id; 384749e795SSasha Levin }; 394749e795SSasha Levin 40fe2a70d1SSasha Levin struct blk_dev { 410528c2a7SPekka Enberg pthread_mutex_t mutex; 42ebe9ac19SSasha Levin struct list_head list; 430528c2a7SPekka Enberg 4440ce993fSPekka Enberg struct virtio_blk_config blk_config; 4538605e1cSSasha Levin struct disk_image *disk; 46ebe9ac19SSasha Levin u64 base_addr; 473fdf659dSSasha Levin u32 host_features; 483fdf659dSSasha Levin u32 guest_features; 493fdf659dSSasha Levin u16 config_vector; 503fdf659dSSasha Levin u8 status; 51ebfc7327SAsias He u8 isr; 52*404d164bSSasha Levin int compat_id; 5347bf1d0fSPekka Enberg 5447bf1d0fSPekka Enberg /* virtio queue */ 553fdf659dSSasha Levin u16 queue_selector; 5610eca11dSPekka Enberg 5745e47970SAsias He struct virt_queue vqs[NUM_VIRT_QUEUES]; 5869971b13SSasha Levin struct blk_dev_job jobs[VIRTIO_BLK_QUEUE_SIZE]; 5969971b13SSasha Levin u16 job_idx; 60ef1f02f2SSasha Levin struct pci_device_header pci_hdr; 61fbc2fbf9SPekka Enberg }; 62fbc2fbf9SPekka Enberg 63ebe9ac19SSasha Levin static LIST_HEAD(bdevs); 6440ce993fSPekka Enberg 65c9f6a037SXiao Guangrong static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long offset, int size) 6640ce993fSPekka Enberg { 67fe2a70d1SSasha Levin u8 *config_space = (u8 *) &bdev->blk_config; 6840ce993fSPekka Enberg 69c9f6a037SXiao Guangrong if (size != 1) 7040ce993fSPekka Enberg return false; 7140ce993fSPekka Enberg 72b8f43678SSasha Levin ioport__write8(data, config_space[offset - VIRTIO_MSI_CONFIG_VECTOR]); 7340ce993fSPekka Enberg 7440ce993fSPekka Enberg return true; 7540ce993fSPekka Enberg } 7640ce993fSPekka Enberg 77c9f6a037SXiao Guangrong static bool virtio_blk_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) 78fbc2fbf9SPekka Enberg { 79407475bfSPekka Enberg struct blk_dev *bdev; 80ebe9ac19SSasha Levin u16 offset; 810528c2a7SPekka Enberg bool ret = true; 820528c2a7SPekka Enberg 83ebe9ac19SSasha Levin bdev = ioport->priv; 84ebe9ac19SSasha Levin offset = port - bdev->base_addr; 854749e795SSasha Levin 86fe2a70d1SSasha Levin mutex_lock(&bdev->mutex); 87fbc2fbf9SPekka Enberg 88fbc2fbf9SPekka Enberg switch (offset) { 89fbc2fbf9SPekka Enberg case VIRTIO_PCI_HOST_FEATURES: 90fe2a70d1SSasha Levin ioport__write32(data, bdev->host_features); 91fbc2fbf9SPekka Enberg break; 92fbc2fbf9SPekka Enberg case VIRTIO_PCI_GUEST_FEATURES: 930528c2a7SPekka Enberg ret = false; 949ee67e60SAsias He break; 95fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_PFN: 96fe2a70d1SSasha Levin ioport__write32(data, bdev->vqs[bdev->queue_selector].pfn); 978b1ff07eSPekka Enberg break; 98fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_NUM: 9910eca11dSPekka Enberg ioport__write16(data, VIRTIO_BLK_QUEUE_SIZE); 1008b1ff07eSPekka Enberg break; 101fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_SEL: 102fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_NOTIFY: 1030528c2a7SPekka Enberg ret = false; 1049ee67e60SAsias He break; 105fbc2fbf9SPekka Enberg case VIRTIO_PCI_STATUS: 106fe2a70d1SSasha Levin ioport__write8(data, bdev->status); 107fbc2fbf9SPekka Enberg break; 108fbc2fbf9SPekka Enberg case VIRTIO_PCI_ISR: 109ebfc7327SAsias He ioport__write8(data, bdev->isr); 11043835ac9SSasha Levin kvm__irq_line(kvm, bdev->pci_hdr.irq_line, VIRTIO_IRQ_LOW); 111ebfc7327SAsias He bdev->isr = VIRTIO_IRQ_LOW; 1127e61688eSPekka Enberg break; 113fbc2fbf9SPekka Enberg case VIRTIO_MSI_CONFIG_VECTOR: 114fe2a70d1SSasha Levin ioport__write16(data, bdev->config_vector); 11540ce993fSPekka Enberg break; 116fbc2fbf9SPekka Enberg default: 117c9f6a037SXiao Guangrong ret = virtio_blk_dev_in(bdev, data, offset, size); 118407475bfSPekka Enberg break; 119fbc2fbf9SPekka Enberg }; 120fbc2fbf9SPekka Enberg 121fe2a70d1SSasha Levin mutex_unlock(&bdev->mutex); 1220528c2a7SPekka Enberg 1230528c2a7SPekka Enberg return ret; 124fbc2fbf9SPekka Enberg } 125fbc2fbf9SPekka Enberg 12669971b13SSasha Levin static void virtio_blk_do_io_request(struct kvm *kvm, void *param) 1274155ba8cSPekka Enberg { 1284155ba8cSPekka Enberg struct virtio_blk_outhdr *req; 1293fdf659dSSasha Levin u8 *status; 13069971b13SSasha Levin ssize_t block_cnt; 13169971b13SSasha Levin struct blk_dev_job *job; 13269971b13SSasha Levin struct blk_dev *bdev; 13369971b13SSasha Levin struct virt_queue *queue; 13469971b13SSasha Levin struct iovec *iov; 13569971b13SSasha Levin u16 out, in, head; 1364155ba8cSPekka Enberg 13769971b13SSasha Levin block_cnt = -1; 13869971b13SSasha Levin job = param; 13969971b13SSasha Levin bdev = job->bdev; 14069971b13SSasha Levin queue = job->vq; 14169971b13SSasha Levin iov = job->iov; 14269971b13SSasha Levin out = job->out; 14369971b13SSasha Levin in = job->in; 14469971b13SSasha Levin head = job->head; 14545e47970SAsias He req = iov[0].iov_base; 14603110ff3SAsias He 147258dd093SPekka Enberg switch (req->type) { 14803110ff3SAsias He case VIRTIO_BLK_T_IN: 149b8861977SAsias He block_cnt = disk_image__read(bdev->disk, req->sector, iov + 1, in + out - 2); 150258dd093SPekka Enberg break; 15103110ff3SAsias He case VIRTIO_BLK_T_OUT: 152b8861977SAsias He block_cnt = disk_image__write(bdev->disk, req->sector, iov + 1, in + out - 2); 153258dd093SPekka Enberg break; 15429084a74SPrasad Joshi case VIRTIO_BLK_T_FLUSH: 15529084a74SPrasad Joshi block_cnt = disk_image__flush(bdev->disk); 15629084a74SPrasad Joshi break; 157ff6462e8SSasha Levin case VIRTIO_BLK_T_GET_ID: 158ff6462e8SSasha Levin block_cnt = VIRTIO_BLK_ID_BYTES; 159ff6462e8SSasha Levin disk_image__get_serial(bdev->disk, (iov + 1)->iov_base, &block_cnt); 160ff6462e8SSasha Levin break; 161258dd093SPekka Enberg default: 1624542f276SCyrill Gorcunov pr_warning("request type %d", req->type); 16370b53f25SSasha Levin block_cnt = -1; 164407475bfSPekka Enberg break; 16503110ff3SAsias He } 16603110ff3SAsias He 16745e47970SAsias He /* status */ 16845e47970SAsias He status = iov[out + in - 1].iov_base; 16970b53f25SSasha Levin *status = (block_cnt < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK; 17003110ff3SAsias He 17169971b13SSasha Levin mutex_lock(&bdev->mutex); 17245e47970SAsias He virt_queue__set_used_elem(queue, head, block_cnt); 17369971b13SSasha Levin mutex_unlock(&bdev->mutex); 1744155ba8cSPekka Enberg 17569971b13SSasha Levin virt_queue__trigger_irq(queue, bdev->pci_hdr.irq_line, &bdev->isr, kvm); 1764155ba8cSPekka Enberg } 1774155ba8cSPekka Enberg 17869971b13SSasha Levin static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev) 17945e47970SAsias He { 18069971b13SSasha Levin while (virt_queue__available(vq)) { 18169971b13SSasha Levin struct blk_dev_job *job = &bdev->jobs[bdev->job_idx++ % VIRTIO_BLK_QUEUE_SIZE]; 182407475bfSPekka Enberg 18369971b13SSasha Levin *job = (struct blk_dev_job) { 18469971b13SSasha Levin .vq = vq, 18569971b13SSasha Levin .bdev = bdev, 18669971b13SSasha Levin }; 18769971b13SSasha Levin job->head = virt_queue__get_iov(vq, job->iov, &job->out, &job->in, kvm); 18845e47970SAsias He 18969971b13SSasha Levin thread_pool__init_job(&job->job_id, kvm, virtio_blk_do_io_request, job); 19069971b13SSasha Levin thread_pool__do_job(&job->job_id); 19169971b13SSasha Levin } 1924baf6f73SSasha Levin } 1930528c2a7SPekka Enberg 194c9f6a037SXiao Guangrong static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) 195fbc2fbf9SPekka Enberg { 196407475bfSPekka Enberg struct blk_dev *bdev; 197ebe9ac19SSasha Levin u16 offset; 1980528c2a7SPekka Enberg bool ret = true; 1990528c2a7SPekka Enberg 200ebe9ac19SSasha Levin bdev = ioport->priv; 201ebe9ac19SSasha Levin offset = port - bdev->base_addr; 2024749e795SSasha Levin 203fe2a70d1SSasha Levin mutex_lock(&bdev->mutex); 204fbc2fbf9SPekka Enberg 205fbc2fbf9SPekka Enberg switch (offset) { 206fbc2fbf9SPekka Enberg case VIRTIO_PCI_GUEST_FEATURES: 207fe2a70d1SSasha Levin bdev->guest_features = ioport__read32(data); 208fbc2fbf9SPekka Enberg break; 20910eca11dSPekka Enberg case VIRTIO_PCI_QUEUE_PFN: { 21010eca11dSPekka Enberg struct virt_queue *queue; 21110eca11dSPekka Enberg void *p; 21210eca11dSPekka Enberg 213*404d164bSSasha Levin compat__remove_message(bdev->compat_id); 214*404d164bSSasha Levin 215fe2a70d1SSasha Levin queue = &bdev->vqs[bdev->queue_selector]; 21610eca11dSPekka Enberg queue->pfn = ioport__read32(data); 21743835ac9SSasha Levin p = guest_pfn_to_host(kvm, queue->pfn); 21810eca11dSPekka Enberg 219b8f43678SSasha Levin vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); 22010eca11dSPekka Enberg 2217e61688eSPekka Enberg break; 22210eca11dSPekka Enberg } 223fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_SEL: 224fe2a70d1SSasha Levin bdev->queue_selector = ioport__read16(data); 2257e61688eSPekka Enberg break; 22610eca11dSPekka Enberg case VIRTIO_PCI_QUEUE_NOTIFY: { 2273fdf659dSSasha Levin u16 queue_index; 228407475bfSPekka Enberg 22910eca11dSPekka Enberg queue_index = ioport__read16(data); 23069971b13SSasha Levin virtio_blk_do_io(kvm, &bdev->vqs[queue_index], bdev); 231407475bfSPekka Enberg 2327e61688eSPekka Enberg break; 23310eca11dSPekka Enberg } 234fbc2fbf9SPekka Enberg case VIRTIO_PCI_STATUS: 235fe2a70d1SSasha Levin bdev->status = ioport__read8(data); 236fbc2fbf9SPekka Enberg break; 237fbc2fbf9SPekka Enberg case VIRTIO_MSI_CONFIG_VECTOR: 238fe2a70d1SSasha Levin bdev->config_vector = VIRTIO_MSI_NO_VECTOR; 23940ce993fSPekka Enberg break; 240fbc2fbf9SPekka Enberg case VIRTIO_MSI_QUEUE_VECTOR: 24140ce993fSPekka Enberg break; 242fbc2fbf9SPekka Enberg default: 2430528c2a7SPekka Enberg ret = false; 244407475bfSPekka Enberg break; 245fbc2fbf9SPekka Enberg }; 246fbc2fbf9SPekka Enberg 247fe2a70d1SSasha Levin mutex_unlock(&bdev->mutex); 2480528c2a7SPekka Enberg 2490528c2a7SPekka Enberg return ret; 250fbc2fbf9SPekka Enberg } 251fbc2fbf9SPekka Enberg 252416b2c2dSAsias He static struct ioport_operations virtio_blk_io_ops = { 253416b2c2dSAsias He .io_in = virtio_blk_pci_io_in, 254416b2c2dSAsias He .io_out = virtio_blk_pci_io_out, 255fbc2fbf9SPekka Enberg }; 256fbc2fbf9SPekka Enberg 257ec75b82fSSasha Levin static void ioevent_callback(struct kvm *kvm, void *param) 258ec75b82fSSasha Levin { 25969971b13SSasha Levin struct blk_dev *bdev = param; 260ec75b82fSSasha Levin 26169971b13SSasha Levin virtio_blk_do_io(kvm, &bdev->vqs[0], bdev); 262ec75b82fSSasha Levin } 263ec75b82fSSasha Levin 26443835ac9SSasha Levin void virtio_blk__init(struct kvm *kvm, struct disk_image *disk) 2654749e795SSasha Levin { 2664749e795SSasha Levin u16 blk_dev_base_addr; 267ec75b82fSSasha Levin u8 dev, pin, line, i; 268fe2a70d1SSasha Levin struct blk_dev *bdev; 269ec75b82fSSasha Levin struct ioevent ioevent; 2704749e795SSasha Levin 2714749e795SSasha Levin if (!disk) 2724749e795SSasha Levin return; 2734749e795SSasha Levin 274ebe9ac19SSasha Levin bdev = calloc(1, sizeof(struct blk_dev)); 275ebe9ac19SSasha Levin if (bdev == NULL) 276fe2a70d1SSasha Levin die("Failed allocating bdev"); 2774749e795SSasha Levin 278ebe9ac19SSasha Levin blk_dev_base_addr = ioport__register(IOPORT_EMPTY, &virtio_blk_io_ops, IOPORT_SIZE, bdev); 2794749e795SSasha Levin 280fe2a70d1SSasha Levin *bdev = (struct blk_dev) { 2814749e795SSasha Levin .mutex = PTHREAD_MUTEX_INITIALIZER, 2824749e795SSasha Levin .disk = disk, 283ebe9ac19SSasha Levin .base_addr = blk_dev_base_addr, 2844749e795SSasha Levin .blk_config = (struct virtio_blk_config) { 2854749e795SSasha Levin .capacity = disk->size / SECTOR_SIZE, 2863d7831a1SAsias He .seg_max = DISK_SEG_MAX, 2874749e795SSasha Levin }, 288ef1f02f2SSasha Levin .pci_hdr = (struct pci_device_header) { 289b30d05adSPekka Enberg .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 290b30d05adSPekka Enberg .device_id = PCI_DEVICE_ID_VIRTIO_BLK, 291b30d05adSPekka Enberg .header_type = PCI_HEADER_TYPE_NORMAL, 292b30d05adSPekka Enberg .revision_id = 0, 293b30d05adSPekka Enberg .class = 0x010000, 294b30d05adSPekka Enberg .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 2950a7ab0c6SSasha Levin .subsys_id = VIRTIO_ID_BLOCK, 2964749e795SSasha Levin .bar[0] = blk_dev_base_addr | PCI_BASE_ADDRESS_SPACE_IO, 2974749e795SSasha Levin }, 2983d7831a1SAsias He /* 2993d7831a1SAsias He * Note we don't set VIRTIO_BLK_F_GEOMETRY here so the 3003d7831a1SAsias He * guest kernel will compute disk geometry by own, the 3013d7831a1SAsias He * same applies to VIRTIO_BLK_F_BLK_SIZE 3023d7831a1SAsias He */ 30329084a74SPrasad Joshi .host_features = (1UL << VIRTIO_BLK_F_SEG_MAX | 1UL << VIRTIO_BLK_F_FLUSH), 304b30d05adSPekka Enberg }; 305b30d05adSPekka Enberg 306ebe9ac19SSasha Levin list_add_tail(&bdev->list, &bdevs); 307ebe9ac19SSasha Levin 3080a7ab0c6SSasha Levin if (irq__register_device(VIRTIO_ID_BLOCK, &dev, &pin, &line) < 0) 3092449f6e3SSasha Levin return; 3102449f6e3SSasha Levin 311ef1f02f2SSasha Levin bdev->pci_hdr.irq_pin = pin; 312ef1f02f2SSasha Levin bdev->pci_hdr.irq_line = line; 3132449f6e3SSasha Levin 314ef1f02f2SSasha Levin pci__register(&bdev->pci_hdr, dev); 315ec75b82fSSasha Levin 316ec75b82fSSasha Levin for (i = 0; i < NUM_VIRT_QUEUES; i++) { 317ec75b82fSSasha Levin ioevent = (struct ioevent) { 318ec75b82fSSasha Levin .io_addr = blk_dev_base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 319ec75b82fSSasha Levin .io_len = sizeof(u16), 320ec75b82fSSasha Levin .fn = ioevent_callback, 321ec75b82fSSasha Levin .datamatch = i, 32269971b13SSasha Levin .fn_ptr = bdev, 323ec75b82fSSasha Levin .fn_kvm = kvm, 324ec75b82fSSasha Levin .fd = eventfd(0, 0), 325ec75b82fSSasha Levin }; 326ec75b82fSSasha Levin 327ec75b82fSSasha Levin ioeventfd__add_event(&ioevent); 328ec75b82fSSasha Levin } 329*404d164bSSasha Levin 330*404d164bSSasha Levin bdev->compat_id = compat__add_message("virtio-blk device was not detected", 331*404d164bSSasha Levin "While you have requested a virtio-blk device, " 332*404d164bSSasha Levin "the guest kernel didn't seem to detect it.\n" 333*404d164bSSasha Levin "Please make sure that the kernel was compiled" 334*404d164bSSasha Levin "with CONFIG_VIRTIO_BLK."); 335b30d05adSPekka Enberg } 336bcb6aacaSPrasad Joshi 337bcb6aacaSPrasad Joshi void virtio_blk__init_all(struct kvm *kvm) 338bcb6aacaSPrasad Joshi { 339bcb6aacaSPrasad Joshi int i; 340bcb6aacaSPrasad Joshi 341bcb6aacaSPrasad Joshi for (i = 0; i < kvm->nr_disks; i++) 342bcb6aacaSPrasad Joshi virtio_blk__init(kvm, kvm->disks[i]); 343bcb6aacaSPrasad Joshi } 344a0a1e3c2SPrasad Joshi 345a0a1e3c2SPrasad Joshi void virtio_blk__delete_all(struct kvm *kvm) 346a0a1e3c2SPrasad Joshi { 347ebe9ac19SSasha Levin while (!list_empty(&bdevs)) { 348ebe9ac19SSasha Levin struct blk_dev *bdev; 349a0a1e3c2SPrasad Joshi 350ebe9ac19SSasha Levin bdev = list_first_entry(&bdevs, struct blk_dev, list); 351ec75b82fSSasha Levin ioeventfd__del_event(bdev->base_addr + VIRTIO_PCI_QUEUE_NOTIFY, 0); 352ebe9ac19SSasha Levin list_del(&bdev->list); 353ebe9ac19SSasha Levin free(bdev); 354ebe9ac19SSasha Levin } 355a0a1e3c2SPrasad Joshi } 356