1b30d05adSPekka Enberg #include "kvm/blk-virtio.h" 2b30d05adSPekka Enberg 310eca11dSPekka Enberg #include "kvm/virtio_ring.h" 4c435b91dSPekka Enberg #include "kvm/virtio_blk.h" 5984b7ae0SPekka Enberg #include "kvm/virtio_pci.h" 65a24a9f2SPekka Enberg #include "kvm/disk-image.h" 7b30d05adSPekka Enberg #include "kvm/ioport.h" 8fe99fd4eSPekka Enberg #include "kvm/util.h" 98b1ff07eSPekka Enberg #include "kvm/kvm.h" 10b30d05adSPekka Enberg #include "kvm/pci.h" 11b30d05adSPekka Enberg 124155ba8cSPekka Enberg #include <inttypes.h> 134155ba8cSPekka Enberg #include <assert.h> 144155ba8cSPekka Enberg 158b1ff07eSPekka Enberg #define VIRTIO_BLK_IRQ 14 16984b7ae0SPekka Enberg 1710eca11dSPekka Enberg #define NUM_VIRT_QUEUES 1 1810eca11dSPekka Enberg 1910eca11dSPekka Enberg #define VIRTIO_BLK_QUEUE_SIZE 16 2010eca11dSPekka Enberg 2110eca11dSPekka Enberg struct virt_queue { 2210eca11dSPekka Enberg struct vring vring; 23fe99fd4eSPekka Enberg uint32_t pfn; 2493d18b72SPekka Enberg /* The last_avail_idx field is an index to ->ring of struct vring_avail. 25fe99fd4eSPekka Enberg It's where we assume the next request index is at. */ 2693d18b72SPekka Enberg uint16_t last_avail_idx; 2710eca11dSPekka Enberg }; 2810eca11dSPekka Enberg 29fbc2fbf9SPekka Enberg struct device { 3040ce993fSPekka Enberg struct virtio_blk_config blk_config; 31c435b91dSPekka Enberg uint32_t host_features; 32fbc2fbf9SPekka Enberg uint32_t guest_features; 3340ce993fSPekka Enberg uint16_t config_vector; 34fbc2fbf9SPekka Enberg uint8_t status; 3547bf1d0fSPekka Enberg 3647bf1d0fSPekka Enberg /* virtio queue */ 3747bf1d0fSPekka Enberg uint16_t queue_selector; 3810eca11dSPekka Enberg 3910eca11dSPekka Enberg struct virt_queue virt_queues[NUM_VIRT_QUEUES]; 40fbc2fbf9SPekka Enberg }; 41fbc2fbf9SPekka Enberg 4240ce993fSPekka Enberg #define DISK_CYLINDERS 1024 4340ce993fSPekka Enberg #define DISK_HEADS 64 4440ce993fSPekka Enberg #define DISK_SECTORS 32 45*a2c8c696SAsias He #define DISK_SEG_MAX 128 4640ce993fSPekka Enberg 47c435b91dSPekka Enberg static struct device device = { 4840ce993fSPekka Enberg .blk_config = (struct virtio_blk_config) { 4940ce993fSPekka Enberg .capacity = DISK_CYLINDERS * DISK_HEADS * DISK_SECTORS, 50*a2c8c696SAsias He .seg_max = DISK_SEG_MAX, 5140ce993fSPekka Enberg /* VIRTIO_BLK_F_GEOMETRY */ 5240ce993fSPekka Enberg .geometry = { 5340ce993fSPekka Enberg .cylinders = DISK_CYLINDERS, 5440ce993fSPekka Enberg .heads = DISK_HEADS, 5540ce993fSPekka Enberg .sectors = DISK_SECTORS, 5640ce993fSPekka Enberg }, 5740ce993fSPekka Enberg /* VIRTIO_BLK_SIZE */ 5840ce993fSPekka Enberg .blk_size = 4096, 5940ce993fSPekka Enberg }, 601ef2738dSCyrill Gorcunov /* 611ef2738dSCyrill Gorcunov * Note we don't set VIRTIO_BLK_F_GEOMETRY here so the 621ef2738dSCyrill Gorcunov * node kernel will compute disk geometry by own, the 631ef2738dSCyrill Gorcunov * same applies to VIRTIO_BLK_F_BLK_SIZE 641ef2738dSCyrill Gorcunov */ 65*a2c8c696SAsias He .host_features = (1UL << VIRTIO_BLK_F_SEG_MAX), 66c435b91dSPekka Enberg }; 67fbc2fbf9SPekka Enberg 6840ce993fSPekka Enberg static bool virtio_blk_config_in(void *data, unsigned long offset, int size, uint32_t count) 6940ce993fSPekka Enberg { 7040ce993fSPekka Enberg uint8_t *config_space = (uint8_t *) &device.blk_config; 7140ce993fSPekka Enberg 7240ce993fSPekka Enberg if (size != 1 || count != 1) 7340ce993fSPekka Enberg return false; 7440ce993fSPekka Enberg 7540ce993fSPekka Enberg ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]); 7640ce993fSPekka Enberg 7740ce993fSPekka Enberg return true; 7840ce993fSPekka Enberg } 7940ce993fSPekka Enberg 80fbc2fbf9SPekka Enberg static bool blk_virtio_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 81fbc2fbf9SPekka Enberg { 82fbc2fbf9SPekka Enberg unsigned long offset; 83fbc2fbf9SPekka Enberg 84fbc2fbf9SPekka Enberg offset = port - IOPORT_VIRTIO; 85fbc2fbf9SPekka Enberg 86fbc2fbf9SPekka Enberg switch (offset) { 87fbc2fbf9SPekka Enberg case VIRTIO_PCI_HOST_FEATURES: 88c435b91dSPekka Enberg ioport__write32(data, device.host_features); 89fbc2fbf9SPekka Enberg break; 90fbc2fbf9SPekka Enberg case VIRTIO_PCI_GUEST_FEATURES: 918b1ff07eSPekka Enberg return false; 92fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_PFN: 9310eca11dSPekka Enberg ioport__write32(data, device.virt_queues[device.queue_selector].pfn); 948b1ff07eSPekka Enberg break; 95fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_NUM: 9610eca11dSPekka Enberg ioport__write16(data, VIRTIO_BLK_QUEUE_SIZE); 978b1ff07eSPekka Enberg break; 98fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_SEL: 99fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_NOTIFY: 100fbc2fbf9SPekka Enberg return false; 101fbc2fbf9SPekka Enberg case VIRTIO_PCI_STATUS: 102fbc2fbf9SPekka Enberg ioport__write8(data, device.status); 103fbc2fbf9SPekka Enberg break; 104fbc2fbf9SPekka Enberg case VIRTIO_PCI_ISR: 1058b1ff07eSPekka Enberg ioport__write8(data, 0x1); 1068b1ff07eSPekka Enberg kvm__irq_line(self, VIRTIO_BLK_IRQ, 0); 1077e61688eSPekka Enberg break; 108fbc2fbf9SPekka Enberg case VIRTIO_MSI_CONFIG_VECTOR: 10940ce993fSPekka Enberg ioport__write16(data, device.config_vector); 11040ce993fSPekka Enberg break; 111fbc2fbf9SPekka Enberg default: 11240ce993fSPekka Enberg return virtio_blk_config_in(data, offset, size, count); 113fbc2fbf9SPekka Enberg }; 114fbc2fbf9SPekka Enberg 115fbc2fbf9SPekka Enberg return true; 116fbc2fbf9SPekka Enberg } 117fbc2fbf9SPekka Enberg 118*a2c8c696SAsias He static bool blk_virtio_request(struct kvm *self, struct virt_queue *queue) 1194155ba8cSPekka Enberg { 1204155ba8cSPekka Enberg struct vring_used_elem *used_elem; 1214155ba8cSPekka Enberg struct virtio_blk_outhdr *req; 122*a2c8c696SAsias He uint16_t desc_block_last; 1234155ba8cSPekka Enberg struct vring_desc *desc; 124*a2c8c696SAsias He uint16_t desc_status; 125*a2c8c696SAsias He uint16_t desc_block; 126258dd093SPekka Enberg uint32_t block_len; 127*a2c8c696SAsias He uint32_t block_cnt; 128*a2c8c696SAsias He uint16_t desc_hdr; 1294155ba8cSPekka Enberg uint8_t *status; 130258dd093SPekka Enberg void *block; 131*a2c8c696SAsias He int err; 132*a2c8c696SAsias He int err_cnt; 1334155ba8cSPekka Enberg 134*a2c8c696SAsias He /* header */ 135*a2c8c696SAsias He desc_hdr = queue->vring.avail->ring[queue->last_avail_idx++ % queue->vring.num]; 1364155ba8cSPekka Enberg 137*a2c8c696SAsias He if (desc_hdr >= queue->vring.num) { 1384155ba8cSPekka Enberg warning("fatal I/O error"); 1394155ba8cSPekka Enberg return false; 1404155ba8cSPekka Enberg } 1414155ba8cSPekka Enberg 142*a2c8c696SAsias He desc = &queue->vring.desc[desc_hdr]; 1434155ba8cSPekka Enberg assert(!(desc->flags & VRING_DESC_F_INDIRECT)); 1444155ba8cSPekka Enberg 1454155ba8cSPekka Enberg req = guest_flat_to_host(self, desc->addr); 1464155ba8cSPekka Enberg 147*a2c8c696SAsias He /* status */ 148*a2c8c696SAsias He desc_status = desc_hdr; 149*a2c8c696SAsias He 150*a2c8c696SAsias He do { 151*a2c8c696SAsias He desc_block_last = desc_status; 152*a2c8c696SAsias He desc_status = queue->vring.desc[desc_status].next; 153*a2c8c696SAsias He 154*a2c8c696SAsias He if (desc_status >= queue->vring.num) { 155*a2c8c696SAsias He warning("fatal I/O error"); 156*a2c8c696SAsias He return false; 157*a2c8c696SAsias He } 158*a2c8c696SAsias He 159*a2c8c696SAsias He } while (queue->vring.desc[desc_status].flags & VRING_DESC_F_NEXT); 160*a2c8c696SAsias He 161*a2c8c696SAsias He desc = &queue->vring.desc[desc_status]; 162*a2c8c696SAsias He assert(!(desc->flags & VRING_DESC_F_INDIRECT)); 163*a2c8c696SAsias He 164*a2c8c696SAsias He status = guest_flat_to_host(self, desc->addr); 165*a2c8c696SAsias He 1664155ba8cSPekka Enberg /* block */ 167*a2c8c696SAsias He desc_block = desc_hdr; 168*a2c8c696SAsias He block_cnt = 0; 169*a2c8c696SAsias He err_cnt = 0; 170*a2c8c696SAsias He 171*a2c8c696SAsias He do { 172*a2c8c696SAsias He desc_block = queue->vring.desc[desc_block].next; 173*a2c8c696SAsias He 174*a2c8c696SAsias He desc = &queue->vring.desc[desc_block]; 1754155ba8cSPekka Enberg assert(!(desc->flags & VRING_DESC_F_INDIRECT)); 1764155ba8cSPekka Enberg 177258dd093SPekka Enberg block = guest_flat_to_host(self, desc->addr); 178258dd093SPekka Enberg block_len = desc->len; 1794155ba8cSPekka Enberg 180258dd093SPekka Enberg switch (req->type) { 181*a2c8c696SAsias He case VIRTIO_BLK_T_IN: 182258dd093SPekka Enberg err = disk_image__read_sector(self->disk_image, req->sector, block, block_len); 183258dd093SPekka Enberg break; 184*a2c8c696SAsias He case VIRTIO_BLK_T_OUT: 185258dd093SPekka Enberg err = disk_image__write_sector(self->disk_image, req->sector, block, block_len); 186258dd093SPekka Enberg break; 187258dd093SPekka Enberg default: 1884155ba8cSPekka Enberg warning("request type %d", req->type); 189*a2c8c696SAsias He err = -1; 1904155ba8cSPekka Enberg } 1914155ba8cSPekka Enberg 192*a2c8c696SAsias He if (err) 193*a2c8c696SAsias He err_cnt++; 1944155ba8cSPekka Enberg 195*a2c8c696SAsias He req->sector += block_len >> SECTOR_SHIFT; 196*a2c8c696SAsias He block_cnt += block_len; 197*a2c8c696SAsias He 198*a2c8c696SAsias He if (desc_block == desc_block_last) 199*a2c8c696SAsias He break; 200*a2c8c696SAsias He 201*a2c8c696SAsias He if (desc_block >= queue->vring.num) { 202*a2c8c696SAsias He warning("fatal I/O error"); 203*a2c8c696SAsias He return false; 204*a2c8c696SAsias He } 205*a2c8c696SAsias He 206*a2c8c696SAsias He } while (true); 207*a2c8c696SAsias He 208*a2c8c696SAsias He *status = err_cnt ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK; 209*a2c8c696SAsias He 210*a2c8c696SAsias He used_elem = &queue->vring.used->ring[queue->vring.used->idx++ % queue->vring.num]; 211*a2c8c696SAsias He used_elem->id = desc_hdr; 212*a2c8c696SAsias He used_elem->len = block_cnt; 2134155ba8cSPekka Enberg 2144155ba8cSPekka Enberg return true; 2154155ba8cSPekka Enberg } 2164155ba8cSPekka Enberg 217fbc2fbf9SPekka Enberg static bool blk_virtio_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) 218fbc2fbf9SPekka Enberg { 219fbc2fbf9SPekka Enberg unsigned long offset; 220fbc2fbf9SPekka Enberg 221fbc2fbf9SPekka Enberg offset = port - IOPORT_VIRTIO; 222fbc2fbf9SPekka Enberg 223fbc2fbf9SPekka Enberg switch (offset) { 224fbc2fbf9SPekka Enberg case VIRTIO_PCI_GUEST_FEATURES: 225fbc2fbf9SPekka Enberg device.guest_features = ioport__read32(data); 226fbc2fbf9SPekka Enberg break; 22710eca11dSPekka Enberg case VIRTIO_PCI_QUEUE_PFN: { 22810eca11dSPekka Enberg struct virt_queue *queue; 22910eca11dSPekka Enberg void *p; 23010eca11dSPekka Enberg 23110eca11dSPekka Enberg queue = &device.virt_queues[device.queue_selector]; 23210eca11dSPekka Enberg 23310eca11dSPekka Enberg queue->pfn = ioport__read32(data); 23410eca11dSPekka Enberg 23510eca11dSPekka Enberg p = guest_flat_to_host(self, queue->pfn << 12); 23610eca11dSPekka Enberg 23710eca11dSPekka Enberg vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, 4096); 23810eca11dSPekka Enberg 2397e61688eSPekka Enberg break; 24010eca11dSPekka Enberg } 241fbc2fbf9SPekka Enberg case VIRTIO_PCI_QUEUE_SEL: 24247bf1d0fSPekka Enberg device.queue_selector = ioport__read16(data); 2437e61688eSPekka Enberg break; 24410eca11dSPekka Enberg case VIRTIO_PCI_QUEUE_NOTIFY: { 24510eca11dSPekka Enberg struct virt_queue *queue; 24610eca11dSPekka Enberg uint16_t queue_index; 24710eca11dSPekka Enberg 24810eca11dSPekka Enberg queue_index = ioport__read16(data); 24910eca11dSPekka Enberg 25010eca11dSPekka Enberg queue = &device.virt_queues[queue_index]; 25110eca11dSPekka Enberg 2524155ba8cSPekka Enberg while (queue->vring.avail->idx != queue->last_avail_idx) { 253*a2c8c696SAsias He if (!blk_virtio_request(self, queue)) 2544155ba8cSPekka Enberg return false; 25593d18b72SPekka Enberg } 2568b1ff07eSPekka Enberg kvm__irq_line(self, VIRTIO_BLK_IRQ, 1); 2575a24a9f2SPekka Enberg 2587e61688eSPekka Enberg break; 25910eca11dSPekka Enberg } 260fbc2fbf9SPekka Enberg case VIRTIO_PCI_STATUS: 261fbc2fbf9SPekka Enberg device.status = ioport__read8(data); 262fbc2fbf9SPekka Enberg break; 263fbc2fbf9SPekka Enberg case VIRTIO_MSI_CONFIG_VECTOR: 26440ce993fSPekka Enberg device.config_vector = VIRTIO_MSI_NO_VECTOR; 26540ce993fSPekka Enberg break; 266fbc2fbf9SPekka Enberg case VIRTIO_MSI_QUEUE_VECTOR: 26740ce993fSPekka Enberg break; 268fbc2fbf9SPekka Enberg default: 269fbc2fbf9SPekka Enberg return false; 270fbc2fbf9SPekka Enberg }; 271fbc2fbf9SPekka Enberg 272fbc2fbf9SPekka Enberg return true; 273fbc2fbf9SPekka Enberg } 274fbc2fbf9SPekka Enberg 275fbc2fbf9SPekka Enberg static struct ioport_operations blk_virtio_io_ops = { 276fbc2fbf9SPekka Enberg .io_in = blk_virtio_in, 277fbc2fbf9SPekka Enberg .io_out = blk_virtio_out, 278fbc2fbf9SPekka Enberg }; 279fbc2fbf9SPekka Enberg 280b30d05adSPekka Enberg #define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 281b30d05adSPekka Enberg #define PCI_DEVICE_ID_VIRTIO_BLK 0x1001 282b30d05adSPekka Enberg #define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4 283b30d05adSPekka Enberg #define PCI_SUBSYSTEM_ID_VIRTIO_BLK 0x0002 284b30d05adSPekka Enberg 285fbc2fbf9SPekka Enberg static struct pci_device_header blk_virtio_pci_device = { 286b30d05adSPekka Enberg .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, 287b30d05adSPekka Enberg .device_id = PCI_DEVICE_ID_VIRTIO_BLK, 288b30d05adSPekka Enberg .header_type = PCI_HEADER_TYPE_NORMAL, 289b30d05adSPekka Enberg .revision_id = 0, 290b30d05adSPekka Enberg .class = 0x010000, 291b30d05adSPekka Enberg .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, 292b30d05adSPekka Enberg .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_BLK, 293b30d05adSPekka Enberg .bar[0] = IOPORT_VIRTIO | PCI_BASE_ADDRESS_SPACE_IO, 294dc53a427SPekka Enberg .irq_pin = 1, 2958b1ff07eSPekka Enberg .irq_line = VIRTIO_BLK_IRQ, 296b30d05adSPekka Enberg }; 297b30d05adSPekka Enberg 298ca7c891bSCyrill Gorcunov void blk_virtio__init(struct kvm *self) 299b30d05adSPekka Enberg { 3001f848897SPekka Enberg if (!self->disk_image) 3011f848897SPekka Enberg return; 3021f848897SPekka Enberg 303*a2c8c696SAsias He device.blk_config.capacity = self->disk_image->size >> SECTOR_SHIFT; 304ca7c891bSCyrill Gorcunov 305fbc2fbf9SPekka Enberg pci__register(&blk_virtio_pci_device, 1); 306b30d05adSPekka Enberg 3078b1ff07eSPekka Enberg ioport__register(IOPORT_VIRTIO, &blk_virtio_io_ops, 256); 308b30d05adSPekka Enberg } 309