#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>

#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

const char *virtio_trans_name(enum virtio_trans trans)
{
	if (trans == VIRTIO_PCI)
		return "pci";
	else if (trans == VIRTIO_MMIO)
		return "mmio";
	return "unknown";
}

void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
	u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);

	/*
	 * Use wmb to assure that used elem was updated with head and len.
	 * We need a wmb here since we can't advance idx unless we're ready
	 * to pass the used element to the guest.
	 */
	wmb();
	idx += jump;
	queue->vring.used->idx = virtio_host_to_guest_u16(queue, idx);
}

/*
 * Fill in a used-ring element for the chain starting at 'head', 'offset'
 * slots past the current used index, without publishing it to the guest.
 */
struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
				    u32 len, u16 offset)
{
	struct vring_used_elem *used_elem;
	u16 idx = virtio_guest_to_host_u16(queue, queue->vring.used->idx);

	idx += offset;
	used_elem = &queue->vring.used->ring[idx % queue->vring.num];
	used_elem->id = virtio_host_to_guest_u32(queue, head);
	used_elem->len = virtio_host_to_guest_u32(queue, len);

	return used_elem;
}

struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
{
	struct vring_used_elem *used_elem;

	used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
	virt_queue__used_idx_advance(queue, 1);

	return used_elem;
}

static inline bool virt_desc__test_flag(struct virt_queue *vq,
					struct vring_desc *desc, u16 flag)
{
	return !!(virtio_guest_to_host_u16(vq, desc->flags) & flag);
}

/*
 * Each buffer in the virtqueues is actually a chain of descriptors. This
 * function returns the next descriptor in the chain, or max if we're at the
 * end.
 */
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
			  unsigned int i, unsigned int max)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
		return max;

	next = virtio_guest_to_host_u16(vq, desc[i].next);

	/* Ensure they're not leading us off end of descriptors. */
	return min(next, max);
}

/*
 * Translate the descriptor chain starting at 'head' into host-addressed
 * iovecs, counting device-readable (*out) and device-writable (*in) buffers.
 */
u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
	struct vring_desc *desc;
	u16 idx;
	u16 max;

	idx = head;
	*out = *in = 0;
	max = vq->vring.num;
	desc = vq->vring.desc;

	if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
		max = virtio_guest_to_host_u32(vq, desc[idx].len) / sizeof(struct vring_desc);
		desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq, desc[idx].addr));
		idx = 0;
	}

	do {
		/* Grab the first descriptor, and check it's OK. */
		iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq, desc[idx].len);
		iov[*out + *in].iov_base = guest_flat_to_host(kvm,
				virtio_guest_to_host_u64(vq, desc[idx].addr));
		/* If this is an input descriptor, increment that count. */
		if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
			(*in)++;
		else
			(*out)++;
	} while ((idx = next_desc(vq, desc, idx, max)) != max);

	return head;
}

u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm)
{
	u16 head;

	head = virt_queue__pop(vq);

	return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}

/* in and out are relative to guest */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
			      struct iovec in_iov[], struct iovec out_iov[],
			      u16 *in, u16 *out)
{
	struct vring_desc *desc;
	u16 head, idx;

	idx = head = virt_queue__pop(queue);
	*out = *in = 0;
	do {
		u64 addr;

		desc = virt_queue__get_desc(queue, idx);
		addr = virtio_guest_to_host_u64(queue, desc->addr);
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
			in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
			in_iov[*in].iov_len = virtio_guest_to_host_u32(queue, desc->len);
			(*in)++;
		} else {
			out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
			out_iov[*out].iov_len = virtio_guest_to_host_u32(queue, desc->len);
			(*out)++;
		}
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
			idx = virtio_guest_to_host_u16(queue, desc->next);
		else
			break;
	} while (1);

	return head;
}
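/*
 * Illustrative sketch, not part of the original file: the typical way a
 * device backend drains a virtqueue with the helpers above. The queue size
 * macro and example_handle_iov() are placeholders for this example only;
 * virt_queue__available() is the helper declared in kvm/virtio.h. Kept
 * under #if 0 so it is never built.
 */
#if 0
static void example__process_queue(struct kvm *kvm, struct virtio_device *vdev,
				   struct virt_queue *vq, u32 vq_index)
{
	struct iovec iov[EXAMPLE_QUEUE_SIZE];
	u16 out, in, head;
	int len;

	/* Pop every available descriptor chain and hand it back as used. */
	while (virt_queue__available(vq)) {
		head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
		/* Consume the 'out' readable buffers, fill the 'in' writable ones. */
		len = example_handle_iov(iov, out, in);
		virt_queue__set_used_elem(vq, head, len);
	}

	/* Only interrupt the guest when the suppression rules allow it. */
	if (virtio_queue__should_signal(vq))
		vdev->ops->signal_vq(kvm, vdev, vq_index);
}
#endif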
void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
			   struct virt_queue *vq, size_t nr_descs)
{
	struct vring_addr *addr = &vq->vring_addr;

	vq->endian = vdev->endian;
	vq->use_event_idx = !!(vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
	vq->enabled = true;

	if (addr->legacy) {
		unsigned long base = (u64)addr->pfn * addr->pgsize;
		void *p = guest_flat_to_host(kvm, base);

		vring_init(&vq->vring, nr_descs, p, addr->align);
	} else {
		u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
		u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
		u64 used = (u64)addr->used_hi << 32 | addr->used_lo;

		vq->vring = (struct vring) {
			.desc	= guest_flat_to_host(kvm, desc),
			.used	= guest_flat_to_host(kvm, used),
			.avail	= guest_flat_to_host(kvm, avail),
			.num	= nr_descs,
		};
	}
}

void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
		    void *dev, int num)
{
	struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);

	if (vq->enabled && vdev->ops->exit_vq)
		vdev->ops->exit_vq(kvm, dev, num);
	memset(vq, 0, sizeof(*vq));
}

int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
	if (msix) {
		if (offset < 4)
			return VIRTIO_PCI_O_MSIX;
		else
			offset -= 4;
	}

	*config_off = offset;

	return VIRTIO_PCI_O_CONFIG;
}

bool virtio_queue__should_signal(struct virt_queue *vq)
{
	u16 old_idx, new_idx, event_idx;

	/*
	 * Use mb to assure used idx has been increased before we signal the
	 * guest, and we don't read a stale value for used_event. Without a mb
	 * here we might not send a notification that we need to send, or the
	 * guest may ignore the queue since it won't see an updated idx.
	 */
	mb();

	if (!vq->use_event_idx) {
		/*
		 * When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
		 * guest if it didn't explicitly request to be left alone.
		 */
		return !(virtio_guest_to_host_u16(vq, vq->vring.avail->flags) &
			 VRING_AVAIL_F_NO_INTERRUPT);
	}

	old_idx = vq->last_used_signalled;
	new_idx = virtio_guest_to_host_u16(vq, vq->vring.used->idx);
	event_idx = virtio_guest_to_host_u16(vq, vring_used_event(&vq->vring));

	if (vring_need_event(event_idx, new_idx, old_idx)) {
		vq->last_used_signalled = new_idx;
		return true;
	}

	return false;
}
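/*
 * Worked example of the EVENT_IDX path above (illustrative numbers only):
 * if the guest published used_event = 5, we last signalled at used idx 4
 * (old_idx) and have now advanced to used idx 6 (new_idx), then
 * vring_need_event(5, 6, 4) evaluates (u16)(6 - 5 - 1) < (u16)(6 - 4),
 * i.e. 0 < 2, so the guest is notified. Had the guest set used_event = 7,
 * the comparison would be 65534 < 2 and the interrupt would be suppressed.
 */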
void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, u32 features)
{
	/* TODO: fail negotiation if features & ~host_features */

	vdev->features = features;
	vdev->ops->set_guest_features(kvm, dev, features);
}

void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, u8 status)
{
	u32 ext_status = status;

	vdev->status &= ~VIRTIO_CONFIG_S_MASK;
	vdev->status |= status;

	/* Add a few hints to help devices */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(vdev->status & VIRTIO__STATUS_START)) {
		vdev->status |= VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_START;

	} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
		vdev->status &= ~VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_STOP;

		/*
		 * Reset virtqueues and stop all traffic now, so that the
		 * device can safely reset the backend in notify_status().
		 */
		vdev->ops->reset(kvm, vdev);
	}
	if (!status)
		ext_status |= VIRTIO__STATUS_CONFIG;

	if (vdev->ops->notify_status)
		vdev->ops->notify_status(kvm, dev, ext_status);
}

bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, unsigned long offset, void *data,
			  size_t size, bool is_write)
{
	void *in, *out, *config;
	size_t config_size = vdev->ops->get_config_size(kvm, dev);

	if (WARN_ONCE(offset + size > config_size,
		      "Config access offset (%lu) is beyond config size (%zu)\n",
		      offset, config_size))
		return false;

	config = vdev->ops->get_config(kvm, dev) + offset;

	in = is_write ? data : config;
	out = is_write ? config : data;

	switch (size) {
	case 1:
		*(u8 *)out = *(u8 *)in;
		break;
	case 2:
		*(u16 *)out = *(u16 *)in;
		break;
	case 4:
		*(u32 *)out = *(u32 *)in;
		break;
	case 8:
		*(u64 *)out = *(u64 *)in;
		break;
	default:
		WARN_ONCE(1, "%s: invalid access size\n", __func__);
		return false;
	}

	return true;
}

int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		struct virtio_ops *ops, enum virtio_trans trans,
		int device_id, int subsys_id, int class)
{
	void *virtio;
	int r;

	switch (trans) {
	case VIRTIO_PCI:
		virtio = calloc(1, sizeof(struct virtio_pci));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio = virtio;
		vdev->ops = ops;
		vdev->ops->signal_vq = virtio_pci__signal_vq;
		vdev->ops->signal_config = virtio_pci__signal_config;
		vdev->ops->init = virtio_pci__init;
		vdev->ops->exit = virtio_pci__exit;
		vdev->ops->reset = virtio_pci__reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	case VIRTIO_MMIO:
		virtio = calloc(1, sizeof(struct virtio_mmio));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio = virtio;
		vdev->ops = ops;
		vdev->ops->signal_vq = virtio_mmio_signal_vq;
		vdev->ops->signal_config = virtio_mmio_signal_config;
		vdev->ops->init = virtio_mmio_init;
		vdev->ops->exit = virtio_mmio_exit;
		vdev->ops->reset = virtio_mmio_reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	default:
		r = -1;
	}

	return r;
}
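/*
 * Illustrative sketch, not part of the original file: how a device would
 * register itself through virtio_init() over the PCI transport. The struct
 * example_dev, example_ops and the ID/class arguments are placeholders
 * rather than real kvmtool symbols. Kept under #if 0 so it is never built.
 */
#if 0
static int example__init(struct kvm *kvm, struct example_dev *edev)
{
	/* The transport fills in signal_vq/signal_config/init/exit/reset. */
	return virtio_init(kvm, edev, &edev->vdev, &example_ops, VIRTIO_PCI,
			   example_device_id, example_subsys_id,
			   example_pci_class);
}
#endif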
int virtio_compat_add_message(const char *device, const char *config)
{
	int len = 1024;
	int compat_id;
	char *title;
	char *desc;

	title = malloc(len);
	if (!title)
		return -ENOMEM;

	desc = malloc(len);
	if (!desc) {
		free(title);
		return -ENOMEM;
	}

	snprintf(title, len, "%s device was not detected.", device);
	snprintf(desc, len, "While you have requested a %s device, "
		 "the guest kernel did not initialize it.\n"
		 "\tPlease make sure that the guest kernel was "
		 "compiled with %s=y enabled in .config.",
		 device, config);

	compat_id = compat__add_message(title, desc);

	free(desc);
	free(title);

	return compat_id;
}
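/*
 * Illustrative usage, not part of the original file: a device typically
 * registers this message once at setup time so that a missing guest driver
 * gets reported; the virtio-net names below are just an example.
 */
#if 0
	compat_id = virtio_compat_add_message("virtio-net", "CONFIG_VIRTIO_NET");
#endif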