// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/**
 * vringh_kiov_advance - skip bytes from vring_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	while (len && iov->i < iov->used) {
		size_t partlen = min(iov->iov[iov->i].iov_len, len);

		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}

		len -= partlen;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);
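
/*
 * Editor's sketch (not compiled): a caller that keeps a fixed-size request
 * header at the start of the readable chain can read the header with
 * vringh_iov_pull_kern() and then skip any padding after it with
 * vringh_kiov_advance() before handing the rest of the chain to the data
 * path.  "struct my_req_hdr" and HDR_PAD are hypothetical names used only
 * for illustration.
 *
 *	struct my_req_hdr hdr;
 *	ssize_t ret;
 *
 *	ret = vringh_iov_pull_kern(&riov, &hdr, sizeof(hdr));
 *	if (ret != sizeof(hdr))
 *		return -EINVAL;
 *
 *	vringh_kiov_advance(&riov, HDR_PAD);
 */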

/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num, sizeof(*new), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(*new), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, indirect_count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = riov->consumed = 0;
	if (wiov)
		wiov->i = wiov->used = wiov->consumed = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (up_next == -1)
			count++;
		else
			indirect_count++;

		if (count > vrh->vring.num || indirect_count > desc_max) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->used)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
				indirect_count = 0;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}


static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}
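
/*
 * Editor's sketch (not compiled): the usual calling pattern for the notify
 * helpers above is "disable, drain, re-enable, re-check", so that a buffer
 * the driver adds between the last vringh_getdesc_*() and
 * vringh_notify_enable_*() is not missed.  Shown here for the kernelspace
 * variants; process_one_descriptor() is a hypothetical helper that wraps
 * vringh_getdesc_kern() and vringh_complete_kern().
 *
 *	do {
 *		vringh_notify_disable_kern(vrh);
 *		while (process_one_descriptor(vrh) > 0)
 *			;
 *	} while (!vringh_notify_enable_kern(vrh));
 */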

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num. You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_iov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
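
/*
 * Editor's sketch (not compiled) of how the userspace-vring API above is
 * typically strung together.  my_getrange(), service_request() and
 * notify_guest() are hypothetical, and error handling is trimmed to the
 * minimum.
 *
 *	struct vringh vrh;
 *	struct vringh_iov riov, wiov;
 *	struct iovec r_iovec[8], w_iovec[8];
 *	u16 head;
 *	int ret;
 *
 *	vringh_init_user(&vrh, features, num, true, desc, avail, used);
 *	vringh_iov_init(&riov, r_iovec, ARRAY_SIZE(r_iovec));
 *	vringh_iov_init(&wiov, w_iovec, ARRAY_SIZE(w_iovec));
 *
 *	while ((ret = vringh_getdesc_user(&vrh, &riov, &wiov,
 *					  my_getrange, &head)) == 1) {
 *		u32 written = service_request(&riov, &wiov);
 *
 *		vringh_complete_user(&vrh, head, written);
 *	}
 *
 *	if (vringh_need_notify_user(&vrh) > 0)
 *		notify_guest();
 *
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 */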

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num. You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);
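
/*
 * Editor's sketch (not compiled) of one pass over a kernelspace ring using
 * the API above.  vrh points at a ring set up with vringh_init_kern();
 * handle_request() and irq_to_guest() are hypothetical.
 *
 *	struct vringh_kiov riov, wiov;
 *	struct kvec rkvec[8], wkvec[8];
 *	u16 head;
 *	int ret;
 *
 *	vringh_kiov_init(&riov, rkvec, ARRAY_SIZE(rkvec));
 *	vringh_kiov_init(&wiov, wkvec, ARRAY_SIZE(wkvec));
 *
 *	while ((ret = vringh_getdesc_kern(vrh, &riov, &wiov, &head,
 *					  GFP_KERNEL)) == 1) {
 *		u32 written = handle_request(&riov, &wiov);
 *
 *		vringh_complete_kern(vrh, head, written);
 *	}
 *
 *	if (vringh_need_notify_kern(vrh) > 0)
 *		irq_to_guest();
 *
 *	vringh_kiov_cleanup(&riov);
 *	vringh_kiov_cleanup(&wiov);
 */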

#if IS_REACHABLE(CONFIG_VHOST_IOTLB)

struct iotlb_vec {
	union {
		struct iovec *iovec;
		struct bio_vec *bvec;
	} iov;
	size_t count;
};

static int iotlb_translate(const struct vringh *vrh,
			   u64 addr, u64 len, u64 *translated,
			   struct iotlb_vec *ivec, u32 perm)
{
	struct vhost_iotlb_map *map;
	struct vhost_iotlb *iotlb = vrh->iotlb;
	int ret = 0;
	u64 s = 0, last = addr + len - 1;

	spin_lock(vrh->iotlb_lock);

	while (len > s) {
		uintptr_t io_addr;
		size_t io_len;
		u64 size;

		if (unlikely(ret >= ivec->count)) {
			ret = -ENOBUFS;
			break;
		}

		map = vhost_iotlb_itree_first(iotlb, addr, last);
		if (!map || map->start > addr) {
			ret = -EINVAL;
			break;
		} else if (!(map->perm & perm)) {
			ret = -EPERM;
			break;
		}

		size = map->size - addr + map->start;
		io_len = min(len - s, size);
		io_addr = map->addr - map->start + addr;

		if (vrh->use_va) {
			struct iovec *iovec = ivec->iov.iovec;

			iovec[ret].iov_len = io_len;
			iovec[ret].iov_base = (void __user *)io_addr;
		} else {
			u64 pfn = io_addr >> PAGE_SHIFT;
			struct bio_vec *bvec = ivec->iov.bvec;

			bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len,
				      io_addr & (PAGE_SIZE - 1));
		}

		s += size;
		addr += size;
		++ret;
	}

	spin_unlock(vrh->iotlb_lock);

	if (translated)
		*translated = min(len, s);

	return ret;
}

#define IOTLB_IOV_STRIDE 16

static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
				  void *src, size_t len)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[IOTLB_IOV_STRIDE];
		struct bio_vec bvec[IOTLB_IOV_STRIDE];
	} iov;
	u64 total_translated = 0;

	ivec.iov.iovec = iov.iovec;
	ivec.count = IOTLB_IOV_STRIDE;

	while (total_translated < len) {
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
				      len - total_translated, &translated,
				      &ivec, VHOST_MAP_RO);
		if (ret == -ENOBUFS)
			ret = IOTLB_IOV_STRIDE;
		else if (ret < 0)
			return ret;

		if (vrh->use_va) {
			iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
				      translated);
		} else {
			iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
				      translated);
		}

		ret = copy_from_iter(dst, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
				void *src, size_t len)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[IOTLB_IOV_STRIDE];
		struct bio_vec bvec[IOTLB_IOV_STRIDE];
	} iov;
	u64 total_translated = 0;

	ivec.iov.iovec = iov.iovec;
	ivec.count = IOTLB_IOV_STRIDE;

	while (total_translated < len) {
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
				      len - total_translated, &translated,
				      &ivec, VHOST_MAP_WO);
		if (ret == -ENOBUFS)
			ret = IOTLB_IOV_STRIDE;
		else if (ret < 0)
			return ret;

		if (vrh->use_va) {
			iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
				      translated);
		} else {
			iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
				      translated);
		}

		ret = copy_to_iter(src, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int getu16_iotlb(const struct vringh *vrh,
			       u16 *val, const __virtio16 *p)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[1];
		struct bio_vec bvec[1];
	} iov;
	__virtio16 tmp;
	int ret;

	ivec.iov.iovec = iov.iovec;
	ivec.count = 1;

	/* Atomic read is needed for getu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      NULL, &ivec, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	if (vrh->use_va) {
		ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
		if (ret)
			return ret;
	} else {
		__virtio16 *from = bvec_kmap_local(&ivec.iov.bvec[0]);

		tmp = READ_ONCE(*from);
		kunmap_local(from);
	}

	*val = vringh16_to_cpu(vrh, tmp);

	return 0;
}

static inline int putu16_iotlb(const struct vringh *vrh,
			       __virtio16 *p, u16 val)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec;
		struct bio_vec bvec;
	} iov;
	__virtio16 tmp;
	int ret;

	ivec.iov.iovec = &iov.iovec;
	ivec.count = 1;

	/* Atomic write is needed for putu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      NULL, &ivec, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	tmp = cpu_to_vringh16(vrh, val);

	if (vrh->use_va) {
		ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
		if (ret)
			return ret;
	} else {
		__virtio16 *to = bvec_kmap_local(&ivec.iov.bvec[0]);

		WRITE_ONCE(*to, tmp);
		kunmap_local(to);
	}

	return 0;
}

static inline int copydesc_iotlb(const struct vringh *vrh,
				 void *dst, const void *src, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
				  void *dst, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_to_iotlb(const struct vringh *vrh,
				void *dst, void *src, size_t len)
{
	int ret;

	ret = copy_to_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int putused_iotlb(const struct vringh *vrh,
				struct vring_used_elem *dst,
				const struct vring_used_elem *src,
				unsigned int num)
{
	int size = num * sizeof(*dst);
	int ret;

	ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
	if (ret != size)
		return -EFAULT;

	return 0;
}

/**
 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb(struct vringh *vrh, u64 features,
		      unsigned int num, bool weak_barriers,
		      struct vring_desc *desc,
		      struct vring_avail *avail,
		      struct vring_used *used)
{
	vrh->use_va = false;

	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb);

/**
 * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing
 * user VA.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
			 unsigned int num, bool weak_barriers,
			 struct vring_desc *desc,
			 struct vring_avail *avail,
			 struct vring_used *used)
{
	vrh->use_va = true;

	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb_va);

/**
 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vring
 * @iotlb: iotlb associated with this vring
 * @iotlb_lock: spinlock to synchronize the iotlb accesses
 */
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
		      spinlock_t *iotlb_lock)
{
	vrh->iotlb = iotlb;
	vrh->iotlb_lock = iotlb_lock;
}
EXPORT_SYMBOL(vringh_set_iotlb);
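
/*
 * Editor's sketch (not compiled): a vDPA-style host that addresses the ring
 * through an IOTLB would pair the two calls above roughly like this.  The
 * iotlb, its lock and the IOVA values for the ring parts come from the
 * caller and are hypothetical here.
 *
 *	err = vringh_init_iotlb(&vrh, features, num, false,
 *				(struct vring_desc *)(uintptr_t)desc_iova,
 *				(struct vring_avail *)(uintptr_t)avail_iova,
 *				(struct vring_used *)(uintptr_t)used_iova);
 *	if (err)
 *		return err;
 *
 *	vringh_set_iotlb(&vrh, iotlb, &iotlb_lock);
 *
 * After this, vringh_getdesc_iotlb() and friends translate every ring and
 * buffer access through the IOTLB before touching memory.
 */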

/**
 * vringh_getdesc_iotlb - get next available descriptor from ring with
 * IOTLB.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_iotlb().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num. You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_iotlb(struct vringh *vrh,
			 struct vringh_kiov *riov,
			 struct vringh_kiov *wiov,
			 u16 *head,
			 gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_iotlb);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_iotlb);

/**
 * vringh_iov_pull_iotlb - copy bytes from vring_iov.
 * @vrh: the vring.
 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
			      struct vringh_kiov *riov,
			      void *dst, size_t len)
{
	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
}
EXPORT_SYMBOL(vringh_iov_pull_iotlb);

/**
 * vringh_iov_push_iotlb - copy bytes into vring_iov.
 * @vrh: the vring.
 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
			      struct vringh_kiov *wiov,
			      const void *src, size_t len)
{
	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
}
EXPORT_SYMBOL(vringh_iov_push_iotlb);

/**
 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_iotlb() to undo).
 *
 * The next vringh_get_iotlb() will return the old descriptor(s) again.
 */
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet.
	 */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_iotlb);

/**
 * vringh_complete_iotlb - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_iotlb.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_iotlb() after one or more calls
 * to this function.
 */
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
}
EXPORT_SYMBOL(vringh_complete_iotlb);

/**
 * vringh_notify_enable_iotlb - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_iotlb(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_enable_iotlb);

/**
 * vringh_notify_disable_iotlb - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_iotlb(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_disable_iotlb);

/**
 * vringh_need_notify_iotlb - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_iotlb() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_iotlb(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_iotlb);
}
EXPORT_SYMBOL(vringh_need_notify_iotlb);

#endif

MODULE_DESCRIPTION("host side of a virtio ring");
MODULE_LICENSE("GPL");