// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divider is a power of 2 given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

/* Number of 16-byte octwords needed to hold the MTT entries; each entry is 8 bytes. */
static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

/* Record the DMA address of every log_size-sized block of the mapped scatterlist. */
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}

struct mlx5_create_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
	__be64 mtt[];
};

struct mlx5_destroy_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
};

static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
				  struct mlx5_vdpa_direct_mr *mr,
				  struct mlx5_create_mkey_mem *mem)
{
	void *in = &mem->in;
	void *mkc;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));

	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
}

static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
				 struct mlx5_vdpa_direct_mr *mr,
				 struct mlx5_create_mkey_mem *mem)
{
	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);

	mr->mr = mlx5_idx_to_mkey(mkey_index);
}

static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
				   struct mlx5_vdpa_direct_mr *mr,
				   struct mlx5_destroy_mkey_mem *mem)
{
	void *in = &mem->in;

	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	if (!mr->mr)
		return;

	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

/* Build the KLM list for the indirect mkey: direct mkeys cover the mapped
 * ranges and the null mkey covers the holes between them.
 */
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE);

			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(bcount));
			preve += bcount;

			goto again;
		}
	}
}

static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

#define MLX5_VDPA_MTT_ALIGN 16

static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_async_cmd *cmds;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_create_mkey_mem *cmd_mem;
		int mttlen, mttcount;

		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
		if (!cmd_mem) {
			err = -ENOMEM;
			goto done;
		}

		cmds[i].out = cmd_mem->out;
		cmds[i].outlen = sizeof(cmd_mem->out);
		cmds[i].in = cmd_mem->in;
		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);

		fill_create_direct_mr(mvdev, dmr, cmd_mem);

		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
		goto done;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);

		if (!cmd->err) {
			create_direct_mr_end(mvdev, dmr, cmd_mem);
		} else {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

done:
	for (i = i - 1; i >= 0; i--) {
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
		kvfree(cmd_mem);
	}

	kvfree(cmds);
	return err;
}

DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))

static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
	if (!cmds || !cmd_mem)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		cmds[i].out = cmd_mem[i].out;
		cmds[i].outlen = sizeof(cmd_mem[i].out);
		cmds[i].in = cmd_mem[i].in;
		cmds[i].inlen = sizeof(cmd_mem[i].in);
		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
		return err;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];

		dmr->mr = 0;
		if (cmd->err) {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
				      dmr->start, dmr->end, cmd->err);
		}
	}

	return err;
}

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa, offset;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	/* Use the GCD of all map lengths as the entity size so it evenly
	 * divides every map in the range.
	 */
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		offset = mr->start > map->start ? mr->start - map->start : 0;
		pa = map->addr + offset;
		paend = map->addr + offset + maplen(map, mr);
		for (; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	return 0;

err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
			    struct mlx5_vdpa_mr *mr,
			    u64 start,
			    u64 size,
			    u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	list_for_each_entry_safe(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
			  struct mlx5_vdpa_mr *mr,
			  struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	err = create_direct_keys(mvdev, mr);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}

static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{
	struct vhost_iotlb_map *map;
	u64 start = 0, last = ULLONG_MAX;
	int err;

	if (dst == src)
		return -EINVAL;

	if (!src) {
		err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
		return err;
	}

	for (map = vhost_iotlb_itree_first(src, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		err = vhost_iotlb_add_range(dst, map->start, map->last,
					    map->addr, map->perm);
		if (err)
			return err;
	}
	return 0;
}

static void prune_iotlb(struct vhost_iotlb *iotlb)
{
	vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	destroy_direct_keys(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	if (WARN_ON(!mr))
		return;

	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	vhost_iotlb_free(mr->iotlb);

	list_del(&mr->mr_list);

	kfree(mr);
}

/* There can be multiple .set_map() operations in quick succession.
 * This large delay is a simple way to prevent the MR cleanup from blocking
 * .set_map() MR creation in this scenario.
 */
#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000

static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
{
	struct mlx5_vdpa_mr_resources *mres;
	struct mlx5_vdpa_mr *mr, *tmp;
	struct mlx5_vdpa_dev *mvdev;

	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);

	if (atomic_read(&mres->shutdown)) {
		mutex_lock(&mres->lock);
	} else if (!mutex_trylock(&mres->lock)) {
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
		return;
	}

	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);

	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
		_mlx5_vdpa_destroy_mr(mvdev, mr);
	}

	mutex_unlock(&mres->lock);
}

static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	if (!mr)
		return;

	if (refcount_dec_and_test(&mr->refcount)) {
		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
	}
}

void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_put_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	if (!mr)
		return;

	refcount_inc(&mr->refcount);
}

void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_get_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
			 struct mlx5_vdpa_mr *new_mr,
			 unsigned int asid)
{
	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];

	mutex_lock(&mvdev->mres.lock);

	_mlx5_vdpa_put_mr(mvdev, old_mr);
	mvdev->mres.mr[asid] = new_mr;

	mutex_unlock(&mvdev->mres.lock);
}

static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mutex_lock(&mvdev->mres.lock);

	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
				      "mr: %p, mkey: 0x%x, refcount: %u\n",
				      mr, mr->mkey, refcount_read(&mr->refcount));
	}

	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
{
	if (!mvdev->res.valid)
		return;

	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
		mlx5_vdpa_update_mr(mvdev, NULL, i);

	prune_iotlb(mvdev->cvq.iotlb);

	mlx5_vdpa_show_mr_leaks(mvdev);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *mr,
				struct vhost_iotlb *iotlb)
{
	int err;

	if (iotlb)
		err = create_user_mr(mvdev, mr, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	mr->iotlb = vhost_iotlb_alloc(0, 0);
	if (!mr->iotlb) {
		err = -ENOMEM;
		goto err_mr;
	}

	err = dup_iotlb(mr->iotlb, iotlb);
	if (err)
		goto err_iotlb;

	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);

	return 0;

err_iotlb:
	vhost_iotlb_free(mr->iotlb);

err_mr:
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}

struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
					 struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mvdev->mres.lock);
	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
	mutex_unlock(&mvdev->mres.lock);

	if (err)
		goto out_err;

	refcount_set(&mr->refcount, 1);

	return mr;

out_err:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
			       struct vhost_iotlb *iotlb,
			       unsigned int asid)
{
	int err;

	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
		return 0;

	spin_lock(&mvdev->cvq.iommu_lock);

	prune_iotlb(mvdev->cvq.iotlb);
	err = dup_iotlb(mvdev->cvq.iotlb, iotlb);

	spin_unlock(&mvdev->cvq.iommu_lock);

	return err;
}

int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mr = mlx5_vdpa_create_mr(mvdev, NULL);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	mlx5_vdpa_update_mr(mvdev, mr, 0);

	return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
}

int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	mlx5_vdpa_update_mr(mvdev, NULL, asid);

	if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
	} else {
		mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
	}

	return 0;
}

int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
	if (!mres->wq_gc)
		return -ENOMEM;

	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);

	mutex_init(&mres->lock);

	INIT_LIST_HEAD(&mres->mr_list_head);
	INIT_LIST_HEAD(&mres->mr_gc_list_head);

	return 0;
}

void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	atomic_set(&mres->shutdown, 1);

	flush_delayed_work(&mres->gc_dwork_ent);
	destroy_workqueue(mres->wq_gc);
	mres->wq_gc = NULL;
	mutex_destroy(&mres->lock);
}
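
/*
 * Illustrative sketch only, not part of the driver: one plausible way a
 * .set_map()-style caller could combine the helpers exported above. The
 * function name and the error handling are assumptions made for the
 * example; the real call sites live in mlx5_vnet.c.
 */
static int __maybe_unused mlx5_vdpa_example_set_map(struct mlx5_vdpa_dev *mvdev,
						    struct vhost_iotlb *iotlb,
						    unsigned int asid)
{
	struct mlx5_vdpa_mr *new_mr;

	/* Build the direct and indirect mkeys for the new iotlb; a NULL
	 * iotlb yields a physical-address DMA MR instead.
	 */
	new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
	if (IS_ERR(new_mr))
		return PTR_ERR(new_mr);

	/* Publish the new MR for this address space. The previous MR is put
	 * and destroyed asynchronously by the GC workqueue.
	 */
	mlx5_vdpa_update_mr(mvdev, new_mr, asid);

	/* Keep the control VQ iotlb in sync when this ASID backs the CVQ. */
	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
}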