1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * Copyright (c) 2020, Intel Corporation. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 35 #include <linux/kref.h> 36 #include <linux/random.h> 37 #include <linux/debugfs.h> 38 #include <linux/export.h> 39 #include <linux/delay.h> 40 #include <linux/dma-buf.h> 41 #include <linux/dma-resv.h> 42 #include <rdma/ib_umem_odp.h> 43 #include "dm.h" 44 #include "mlx5_ib.h" 45 #include "umr.h" 46 #include "data_direct.h" 47 #include "dmah.h" 48 49 enum { 50 MAX_PENDING_REG_MR = 8, 51 }; 52 53 #define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 54 55 static void 56 create_mkey_callback(int status, struct mlx5_async_work *context); 57 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 58 u64 iova, int access_flags, 59 unsigned long page_size, bool populate, 60 int access_mode, u16 st_index, u8 ph); 61 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); 62 63 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 64 struct ib_pd *pd) 65 { 66 struct mlx5_ib_dev *dev = to_mdev(pd->device); 67 68 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 69 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 70 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 71 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 72 MLX5_SET(mkc, mkc, lr, 1); 73 74 if (acc & IB_ACCESS_RELAXED_ORDERING) { 75 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 76 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); 77 78 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 79 (MLX5_CAP_GEN(dev->mdev, 80 relaxed_ordering_read_pci_enabled) && 81 pcie_relaxed_ordering_enabled(dev->mdev->pdev))) 82 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); 83 } 84 85 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 86 MLX5_SET(mkc, mkc, qpn, 0xffffff); 87 MLX5_SET64(mkc, mkc, start_addr, start_addr); 88 } 89 90 static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) 91 { 92 u8 key = atomic_inc_return(&dev->mkey_var); 93 void *mkc; 94 95 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 96 MLX5_SET(mkc, mkc, mkey_7_0, key); 97 *mkey = key; 98 } 99 100 static int 
mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, 101 struct mlx5_ib_mkey *mkey, u32 *in, int inlen) 102 { 103 int ret; 104 105 assign_mkey_variant(dev, &mkey->key, in); 106 ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); 107 if (!ret) 108 init_waitqueue_head(&mkey->wait); 109 110 return ret; 111 } 112 113 static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) 114 { 115 struct mlx5_ib_dev *dev = async_create->ent->dev; 116 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 117 size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); 118 119 MLX5_SET(create_mkey_in, async_create->in, opcode, 120 MLX5_CMD_OP_CREATE_MKEY); 121 assign_mkey_variant(dev, &async_create->mkey, async_create->in); 122 return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, 123 async_create->out, outlen, create_mkey_callback, 124 &async_create->cb_work); 125 } 126 127 static int mkey_cache_max_order(struct mlx5_ib_dev *dev); 128 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); 129 130 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 131 { 132 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); 133 134 return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); 135 } 136 137 static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) 138 { 139 if (status == -ENXIO) /* core driver is not available */ 140 return; 141 142 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); 143 if (status != -EREMOTEIO) /* driver specific failure */ 144 return; 145 146 /* Failed in FW, print cmd out failure details */ 147 mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); 148 } 149 150 static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) 151 { 152 unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; 153 struct mlx5_mkeys_page *page; 154 155 lockdep_assert_held(&ent->mkeys_queue.lock); 156 if (ent->mkeys_queue.ci >= 157 ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { 158 page = kzalloc_obj(*page, GFP_ATOMIC); 159 if (!page) 160 return -ENOMEM; 161 ent->mkeys_queue.num_pages++; 162 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 163 } else { 164 page = list_last_entry(&ent->mkeys_queue.pages_list, 165 struct mlx5_mkeys_page, list); 166 } 167 168 page->mkeys[tmp] = mkey; 169 ent->mkeys_queue.ci++; 170 return 0; 171 } 172 173 static int pop_mkey_locked(struct mlx5_cache_ent *ent) 174 { 175 unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; 176 struct mlx5_mkeys_page *last_page; 177 u32 mkey; 178 179 lockdep_assert_held(&ent->mkeys_queue.lock); 180 last_page = list_last_entry(&ent->mkeys_queue.pages_list, 181 struct mlx5_mkeys_page, list); 182 mkey = last_page->mkeys[tmp]; 183 last_page->mkeys[tmp] = 0; 184 ent->mkeys_queue.ci--; 185 if (ent->mkeys_queue.num_pages > 1 && !tmp) { 186 list_del(&last_page->list); 187 ent->mkeys_queue.num_pages--; 188 kfree(last_page); 189 } 190 return mkey; 191 } 192 193 static void create_mkey_callback(int status, struct mlx5_async_work *context) 194 { 195 struct mlx5r_async_create_mkey *mkey_out = 196 container_of(context, struct mlx5r_async_create_mkey, cb_work); 197 struct mlx5_cache_ent *ent = mkey_out->ent; 198 struct mlx5_ib_dev *dev = ent->dev; 199 unsigned long flags; 200 201 if (status) { 202 create_mkey_warn(dev, status, mkey_out->out); 203 kfree(mkey_out); 204 spin_lock_irqsave(&ent->mkeys_queue.lock, flags); 205 ent->pending--; 206 WRITE_ONCE(dev->fill_delay, 1); 207 spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); 
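		/*
		 * Async mkey creation failed: setting fill_delay pauses all
		 * background cache refill (queue_adjust_cache_locked() and
		 * __cache_work_func() both check it), and the delay_timer
		 * armed below clears the flag again after roughly one second
		 * (HZ jiffies) via delay_time_func().
		 */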
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mkey_out->mkey |= mlx5_idx_to_mkey(
		MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
	WRITE_ONCE(dev->cache.last_add, jiffies);

	spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
	push_mkey_locked(ent, mkey_out->mkey);
	ent->pending--;
	/* If we are doing fill_to_high_water then keep going. */
	queue_adjust_cache_locked(ent);
	spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
	kfree(mkey_out);
}

static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
{
	int ret = 0;

	switch (access_mode) {
	case MLX5_MKC_ACCESS_MODE_MTT:
		ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
					   sizeof(struct mlx5_mtt));
		break;
	case MLX5_MKC_ACCESS_MODE_KSM:
		ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
					   sizeof(struct mlx5_klm));
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
{
	set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
				      ent->dev->umrc.pd);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2,
		 (ent->rb_key.access_mode >> 2) & 0x7);
	MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);

	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_mkc_octo_size(ent->rb_key.access_mode,
				   ent->rb_key.ndescs));
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);

	if (ent->rb_key.ph != MLX5_IB_NO_PH) {
		MLX5_SET(mkc, mkc, pcie_tph_en, 1);
		MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
		if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
			MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
				 ent->rb_key.st_index);
	}
}

/* Asynchronously schedule new MRs to be populated in the cache.
 */
static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
{
	struct mlx5r_async_create_mkey *async_create;
	void *mkc;
	int err = 0;
	int i;

	for (i = 0; i < num; i++) {
		async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
				       GFP_KERNEL);
		if (!async_create)
			return -ENOMEM;
		mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
				   memory_key_mkey_entry);
		set_cache_mkc(ent, mkc);
		async_create->ent = ent;

		spin_lock_irq(&ent->mkeys_queue.lock);
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			goto free_async_create;
		}
		ent->pending++;
		spin_unlock_irq(&ent->mkeys_queue.lock);

		err = mlx5_ib_create_mkey_cb(async_create);
		if (err) {
			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
			goto err_create_mkey;
		}
	}

	return 0;

err_create_mkey:
	spin_lock_irq(&ent->mkeys_queue.lock);
	ent->pending--;
free_async_create:
	spin_unlock_irq(&ent->mkeys_queue.lock);
	kfree(async_create);
	return err;
}

/* Synchronously create an MR in the cache */
static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
{
	size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	set_cache_mkc(ent, mkc);

	err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
	if (err)
		goto free_in;

	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
free_in:
	kfree(in);
	return err;
}

static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
	u32 mkey;

	lockdep_assert_held(&ent->mkeys_queue.lock);
	if (!ent->mkeys_queue.ci)
		return;
	mkey = pop_mkey_locked(ent);
	spin_unlock_irq(&ent->mkeys_queue.lock);
	mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
	spin_lock_irq(&ent->mkeys_queue.lock);
}

static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
				bool limit_fill)
	__acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
{
	int err;

	lockdep_assert_held(&ent->mkeys_queue.lock);

	while (true) {
		if (limit_fill)
			target = ent->limit * 2;
		if (target == ent->pending + ent->mkeys_queue.ci)
			return 0;
		if (target > ent->pending + ent->mkeys_queue.ci) {
			u32 todo = target - (ent->pending + ent->mkeys_queue.ci);

			spin_unlock_irq(&ent->mkeys_queue.lock);
			err = add_keys(ent, todo);
			if (err == -EAGAIN)
				usleep_range(3000, 5000);
			spin_lock_irq(&ent->mkeys_queue.lock);
			if (err) {
				if (err != -EAGAIN)
					return err;
			} else
				return 0;
		} else {
			remove_cache_mr_locked(ent);
		}
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	u32 target;
	int err;

	err = kstrtou32_from_user(buf, count, 0, &target);
	if (err)
		return err;

	/*
	 * Target is the new value of total_mrs the user requests, however we
	 * cannot free MRs that are in use. Compute the target value for stored
	 * mkeys.
396 */ 397 spin_lock_irq(&ent->mkeys_queue.lock); 398 if (target < ent->in_use) { 399 err = -EINVAL; 400 goto err_unlock; 401 } 402 target = target - ent->in_use; 403 if (target < ent->limit || target > ent->limit*2) { 404 err = -EINVAL; 405 goto err_unlock; 406 } 407 err = resize_available_mrs(ent, target, false); 408 if (err) 409 goto err_unlock; 410 spin_unlock_irq(&ent->mkeys_queue.lock); 411 412 return count; 413 414 err_unlock: 415 spin_unlock_irq(&ent->mkeys_queue.lock); 416 return err; 417 } 418 419 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 420 loff_t *pos) 421 { 422 struct mlx5_cache_ent *ent = filp->private_data; 423 char lbuf[20]; 424 int err; 425 426 err = snprintf(lbuf, sizeof(lbuf), "%ld\n", 427 ent->mkeys_queue.ci + ent->in_use); 428 if (err < 0) 429 return err; 430 431 return simple_read_from_buffer(buf, count, pos, lbuf, err); 432 } 433 434 static const struct file_operations size_fops = { 435 .owner = THIS_MODULE, 436 .open = simple_open, 437 .write = size_write, 438 .read = size_read, 439 }; 440 441 static ssize_t limit_write(struct file *filp, const char __user *buf, 442 size_t count, loff_t *pos) 443 { 444 struct mlx5_cache_ent *ent = filp->private_data; 445 u32 var; 446 int err; 447 448 err = kstrtou32_from_user(buf, count, 0, &var); 449 if (err) 450 return err; 451 452 /* 453 * Upon set we immediately fill the cache to high water mark implied by 454 * the limit. 455 */ 456 spin_lock_irq(&ent->mkeys_queue.lock); 457 ent->limit = var; 458 err = resize_available_mrs(ent, 0, true); 459 spin_unlock_irq(&ent->mkeys_queue.lock); 460 if (err) 461 return err; 462 return count; 463 } 464 465 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 466 loff_t *pos) 467 { 468 struct mlx5_cache_ent *ent = filp->private_data; 469 char lbuf[20]; 470 int err; 471 472 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 473 if (err < 0) 474 return err; 475 476 return simple_read_from_buffer(buf, count, pos, lbuf, err); 477 } 478 479 static const struct file_operations limit_fops = { 480 .owner = THIS_MODULE, 481 .open = simple_open, 482 .write = limit_write, 483 .read = limit_read, 484 }; 485 486 static bool someone_adding(struct mlx5_mkey_cache *cache) 487 { 488 struct mlx5_cache_ent *ent; 489 struct rb_node *node; 490 bool ret; 491 492 mutex_lock(&cache->rb_lock); 493 for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { 494 ent = rb_entry(node, struct mlx5_cache_ent, node); 495 spin_lock_irq(&ent->mkeys_queue.lock); 496 ret = ent->mkeys_queue.ci < ent->limit; 497 spin_unlock_irq(&ent->mkeys_queue.lock); 498 if (ret) { 499 mutex_unlock(&cache->rb_lock); 500 return true; 501 } 502 } 503 mutex_unlock(&cache->rb_lock); 504 return false; 505 } 506 507 /* 508 * Check if the bucket is outside the high/low water mark and schedule an async 509 * update. The cache refill has hysteresis, once the low water mark is hit it is 510 * refilled up to the high mark. 
511 */ 512 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) 513 { 514 lockdep_assert_held(&ent->mkeys_queue.lock); 515 516 if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) 517 return; 518 if (ent->mkeys_queue.ci < ent->limit) { 519 ent->fill_to_high_water = true; 520 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 521 } else if (ent->fill_to_high_water && 522 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { 523 /* 524 * Once we start populating due to hitting a low water mark 525 * continue until we pass the high water mark. 526 */ 527 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 528 } else if (ent->mkeys_queue.ci == 2 * ent->limit) { 529 ent->fill_to_high_water = false; 530 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 531 /* Queue deletion of excess entries */ 532 ent->fill_to_high_water = false; 533 if (ent->pending) 534 queue_delayed_work(ent->dev->cache.wq, &ent->dwork, 535 secs_to_jiffies(1)); 536 else 537 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); 538 } 539 } 540 541 static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) 542 { 543 u32 mkey; 544 545 spin_lock_irq(&ent->mkeys_queue.lock); 546 while (ent->mkeys_queue.ci) { 547 mkey = pop_mkey_locked(ent); 548 spin_unlock_irq(&ent->mkeys_queue.lock); 549 mlx5_core_destroy_mkey(dev->mdev, mkey); 550 spin_lock_irq(&ent->mkeys_queue.lock); 551 } 552 ent->tmp_cleanup_scheduled = false; 553 spin_unlock_irq(&ent->mkeys_queue.lock); 554 } 555 556 static void __cache_work_func(struct mlx5_cache_ent *ent) 557 { 558 struct mlx5_ib_dev *dev = ent->dev; 559 struct mlx5_mkey_cache *cache = &dev->cache; 560 int err; 561 562 spin_lock_irq(&ent->mkeys_queue.lock); 563 if (ent->disabled) 564 goto out; 565 566 if (ent->fill_to_high_water && 567 ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && 568 !READ_ONCE(dev->fill_delay)) { 569 spin_unlock_irq(&ent->mkeys_queue.lock); 570 err = add_keys(ent, 1); 571 spin_lock_irq(&ent->mkeys_queue.lock); 572 if (ent->disabled) 573 goto out; 574 if (err) { 575 /* 576 * EAGAIN only happens if there are pending MRs, so we 577 * will be rescheduled when storing them. The only 578 * failure path here is ENOMEM. 579 */ 580 if (err != -EAGAIN) { 581 mlx5_ib_warn( 582 dev, 583 "add keys command failed, err %d\n", 584 err); 585 queue_delayed_work(cache->wq, &ent->dwork, 586 secs_to_jiffies(1)); 587 } 588 } 589 } else if (ent->mkeys_queue.ci > 2 * ent->limit) { 590 bool need_delay; 591 592 /* 593 * The remove_cache_mr() logic is performed as garbage 594 * collection task. Such task is intended to be run when no 595 * other active processes are running. 596 * 597 * The need_resched() will return TRUE if there are user tasks 598 * to be activated in near future. 599 * 600 * In such case, we don't execute remove_cache_mr() and postpone 601 * the garbage collection work to try to run in next cycle, in 602 * order to free CPU resources to other tasks. 
603 */ 604 spin_unlock_irq(&ent->mkeys_queue.lock); 605 need_delay = need_resched() || someone_adding(cache) || 606 !time_after(jiffies, 607 READ_ONCE(cache->last_add) + 300 * HZ); 608 spin_lock_irq(&ent->mkeys_queue.lock); 609 if (ent->disabled) 610 goto out; 611 if (need_delay) { 612 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 613 goto out; 614 } 615 remove_cache_mr_locked(ent); 616 queue_adjust_cache_locked(ent); 617 } 618 out: 619 spin_unlock_irq(&ent->mkeys_queue.lock); 620 } 621 622 static void delayed_cache_work_func(struct work_struct *work) 623 { 624 struct mlx5_cache_ent *ent; 625 626 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 627 /* temp entries are never filled, only cleaned */ 628 if (ent->is_tmp) 629 clean_keys(ent->dev, ent); 630 else 631 __cache_work_func(ent); 632 } 633 634 static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, 635 struct mlx5r_cache_rb_key key2) 636 { 637 int res; 638 639 res = key1.ats - key2.ats; 640 if (res) 641 return res; 642 643 res = key1.access_mode - key2.access_mode; 644 if (res) 645 return res; 646 647 res = key1.access_flags - key2.access_flags; 648 if (res) 649 return res; 650 651 res = key1.st_index - key2.st_index; 652 if (res) 653 return res; 654 655 res = key1.ph - key2.ph; 656 if (res) 657 return res; 658 659 /* 660 * keep ndescs the last in the compare table since the find function 661 * searches for an exact match on all properties and only closest 662 * match in size. 663 */ 664 return key1.ndescs - key2.ndescs; 665 } 666 667 static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, 668 struct mlx5_cache_ent *ent) 669 { 670 struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; 671 struct mlx5_cache_ent *cur; 672 int cmp; 673 674 /* Figure out where to put new node */ 675 while (*new) { 676 cur = rb_entry(*new, struct mlx5_cache_ent, node); 677 parent = *new; 678 cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); 679 if (cmp > 0) 680 new = &((*new)->rb_left); 681 if (cmp < 0) 682 new = &((*new)->rb_right); 683 if (cmp == 0) 684 return -EEXIST; 685 } 686 687 /* Add new node and rebalance tree. */ 688 rb_link_node(&ent->node, parent, new); 689 rb_insert_color(&ent->node, &cache->rb_root); 690 691 return 0; 692 } 693 694 static struct mlx5_cache_ent * 695 mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, 696 struct mlx5r_cache_rb_key rb_key) 697 { 698 struct rb_node *node = dev->cache.rb_root.rb_node; 699 struct mlx5_cache_ent *cur, *smallest = NULL; 700 u64 ndescs_limit; 701 int cmp; 702 703 /* 704 * Find the smallest ent with order >= requested_order. 705 */ 706 while (node) { 707 cur = rb_entry(node, struct mlx5_cache_ent, node); 708 cmp = cache_ent_key_cmp(cur->rb_key, rb_key); 709 if (cmp > 0) { 710 smallest = cur; 711 node = node->rb_left; 712 } 713 if (cmp < 0) 714 node = node->rb_right; 715 if (cmp == 0) 716 return cur; 717 } 718 719 /* 720 * Limit the usage of mkeys larger than twice the required size while 721 * also allowing the usage of smallest cache entry for small MRs. 722 */ 723 ndescs_limit = max_t(u64, rb_key.ndescs * 2, 724 MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); 725 726 return (smallest && 727 smallest->rb_key.access_mode == rb_key.access_mode && 728 smallest->rb_key.access_flags == rb_key.access_flags && 729 smallest->rb_key.ats == rb_key.ats && 730 smallest->rb_key.st_index == rb_key.st_index && 731 smallest->rb_key.ph == rb_key.ph && 732 smallest->rb_key.ndescs <= ndescs_limit) ? 
733 smallest : 734 NULL; 735 } 736 737 static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 738 struct mlx5_cache_ent *ent) 739 { 740 struct mlx5_ib_mr *mr; 741 int err; 742 743 mr = kzalloc_obj(*mr); 744 if (!mr) 745 return ERR_PTR(-ENOMEM); 746 747 spin_lock_irq(&ent->mkeys_queue.lock); 748 ent->in_use++; 749 750 if (!ent->mkeys_queue.ci) { 751 queue_adjust_cache_locked(ent); 752 ent->miss++; 753 spin_unlock_irq(&ent->mkeys_queue.lock); 754 err = create_cache_mkey(ent, &mr->mmkey.key); 755 if (err) { 756 spin_lock_irq(&ent->mkeys_queue.lock); 757 ent->in_use--; 758 spin_unlock_irq(&ent->mkeys_queue.lock); 759 kfree(mr); 760 return ERR_PTR(err); 761 } 762 } else { 763 mr->mmkey.key = pop_mkey_locked(ent); 764 queue_adjust_cache_locked(ent); 765 spin_unlock_irq(&ent->mkeys_queue.lock); 766 } 767 mr->mmkey.cache_ent = ent; 768 mr->mmkey.type = MLX5_MKEY_MR; 769 mr->mmkey.rb_key = ent->rb_key; 770 mr->mmkey.cacheable = true; 771 init_waitqueue_head(&mr->mmkey.wait); 772 return mr; 773 } 774 775 static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, 776 int access_flags) 777 { 778 int ret = 0; 779 780 if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) && 781 MLX5_CAP_GEN(dev->mdev, atomic) && 782 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) 783 ret |= IB_ACCESS_REMOTE_ATOMIC; 784 785 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 786 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && 787 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) 788 ret |= IB_ACCESS_RELAXED_ORDERING; 789 790 if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && 791 (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || 792 MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && 793 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) 794 ret |= IB_ACCESS_RELAXED_ORDERING; 795 796 return ret; 797 } 798 799 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, 800 int access_flags, int access_mode, 801 int ndescs) 802 { 803 struct mlx5r_cache_rb_key rb_key = { 804 .ndescs = ndescs, 805 .access_mode = access_mode, 806 .access_flags = get_unchangeable_access_flags(dev, access_flags), 807 .ph = MLX5_IB_NO_PH, 808 }; 809 struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); 810 811 if (!ent) 812 return ERR_PTR(-EOPNOTSUPP); 813 814 return _mlx5_mr_cache_alloc(dev, ent); 815 } 816 817 static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 818 { 819 if (!mlx5_debugfs_root || dev->is_rep) 820 return; 821 822 debugfs_remove_recursive(dev->cache.fs_root); 823 dev->cache.fs_root = NULL; 824 } 825 826 static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, 827 struct mlx5_cache_ent *ent) 828 { 829 int order = order_base_2(ent->rb_key.ndescs); 830 struct dentry *dir; 831 832 if (!mlx5_debugfs_root || dev->is_rep) 833 return; 834 835 if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 836 order = MLX5_IMR_KSM_CACHE_ENTRY + 2; 837 838 sprintf(ent->name, "%d", order); 839 dir = debugfs_create_dir(ent->name, dev->cache.fs_root); 840 debugfs_create_file("size", 0600, dir, ent, &size_fops); 841 debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 842 debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); 843 debugfs_create_u32("miss", 0600, dir, &ent->miss); 844 } 845 846 static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) 847 { 848 struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); 849 struct mlx5_mkey_cache *cache = &dev->cache; 850 851 if (!mlx5_debugfs_root || dev->is_rep) 852 return; 853 854 
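	/*
	 * Everything below hangs off the device's debugfs root as
	 * mr_cache/<order>/{size,limit,cur,miss}, with one directory per
	 * persistent cache entry created by mlx5_mkey_cache_debugfs_add_ent().
	 */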
cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); 855 } 856 857 static void delay_time_func(struct timer_list *t) 858 { 859 struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer); 860 861 WRITE_ONCE(dev->fill_delay, 0); 862 } 863 864 static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) 865 { 866 struct mlx5_mkeys_page *page; 867 868 page = kzalloc_obj(*page); 869 if (!page) 870 return -ENOMEM; 871 INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); 872 spin_lock_init(&ent->mkeys_queue.lock); 873 list_add_tail(&page->list, &ent->mkeys_queue.pages_list); 874 ent->mkeys_queue.num_pages++; 875 return 0; 876 } 877 878 static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) 879 { 880 struct mlx5_mkeys_page *page; 881 882 WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); 883 page = list_last_entry(&ent->mkeys_queue.pages_list, 884 struct mlx5_mkeys_page, list); 885 list_del(&page->list); 886 kfree(page); 887 } 888 889 struct mlx5_cache_ent * 890 mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, 891 struct mlx5r_cache_rb_key rb_key, 892 bool persistent_entry) 893 { 894 struct mlx5_cache_ent *ent; 895 int order; 896 int ret; 897 898 ent = kzalloc_obj(*ent); 899 if (!ent) 900 return ERR_PTR(-ENOMEM); 901 902 ret = mlx5r_mkeys_init(ent); 903 if (ret) 904 goto mkeys_err; 905 ent->rb_key = rb_key; 906 ent->dev = dev; 907 ent->is_tmp = !persistent_entry; 908 909 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 910 911 ret = mlx5_cache_ent_insert(&dev->cache, ent); 912 if (ret) 913 goto ent_insert_err; 914 915 if (persistent_entry) { 916 if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) 917 order = MLX5_IMR_KSM_CACHE_ENTRY; 918 else 919 order = order_base_2(rb_key.ndescs) - 2; 920 921 if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && 922 !dev->is_rep && mlx5_core_is_pf(dev->mdev) && 923 mlx5r_umr_can_load_pas(dev, 0)) 924 ent->limit = dev->mdev->profile.mr_cache[order].limit; 925 else 926 ent->limit = 0; 927 928 mlx5_mkey_cache_debugfs_add_ent(dev, ent); 929 } 930 931 return ent; 932 ent_insert_err: 933 mlx5r_mkeys_uninit(ent); 934 mkeys_err: 935 kfree(ent); 936 return ERR_PTR(ret); 937 } 938 939 static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) 940 { 941 struct rb_root *root = &dev->cache.rb_root; 942 struct mlx5_cache_ent *ent; 943 struct rb_node *node; 944 945 mutex_lock(&dev->cache.rb_lock); 946 node = rb_first(root); 947 while (node) { 948 ent = rb_entry(node, struct mlx5_cache_ent, node); 949 node = rb_next(node); 950 clean_keys(dev, ent); 951 rb_erase(&ent->node, root); 952 mlx5r_mkeys_uninit(ent); 953 kfree(ent); 954 } 955 mutex_unlock(&dev->cache.rb_lock); 956 } 957 958 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) 959 { 960 struct mlx5_mkey_cache *cache = &dev->cache; 961 struct rb_root *root = &dev->cache.rb_root; 962 struct mlx5r_cache_rb_key rb_key = { 963 .access_mode = MLX5_MKC_ACCESS_MODE_MTT, 964 .ph = MLX5_IB_NO_PH, 965 }; 966 struct mlx5_cache_ent *ent; 967 struct rb_node *node; 968 int ret; 969 int i; 970 971 mutex_init(&dev->slow_path_mutex); 972 mutex_init(&dev->cache.rb_lock); 973 dev->cache.rb_root = RB_ROOT; 974 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); 975 if (!cache->wq) { 976 mlx5_ib_warn(dev, "failed to create work queue\n"); 977 return -ENOMEM; 978 } 979 980 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 981 timer_setup(&dev->delay_timer, delay_time_func, 0); 982 mlx5_mkey_cache_debugfs_init(dev); 983 mutex_lock(&cache->rb_lock); 984 for (i = 0; i <= 
mkey_cache_max_order(dev); i++) {
		rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
		ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
		if (IS_ERR(ent)) {
			ret = PTR_ERR(ent);
			goto err;
		}
	}

	ret = mlx5_odp_init_mkey_cache(dev);
	if (ret)
		goto err;

	mutex_unlock(&cache->rb_lock);
	for (node = rb_first(root); node; node = rb_next(node)) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		spin_lock_irq(&ent->mkeys_queue.lock);
		queue_adjust_cache_locked(ent);
		spin_unlock_irq(&ent->mkeys_queue.lock);
	}

	return 0;

err:
	mutex_unlock(&cache->rb_lock);
	mlx5_mkey_cache_debugfs_cleanup(dev);
	mlx5r_destroy_cache_entries(dev);
	destroy_workqueue(cache->wq);
	mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
	return ret;
}

void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
	struct rb_root *root = &dev->cache.rb_root;
	struct mlx5_cache_ent *ent;
	struct rb_node *node;

	if (!dev->cache.wq)
		return;

	mutex_lock(&dev->cache.rb_lock);
	for (node = rb_first(root); node; node = rb_next(node)) {
		ent = rb_entry(node, struct mlx5_cache_ent, node);
		spin_lock_irq(&ent->mkeys_queue.lock);
		ent->disabled = true;
		spin_unlock_irq(&ent->mkeys_queue.lock);
		cancel_delayed_work(&ent->dwork);
	}
	mutex_unlock(&dev->cache.rb_lock);

	/*
	 * After all entries are disabled and will not reschedule on WQ,
	 * flush it and all async commands.
	 */
	flush_workqueue(dev->cache.wq);

	mlx5_mkey_cache_debugfs_cleanup(dev);
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	/* At this point all entries are disabled and have no concurrent work.
*/ 1045 mlx5r_destroy_cache_entries(dev); 1046 1047 destroy_workqueue(dev->cache.wq); 1048 timer_delete_sync(&dev->delay_timer); 1049 } 1050 1051 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 1052 { 1053 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1054 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1055 struct mlx5_ib_mr *mr; 1056 void *mkc; 1057 u32 *in; 1058 int err; 1059 1060 mr = kzalloc_obj(*mr); 1061 if (!mr) 1062 return ERR_PTR(-ENOMEM); 1063 1064 in = kzalloc(inlen, GFP_KERNEL); 1065 if (!in) { 1066 err = -ENOMEM; 1067 goto err_free; 1068 } 1069 1070 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1071 1072 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 1073 MLX5_SET(mkc, mkc, length64, 1); 1074 set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0, 1075 pd); 1076 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 1077 1078 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1079 if (err) 1080 goto err_in; 1081 1082 kfree(in); 1083 mr->mmkey.type = MLX5_MKEY_MR; 1084 mr->ibmr.lkey = mr->mmkey.key; 1085 mr->ibmr.rkey = mr->mmkey.key; 1086 mr->umem = NULL; 1087 1088 return &mr->ibmr; 1089 1090 err_in: 1091 kfree(in); 1092 1093 err_free: 1094 kfree(mr); 1095 1096 return ERR_PTR(err); 1097 } 1098 1099 static int get_octo_len(u64 addr, u64 len, int page_shift) 1100 { 1101 u64 page_size = 1ULL << page_shift; 1102 u64 offset; 1103 int npages; 1104 1105 offset = addr & (page_size - 1); 1106 npages = ALIGN(len + offset, page_size) >> page_shift; 1107 return (npages + 1) / 2; 1108 } 1109 1110 static int mkey_cache_max_order(struct mlx5_ib_dev *dev) 1111 { 1112 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 1113 return MKEY_CACHE_LAST_STD_ENTRY; 1114 return MLX5_MAX_UMR_SHIFT; 1115 } 1116 1117 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1118 u64 length, int access_flags, u64 iova) 1119 { 1120 mr->ibmr.lkey = mr->mmkey.key; 1121 mr->ibmr.rkey = mr->mmkey.key; 1122 mr->ibmr.length = length; 1123 mr->ibmr.device = &dev->ib_dev; 1124 mr->ibmr.iova = iova; 1125 mr->access_flags = access_flags; 1126 } 1127 1128 static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, 1129 u64 iova) 1130 { 1131 /* 1132 * The alignment of iova has already been checked upon entering 1133 * UVERBS_METHOD_REG_DMABUF_MR 1134 */ 1135 umem->iova = iova; 1136 return PAGE_SIZE; 1137 } 1138 1139 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 1140 struct ib_umem *umem, u64 iova, 1141 int access_flags, int access_mode, 1142 u16 st_index, u8 ph) 1143 { 1144 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1145 struct mlx5r_cache_rb_key rb_key = {}; 1146 struct mlx5_cache_ent *ent; 1147 struct mlx5_ib_mr *mr; 1148 unsigned long page_size; 1149 1150 if (umem->is_dmabuf) 1151 page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); 1152 else 1153 page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova, 1154 access_mode); 1155 if (WARN_ON(!page_size)) 1156 return ERR_PTR(-EINVAL); 1157 1158 rb_key.access_mode = access_mode; 1159 rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); 1160 rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); 1161 rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); 1162 rb_key.st_index = st_index; 1163 rb_key.ph = ph; 1164 ent = mkey_cache_ent_from_rb_key(dev, rb_key); 1165 /* 1166 * If the MR can't come from the cache then synchronously create an uncached 1167 * one. 
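	 * The uncached MR still records rb_key and is marked cacheable, so on
	 * deregistration cache_ent_find_and_store() can park its mkey in a
	 * (possibly temporary) cache entry instead of destroying it at once.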
1168 */ 1169 if (!ent) { 1170 mutex_lock(&dev->slow_path_mutex); 1171 mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode, 1172 st_index, ph); 1173 mutex_unlock(&dev->slow_path_mutex); 1174 if (IS_ERR(mr)) 1175 return mr; 1176 mr->mmkey.rb_key = rb_key; 1177 mr->mmkey.cacheable = true; 1178 return mr; 1179 } 1180 1181 mr = _mlx5_mr_cache_alloc(dev, ent); 1182 if (IS_ERR(mr)) 1183 return mr; 1184 1185 mr->ibmr.pd = pd; 1186 mr->umem = umem; 1187 mr->page_shift = order_base_2(page_size); 1188 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1189 1190 return mr; 1191 } 1192 1193 static struct ib_mr * 1194 reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags, 1195 u32 crossed_lkey) 1196 { 1197 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1198 int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING; 1199 struct mlx5_ib_mr *mr; 1200 void *mkc; 1201 int inlen; 1202 u32 *in; 1203 int err; 1204 1205 if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey)) 1206 return ERR_PTR(-EOPNOTSUPP); 1207 1208 mr = kzalloc_obj(*mr); 1209 if (!mr) 1210 return ERR_PTR(-ENOMEM); 1211 1212 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1213 in = kvzalloc(inlen, GFP_KERNEL); 1214 if (!in) { 1215 err = -ENOMEM; 1216 goto err_1; 1217 } 1218 1219 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1220 MLX5_SET(mkc, mkc, crossing_target_vhca_id, 1221 MLX5_CAP_GEN(dev->mdev, vhca_id)); 1222 MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey); 1223 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1224 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1225 1226 /* for this crossing mkey IOVA should be 0 and len should be IOVA + len */ 1227 set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd); 1228 MLX5_SET64(mkc, mkc, len, iova + length); 1229 1230 MLX5_SET(mkc, mkc, free, 0); 1231 MLX5_SET(mkc, mkc, umr_en, 0); 1232 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1233 if (err) 1234 goto err_2; 1235 1236 mr->mmkey.type = MLX5_MKEY_MR; 1237 set_mr_fields(dev, mr, length, access_flags, iova); 1238 mr->ibmr.pd = pd; 1239 kvfree(in); 1240 mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key); 1241 1242 return &mr->ibmr; 1243 err_2: 1244 kvfree(in); 1245 err_1: 1246 kfree(mr); 1247 return ERR_PTR(err); 1248 } 1249 1250 /* 1251 * If ibmr is NULL it will be allocated by reg_create. 1252 * Else, the given ibmr will be used. 
1253 */ 1254 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, 1255 u64 iova, int access_flags, 1256 unsigned long page_size, bool populate, 1257 int access_mode, u16 st_index, u8 ph) 1258 { 1259 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1260 struct mlx5_ib_mr *mr; 1261 __be64 *pas; 1262 void *mkc; 1263 int inlen; 1264 u32 *in; 1265 int err; 1266 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) && 1267 (access_mode == MLX5_MKC_ACCESS_MODE_MTT) && 1268 (ph == MLX5_IB_NO_PH); 1269 bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1270 1271 if (!page_size) 1272 return ERR_PTR(-EINVAL); 1273 mr = kzalloc_obj(*mr); 1274 if (!mr) 1275 return ERR_PTR(-ENOMEM); 1276 1277 mr->ibmr.pd = pd; 1278 mr->access_flags = access_flags; 1279 mr->page_shift = order_base_2(page_size); 1280 1281 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1282 if (populate) 1283 inlen += sizeof(*pas) * 1284 roundup(ib_umem_num_dma_blocks(umem, page_size), 2); 1285 in = kvzalloc(inlen, GFP_KERNEL); 1286 if (!in) { 1287 err = -ENOMEM; 1288 goto err_1; 1289 } 1290 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1291 if (populate) { 1292 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) { 1293 err = -EINVAL; 1294 goto err_2; 1295 } 1296 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, 1297 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1298 } 1299 1300 /* The pg_access bit allows setting the access flags 1301 * in the page list submitted with the command. 1302 */ 1303 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1304 1305 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1306 set_mkc_access_pd_addr_fields(mkc, access_flags, iova, 1307 populate ? pd : dev->umrc.pd); 1308 /* In case a data direct flow, overwrite the pdn field by its internal kernel PD */ 1309 if (umem->is_dmabuf && ksm_mode) 1310 MLX5_SET(mkc, mkc, pd, dev->ddr.pdn); 1311 1312 MLX5_SET(mkc, mkc, free, !populate); 1313 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode); 1314 MLX5_SET(mkc, mkc, umr_en, 1); 1315 1316 MLX5_SET64(mkc, mkc, len, umem->length); 1317 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1318 if (ksm_mode) 1319 MLX5_SET(mkc, mkc, translations_octword_size, 1320 get_octo_len(iova, umem->length, mr->page_shift) * 2); 1321 else 1322 MLX5_SET(mkc, mkc, translations_octword_size, 1323 get_octo_len(iova, umem->length, mr->page_shift)); 1324 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); 1325 if (mlx5_umem_needs_ats(dev, umem, access_flags)) 1326 MLX5_SET(mkc, mkc, ma_translation_mode, 1); 1327 if (populate) { 1328 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1329 get_octo_len(iova, umem->length, mr->page_shift)); 1330 } 1331 1332 if (ph != MLX5_IB_NO_PH) { 1333 MLX5_SET(mkc, mkc, pcie_tph_en, 1); 1334 MLX5_SET(mkc, mkc, pcie_tph_ph, ph); 1335 if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) 1336 MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index); 1337 } 1338 1339 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1340 if (err) { 1341 mlx5_ib_warn(dev, "create mkey failed\n"); 1342 goto err_2; 1343 } 1344 mr->mmkey.type = MLX5_MKEY_MR; 1345 mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift); 1346 mr->umem = umem; 1347 set_mr_fields(dev, mr, umem->length, access_flags, iova); 1348 kvfree(in); 1349 1350 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1351 1352 return mr; 1353 1354 err_2: 1355 kvfree(in); 1356 err_1: 1357 kfree(mr); 1358 return ERR_PTR(err); 1359 } 1360 1361 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, 
u64 start_addr, 1362 u64 length, int acc, int mode) 1363 { 1364 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1365 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1366 struct mlx5_ib_mr *mr; 1367 void *mkc; 1368 u32 *in; 1369 int err; 1370 1371 mr = kzalloc_obj(*mr); 1372 if (!mr) 1373 return ERR_PTR(-ENOMEM); 1374 1375 in = kzalloc(inlen, GFP_KERNEL); 1376 if (!in) { 1377 err = -ENOMEM; 1378 goto err_free; 1379 } 1380 1381 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1382 1383 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1384 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1385 MLX5_SET64(mkc, mkc, len, length); 1386 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1387 1388 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 1389 if (err) 1390 goto err_in; 1391 1392 kfree(in); 1393 1394 set_mr_fields(dev, mr, length, acc, start_addr); 1395 1396 return &mr->ibmr; 1397 1398 err_in: 1399 kfree(in); 1400 1401 err_free: 1402 kfree(mr); 1403 1404 return ERR_PTR(err); 1405 } 1406 1407 int mlx5_ib_advise_mr(struct ib_pd *pd, 1408 enum ib_uverbs_advise_mr_advice advice, 1409 u32 flags, 1410 struct ib_sge *sg_list, 1411 u32 num_sge, 1412 struct uverbs_attr_bundle *attrs) 1413 { 1414 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1415 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && 1416 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) 1417 return -EOPNOTSUPP; 1418 1419 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1420 sg_list, num_sge); 1421 } 1422 1423 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1424 struct ib_dm_mr_attr *attr, 1425 struct uverbs_attr_bundle *attrs) 1426 { 1427 struct mlx5_ib_dm *mdm = to_mdm(dm); 1428 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1429 u64 start_addr = mdm->dev_addr + attr->offset; 1430 int mode; 1431 1432 switch (mdm->type) { 1433 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1434 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1435 return ERR_PTR(-EINVAL); 1436 1437 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1438 start_addr -= pci_resource_start(dev->pdev, 0); 1439 break; 1440 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1441 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1442 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: 1443 case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: 1444 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1445 return ERR_PTR(-EINVAL); 1446 1447 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1448 break; 1449 default: 1450 return ERR_PTR(-EINVAL); 1451 } 1452 1453 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1454 attr->access_flags, mode); 1455 } 1456 1457 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, 1458 u64 iova, int access_flags, 1459 struct ib_dmah *dmah) 1460 { 1461 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1462 struct mlx5_ib_mr *mr = NULL; 1463 bool xlt_with_umr; 1464 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1465 u8 ph = MLX5_IB_NO_PH; 1466 int err; 1467 1468 if (dmah) { 1469 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1470 1471 ph = dmah->ph; 1472 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1473 st_index = mdmah->st_index; 1474 } 1475 1476 xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); 1477 if (xlt_with_umr) { 1478 mr = alloc_cacheable_mr(pd, umem, iova, access_flags, 1479 MLX5_MKC_ACCESS_MODE_MTT, 1480 st_index, ph); 1481 } else { 1482 unsigned long page_size = mlx5_umem_mkc_find_best_pgsz( 1483 dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT); 1484 1485 
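		/*
		 * Slow path: UMR cannot upload this page list, so build the
		 * mkey synchronously with the PAS inlined in the CREATE_MKEY
		 * command (populate == true), serialized by slow_path_mutex.
		 */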
mutex_lock(&dev->slow_path_mutex); 1486 mr = reg_create(pd, umem, iova, access_flags, page_size, 1487 true, MLX5_MKC_ACCESS_MODE_MTT, 1488 st_index, ph); 1489 mutex_unlock(&dev->slow_path_mutex); 1490 } 1491 if (IS_ERR(mr)) { 1492 ib_umem_release(umem); 1493 return ERR_CAST(mr); 1494 } 1495 1496 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1497 1498 atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1499 1500 if (xlt_with_umr) { 1501 /* 1502 * If the MR was created with reg_create then it will be 1503 * configured properly but left disabled. It is safe to go ahead 1504 * and configure it again via UMR while enabling it. 1505 */ 1506 err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); 1507 if (err) { 1508 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1509 return ERR_PTR(err); 1510 } 1511 } 1512 return &mr->ibmr; 1513 } 1514 1515 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, 1516 u64 iova, int access_flags, 1517 struct ib_udata *udata) 1518 { 1519 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1520 struct ib_umem_odp *odp; 1521 struct mlx5_ib_mr *mr; 1522 int err; 1523 1524 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1525 return ERR_PTR(-EOPNOTSUPP); 1526 1527 err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq); 1528 if (err) 1529 return ERR_PTR(err); 1530 if (!start && length == U64_MAX) { 1531 if (iova != 0) 1532 return ERR_PTR(-EINVAL); 1533 if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1534 return ERR_PTR(-EINVAL); 1535 1536 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags); 1537 if (IS_ERR(mr)) 1538 return ERR_CAST(mr); 1539 return &mr->ibmr; 1540 } 1541 1542 /* ODP requires xlt update via umr to work. */ 1543 if (!mlx5r_umr_can_load_pas(dev, length)) 1544 return ERR_PTR(-EINVAL); 1545 1546 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 1547 &mlx5_mn_ops); 1548 if (IS_ERR(odp)) 1549 return ERR_CAST(odp); 1550 1551 mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags, 1552 MLX5_MKC_ACCESS_MODE_MTT, 1553 MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX, 1554 MLX5_IB_NO_PH); 1555 if (IS_ERR(mr)) { 1556 ib_umem_release(&odp->umem); 1557 return ERR_CAST(mr); 1558 } 1559 xa_init(&mr->implicit_children); 1560 1561 odp->private = mr; 1562 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1563 if (err) 1564 goto err_dereg_mr; 1565 1566 err = mlx5_ib_init_odp_mr(mr); 1567 if (err) 1568 goto err_dereg_mr; 1569 return &mr->ibmr; 1570 1571 err_dereg_mr: 1572 mlx5_ib_dereg_mr(&mr->ibmr, NULL); 1573 return ERR_PTR(err); 1574 } 1575 1576 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1577 u64 iova, int access_flags, 1578 struct ib_dmah *dmah, 1579 struct ib_udata *udata) 1580 { 1581 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1582 struct ib_umem *umem; 1583 int err; 1584 1585 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1586 ((access_flags & IB_ACCESS_ON_DEMAND) && dmah)) 1587 return ERR_PTR(-EOPNOTSUPP); 1588 1589 mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1590 start, iova, length, access_flags); 1591 1592 err = mlx5r_umr_resource_init(dev); 1593 if (err) 1594 return ERR_PTR(err); 1595 1596 if (access_flags & IB_ACCESS_ON_DEMAND) 1597 return create_user_odp_mr(pd, start, length, iova, access_flags, 1598 udata); 1599 umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); 1600 if (IS_ERR(umem)) 1601 return ERR_CAST(umem); 1602 return create_real_mr(pd, umem, iova, access_flags, dmah); 1603 } 1604 1605 static void 
mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) 1606 { 1607 struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; 1608 struct mlx5_ib_mr *mr = umem_dmabuf->private; 1609 1610 dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); 1611 1612 if (!umem_dmabuf->sgt || !mr) 1613 return; 1614 1615 mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); 1616 ib_umem_dmabuf_unmap_pages(umem_dmabuf); 1617 } 1618 1619 static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { 1620 .allow_peer2peer = 1, 1621 .invalidate_mappings = mlx5_ib_dmabuf_invalidate_cb, 1622 }; 1623 1624 static struct ib_mr * 1625 reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, 1626 u64 offset, u64 length, u64 virt_addr, 1627 int fd, int access_flags, int access_mode, 1628 struct ib_dmah *dmah) 1629 { 1630 bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM); 1631 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1632 struct mlx5_ib_mr *mr = NULL; 1633 struct ib_umem_dmabuf *umem_dmabuf; 1634 u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX; 1635 u8 ph = MLX5_IB_NO_PH; 1636 int err; 1637 1638 err = mlx5r_umr_resource_init(dev); 1639 if (err) 1640 return ERR_PTR(err); 1641 1642 if (!pinned_mode) 1643 umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, 1644 offset, length, fd, 1645 access_flags, 1646 &mlx5_ib_dmabuf_attach_ops); 1647 else if (dma_device) 1648 umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev, 1649 dma_device, offset, length, 1650 fd, access_flags); 1651 else 1652 umem_dmabuf = ib_umem_dmabuf_get_pinned( 1653 &dev->ib_dev, offset, length, fd, access_flags); 1654 1655 if (IS_ERR(umem_dmabuf)) { 1656 mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf); 1657 return ERR_CAST(umem_dmabuf); 1658 } 1659 1660 if (dmah) { 1661 struct mlx5_ib_dmah *mdmah = to_mdmah(dmah); 1662 1663 ph = dmah->ph; 1664 if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) 1665 st_index = mdmah->st_index; 1666 } 1667 1668 mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, 1669 access_flags, access_mode, 1670 st_index, ph); 1671 if (IS_ERR(mr)) { 1672 ib_umem_release(&umem_dmabuf->umem); 1673 return ERR_CAST(mr); 1674 } 1675 1676 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1677 1678 atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); 1679 umem_dmabuf->private = mr; 1680 if (!pinned_mode) { 1681 err = mlx5r_store_odp_mkey(dev, &mr->mmkey); 1682 if (err) 1683 goto err_dereg_mr; 1684 } else { 1685 mr->data_direct = true; 1686 } 1687 1688 err = mlx5_ib_init_dmabuf_mr(mr); 1689 if (err) 1690 goto err_dereg_mr; 1691 return &mr->ibmr; 1692 1693 err_dereg_mr: 1694 __mlx5_ib_dereg_mr(&mr->ibmr); 1695 return ERR_PTR(err); 1696 } 1697 1698 static struct ib_mr * 1699 reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset, 1700 u64 length, u64 virt_addr, 1701 int fd, int access_flags) 1702 { 1703 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1704 struct mlx5_data_direct_dev *data_direct_dev; 1705 struct ib_mr *crossing_mr; 1706 struct ib_mr *crossed_mr; 1707 int ret = 0; 1708 1709 /* As of HW behaviour the IOVA must be page aligned in KSM mode */ 1710 if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND)) 1711 return ERR_PTR(-EOPNOTSUPP); 1712 1713 mutex_lock(&dev->data_direct_lock); 1714 data_direct_dev = dev->data_direct_dev; 1715 if (!data_direct_dev) { 1716 ret = -EINVAL; 1717 goto end; 1718 } 1719 1720 /* If no device's 'data direct mkey' with RO flags exists 1721 * mask it out accordingly. 
1722 */ 1723 if (!dev->ddr.mkey_ro_valid) 1724 access_flags &= ~IB_ACCESS_RELAXED_ORDERING; 1725 crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev, 1726 offset, length, virt_addr, fd, 1727 access_flags, MLX5_MKC_ACCESS_MODE_KSM, 1728 NULL); 1729 if (IS_ERR(crossed_mr)) { 1730 ret = PTR_ERR(crossed_mr); 1731 goto end; 1732 } 1733 1734 mutex_lock(&dev->slow_path_mutex); 1735 crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags, 1736 crossed_mr->lkey); 1737 mutex_unlock(&dev->slow_path_mutex); 1738 if (IS_ERR(crossing_mr)) { 1739 __mlx5_ib_dereg_mr(crossed_mr); 1740 ret = PTR_ERR(crossing_mr); 1741 goto end; 1742 } 1743 1744 list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list); 1745 to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr); 1746 to_mmr(crossing_mr)->data_direct = true; 1747 end: 1748 mutex_unlock(&dev->data_direct_lock); 1749 return ret ? ERR_PTR(ret) : crossing_mr; 1750 } 1751 1752 struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, 1753 u64 length, u64 virt_addr, 1754 int fd, int access_flags, 1755 struct ib_dmah *dmah, 1756 struct uverbs_attr_bundle *attrs) 1757 { 1758 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1759 int mlx5_access_flags = 0; 1760 int err; 1761 1762 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || 1763 !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1764 return ERR_PTR(-EOPNOTSUPP); 1765 1766 if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) { 1767 err = uverbs_get_flags32(&mlx5_access_flags, attrs, 1768 MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, 1769 MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT); 1770 if (err) 1771 return ERR_PTR(err); 1772 } 1773 1774 mlx5_ib_dbg(dev, 1775 "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n", 1776 offset, virt_addr, length, fd, access_flags, mlx5_access_flags); 1777 1778 /* dmabuf requires xlt update via umr to work. */ 1779 if (!mlx5r_umr_can_load_pas(dev, length)) 1780 return ERR_PTR(-EINVAL); 1781 1782 if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT) 1783 return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr, 1784 fd, access_flags); 1785 1786 return reg_user_mr_dmabuf(pd, NULL, offset, length, virt_addr, fd, 1787 access_flags, MLX5_MKC_ACCESS_MODE_MTT, dmah); 1788 } 1789 1790 /* 1791 * True if the change in access flags can be done via UMR, only some access 1792 * flags can be updated. 
1793 */ 1794 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, 1795 unsigned int current_access_flags, 1796 unsigned int target_access_flags) 1797 { 1798 unsigned int diffs = current_access_flags ^ target_access_flags; 1799 1800 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | 1801 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING | 1802 IB_ACCESS_REMOTE_ATOMIC)) 1803 return false; 1804 return mlx5r_umr_can_reconfig(dev, current_access_flags, 1805 target_access_flags); 1806 } 1807 1808 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, 1809 struct ib_umem *new_umem, 1810 int new_access_flags, u64 iova, 1811 unsigned long *page_size) 1812 { 1813 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1814 1815 /* We only track the allocated sizes of MRs from the cache */ 1816 if (!mr->mmkey.cache_ent) 1817 return false; 1818 if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) 1819 return false; 1820 1821 *page_size = mlx5_umem_mkc_find_best_pgsz( 1822 dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode); 1823 if (WARN_ON(!*page_size)) 1824 return false; 1825 return (mr->mmkey.cache_ent->rb_key.ndescs) >= 1826 ib_umem_num_dma_blocks(new_umem, *page_size); 1827 } 1828 1829 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, 1830 int access_flags, int flags, struct ib_umem *new_umem, 1831 u64 iova, unsigned long page_size) 1832 { 1833 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1834 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; 1835 struct ib_umem *old_umem = mr->umem; 1836 int err; 1837 1838 /* 1839 * To keep everything simple the MR is revoked before we start to mess 1840 * with it. This ensure the change is atomic relative to any use of the 1841 * MR. 1842 */ 1843 err = mlx5r_umr_revoke_mr(mr); 1844 if (err) 1845 return err; 1846 1847 if (flags & IB_MR_REREG_PD) { 1848 mr->ibmr.pd = pd; 1849 upd_flags |= MLX5_IB_UPD_XLT_PD; 1850 } 1851 if (flags & IB_MR_REREG_ACCESS) { 1852 mr->access_flags = access_flags; 1853 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1854 } 1855 1856 mr->ibmr.iova = iova; 1857 mr->ibmr.length = new_umem->length; 1858 mr->page_shift = order_base_2(page_size); 1859 mr->umem = new_umem; 1860 err = mlx5r_umr_update_mr_pas(mr, upd_flags); 1861 if (err) { 1862 /* 1863 * The MR is revoked at this point so there is no issue to free 1864 * new_umem. 
1865 */ 1866 mr->umem = old_umem; 1867 return err; 1868 } 1869 1870 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); 1871 ib_umem_release(old_umem); 1872 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); 1873 return 0; 1874 } 1875 1876 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1877 u64 length, u64 iova, int new_access_flags, 1878 struct ib_pd *new_pd, 1879 struct ib_udata *udata) 1880 { 1881 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1882 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1883 int err; 1884 1885 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct || 1886 mr->mmkey.rb_key.ph != MLX5_IB_NO_PH) 1887 return ERR_PTR(-EOPNOTSUPP); 1888 1889 mlx5_ib_dbg( 1890 dev, 1891 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", 1892 start, iova, length, new_access_flags); 1893 1894 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) 1895 return ERR_PTR(-EOPNOTSUPP); 1896 1897 if (!(flags & IB_MR_REREG_ACCESS)) 1898 new_access_flags = mr->access_flags; 1899 if (!(flags & IB_MR_REREG_PD)) 1900 new_pd = ib_mr->pd; 1901 1902 if (!(flags & IB_MR_REREG_TRANS)) { 1903 struct ib_umem *umem; 1904 1905 /* Fast path for PD/access change */ 1906 if (can_use_umr_rereg_access(dev, mr->access_flags, 1907 new_access_flags)) { 1908 err = mlx5r_umr_rereg_pd_access(mr, new_pd, 1909 new_access_flags); 1910 if (err) 1911 return ERR_PTR(err); 1912 return NULL; 1913 } 1914 /* DM or ODP MR's don't have a normal umem so we can't re-use it */ 1915 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1916 goto recreate; 1917 1918 /* 1919 * Only one active MR can refer to a umem at one time, revoke 1920 * the old MR before assigning the umem to the new one. 1921 */ 1922 err = mlx5r_umr_revoke_mr(mr); 1923 if (err) 1924 return ERR_PTR(err); 1925 umem = mr->umem; 1926 mr->umem = NULL; 1927 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); 1928 1929 return create_real_mr(new_pd, umem, mr->ibmr.iova, 1930 new_access_flags, NULL); 1931 } 1932 1933 /* 1934 * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does 1935 * but the logic around releasing the umem is different 1936 */ 1937 if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) 1938 goto recreate; 1939 1940 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && 1941 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { 1942 struct ib_umem *new_umem; 1943 unsigned long page_size; 1944 1945 new_umem = ib_umem_get(&dev->ib_dev, start, length, 1946 new_access_flags); 1947 if (IS_ERR(new_umem)) 1948 return ERR_CAST(new_umem); 1949 1950 /* Fast path for PAS change */ 1951 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, 1952 &page_size)) { 1953 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, 1954 new_umem, iova, page_size); 1955 if (err) { 1956 ib_umem_release(new_umem); 1957 return ERR_PTR(err); 1958 } 1959 return NULL; 1960 } 1961 return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL); 1962 } 1963 1964 /* 1965 * Everything else has no state we can preserve, just create a new MR 1966 * from scratch 1967 */ 1968 recreate: 1969 return mlx5_ib_reg_user_mr(new_pd, start, length, iova, 1970 new_access_flags, NULL, udata); 1971 } 1972 1973 static int 1974 mlx5_alloc_priv_descs(struct ib_device *device, 1975 struct mlx5_ib_mr *mr, 1976 int ndescs, 1977 int desc_size) 1978 { 1979 struct mlx5_ib_dev *dev = to_mdev(device); 1980 struct device *ddev = &dev->mdev->pdev->dev; 1981 int size = 
ndescs * desc_size; 1982 int add_size; 1983 int ret; 1984 1985 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1986 if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) { 1987 int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size)); 1988 1989 add_size = min_t(int, end - size, add_size); 1990 } 1991 1992 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1993 if (!mr->descs_alloc) 1994 return -ENOMEM; 1995 1996 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1997 1998 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); 1999 if (dma_mapping_error(ddev, mr->desc_map)) { 2000 ret = -ENOMEM; 2001 goto err; 2002 } 2003 2004 return 0; 2005 err: 2006 kfree(mr->descs_alloc); 2007 2008 return ret; 2009 } 2010 2011 static void 2012 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 2013 { 2014 if (!mr->umem && !mr->data_direct && 2015 mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { 2016 struct ib_device *device = mr->ibmr.device; 2017 int size = mr->max_descs * mr->desc_size; 2018 struct mlx5_ib_dev *dev = to_mdev(device); 2019 2020 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, 2021 DMA_TO_DEVICE); 2022 kfree(mr->descs_alloc); 2023 mr->descs = NULL; 2024 } 2025 } 2026 2027 static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, 2028 struct mlx5_ib_mr *mr) 2029 { 2030 struct mlx5_mkey_cache *cache = &dev->cache; 2031 struct mlx5_cache_ent *ent; 2032 int ret; 2033 2034 if (mr->mmkey.cache_ent) { 2035 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2036 goto end; 2037 } 2038 2039 mutex_lock(&cache->rb_lock); 2040 ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); 2041 if (ent) { 2042 if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { 2043 if (ent->disabled) { 2044 mutex_unlock(&cache->rb_lock); 2045 return -EOPNOTSUPP; 2046 } 2047 mr->mmkey.cache_ent = ent; 2048 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2049 mutex_unlock(&cache->rb_lock); 2050 goto end; 2051 } 2052 } 2053 2054 ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); 2055 mutex_unlock(&cache->rb_lock); 2056 if (IS_ERR(ent)) 2057 return PTR_ERR(ent); 2058 2059 mr->mmkey.cache_ent = ent; 2060 spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2061 2062 end: 2063 ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key); 2064 spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); 2065 return ret; 2066 } 2067 2068 static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr) 2069 { 2070 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2071 struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); 2072 int err; 2073 2074 lockdep_assert_held(&dev->data_direct_lock); 2075 mr->revoked = true; 2076 err = mlx5r_umr_revoke_mr(mr); 2077 if (WARN_ON(err)) 2078 return err; 2079 2080 ib_umem_dmabuf_revoke(umem_dmabuf); 2081 return 0; 2082 } 2083 2084 void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) 2085 { 2086 struct mlx5_ib_mr *mr, *next; 2087 2088 lockdep_assert_held(&dev->data_direct_lock); 2089 2090 list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) { 2091 list_del(&mr->dd_node); 2092 mlx5_ib_revoke_data_direct_mr(mr); 2093 } 2094 } 2095 2096 static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) 2097 { 2098 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2099 !to_ib_umem_dmabuf(mr->umem)->pinned; 2100 bool is_odp = is_odp_mr(mr); 2101 int ret; 2102 2103 if (is_odp) 2104 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2105 2106 if (is_odp_dma_buf) 2107 
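/*
 * For a non-pinned (ODP-style) dma-buf MR the exporter may invalidate
 * the mapping at any time under the dma-buf reservation lock, so hold
 * it across the revoke to serialize against such invalidation.
 */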
dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2108 NULL); 2109 2110 ret = mlx5r_umr_revoke_mr(mr); 2111 2112 if (is_odp) { 2113 if (!ret) 2114 to_ib_umem_odp(mr->umem)->private = NULL; 2115 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2116 } 2117 2118 if (is_odp_dma_buf) { 2119 if (!ret) 2120 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2121 dma_resv_unlock( 2122 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2123 } 2124 2125 return ret; 2126 } 2127 2128 static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) 2129 { 2130 bool is_odp_dma_buf = is_dmabuf_mr(mr) && 2131 !to_ib_umem_dmabuf(mr->umem)->pinned; 2132 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 2133 struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; 2134 bool is_odp = is_odp_mr(mr); 2135 bool from_cache = !!ent; 2136 int ret; 2137 2138 if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && 2139 !cache_ent_find_and_store(dev, mr)) { 2140 ent = mr->mmkey.cache_ent; 2141 /* upon storing to a clean temp entry - schedule its cleanup */ 2142 spin_lock_irq(&ent->mkeys_queue.lock); 2143 if (from_cache) 2144 ent->in_use--; 2145 if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { 2146 mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 2147 secs_to_jiffies(30)); 2148 ent->tmp_cleanup_scheduled = true; 2149 } 2150 spin_unlock_irq(&ent->mkeys_queue.lock); 2151 return 0; 2152 } 2153 2154 if (ent) { 2155 spin_lock_irq(&ent->mkeys_queue.lock); 2156 ent->in_use--; 2157 mr->mmkey.cache_ent = NULL; 2158 spin_unlock_irq(&ent->mkeys_queue.lock); 2159 } 2160 2161 if (is_odp) 2162 mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2163 2164 if (is_odp_dma_buf) 2165 dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, 2166 NULL); 2167 ret = destroy_mkey(dev, mr); 2168 if (is_odp) { 2169 if (!ret) 2170 to_ib_umem_odp(mr->umem)->private = NULL; 2171 mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); 2172 } 2173 2174 if (is_odp_dma_buf) { 2175 if (!ret) 2176 to_ib_umem_dmabuf(mr->umem)->private = NULL; 2177 dma_resv_unlock( 2178 to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); 2179 } 2180 return ret; 2181 } 2182 2183 static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) 2184 { 2185 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2186 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2187 int rc; 2188 2189 /* 2190 * Any async use of the mr must hold the refcount, once the refcount 2191 * goes to zero no other thread, such as ODP page faults, prefetch, any 2192 * UMR activity, etc can touch the mkey. Thus it is safe to destroy it. 
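 * Erasing the mkey from the ODP xarray first prevents new references
 * from being taken while we wait for the existing count to drop.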
2193 */ 2194 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2195 refcount_read(&mr->mmkey.usecount) != 0 && 2196 xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))) 2197 mlx5r_deref_wait_odp_mkey(&mr->mmkey); 2198 2199 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 2200 xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2201 mr->sig, NULL, GFP_KERNEL); 2202 2203 if (mr->mtt_mr) { 2204 rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2205 if (rc) 2206 return rc; 2207 mr->mtt_mr = NULL; 2208 } 2209 if (mr->klm_mr) { 2210 rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2211 if (rc) 2212 return rc; 2213 mr->klm_mr = NULL; 2214 } 2215 2216 if (mlx5_core_destroy_psv(dev->mdev, 2217 mr->sig->psv_memory.psv_idx)) 2218 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2219 mr->sig->psv_memory.psv_idx); 2220 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2221 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2222 mr->sig->psv_wire.psv_idx); 2223 kfree(mr->sig); 2224 mr->sig = NULL; 2225 } 2226 2227 /* Stop DMA */ 2228 rc = mlx5r_handle_mkey_cleanup(mr); 2229 if (rc) 2230 return rc; 2231 2232 if (mr->umem) { 2233 bool is_odp = is_odp_mr(mr); 2234 2235 if (!is_odp) 2236 atomic_sub(ib_umem_num_pages(mr->umem), 2237 &dev->mdev->priv.reg_pages); 2238 ib_umem_release(mr->umem); 2239 if (is_odp) 2240 mlx5_ib_free_odp_mr(mr); 2241 } 2242 2243 if (!mr->mmkey.cache_ent) 2244 mlx5_free_priv_descs(mr); 2245 2246 kfree(mr); 2247 return 0; 2248 } 2249 2250 static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev, 2251 struct mlx5_ib_mr *mr) 2252 { 2253 struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr; 2254 int ret; 2255 2256 ret = __mlx5_ib_dereg_mr(&mr->ibmr); 2257 if (ret) 2258 return ret; 2259 2260 mutex_lock(&dev->data_direct_lock); 2261 if (!dd_crossed_mr->revoked) 2262 list_del(&dd_crossed_mr->dd_node); 2263 2264 ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr); 2265 mutex_unlock(&dev->data_direct_lock); 2266 return ret; 2267 } 2268 2269 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 2270 { 2271 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2272 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 2273 2274 if (mr->data_direct) 2275 return dereg_crossing_data_direct_mr(dev, mr); 2276 2277 return __mlx5_ib_dereg_mr(ibmr); 2278 } 2279 2280 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 2281 int access_mode, int page_shift) 2282 { 2283 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2284 void *mkc; 2285 2286 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2287 2288 /* This is only used from the kernel, so setting the PD is OK. 
*/ 2289 set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd); 2290 MLX5_SET(mkc, mkc, free, 1); 2291 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2292 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 2293 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 2294 MLX5_SET(mkc, mkc, umr_en, 1); 2295 MLX5_SET(mkc, mkc, log_page_size, page_shift); 2296 if (access_mode == MLX5_MKC_ACCESS_MODE_PA || 2297 access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2298 MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats)); 2299 } 2300 2301 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2302 int ndescs, int desc_size, int page_shift, 2303 int access_mode, u32 *in, int inlen) 2304 { 2305 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2306 int err; 2307 2308 mr->access_mode = access_mode; 2309 mr->desc_size = desc_size; 2310 mr->max_descs = ndescs; 2311 2312 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 2313 if (err) 2314 return err; 2315 2316 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 2317 2318 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); 2319 if (err) 2320 goto err_free_descs; 2321 2322 mr->mmkey.type = MLX5_MKEY_MR; 2323 mr->ibmr.lkey = mr->mmkey.key; 2324 mr->ibmr.rkey = mr->mmkey.key; 2325 2326 return 0; 2327 2328 err_free_descs: 2329 mlx5_free_priv_descs(mr); 2330 return err; 2331 } 2332 2333 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 2334 u32 max_num_sg, u32 max_num_meta_sg, 2335 int desc_size, int access_mode) 2336 { 2337 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2338 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 2339 int page_shift = 0; 2340 struct mlx5_ib_mr *mr; 2341 u32 *in; 2342 int err; 2343 2344 mr = kzalloc_obj(*mr); 2345 if (!mr) 2346 return ERR_PTR(-ENOMEM); 2347 2348 mr->ibmr.pd = pd; 2349 mr->ibmr.device = pd->device; 2350 2351 in = kzalloc(inlen, GFP_KERNEL); 2352 if (!in) { 2353 err = -ENOMEM; 2354 goto err_free; 2355 } 2356 2357 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 2358 page_shift = PAGE_SHIFT; 2359 2360 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 2361 access_mode, in, inlen); 2362 if (err) 2363 goto err_free_in; 2364 2365 mr->umem = NULL; 2366 kfree(in); 2367 2368 return mr; 2369 2370 err_free_in: 2371 kfree(in); 2372 err_free: 2373 kfree(mr); 2374 return ERR_PTR(err); 2375 } 2376 2377 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2378 int ndescs, u32 *in, int inlen) 2379 { 2380 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 2381 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 2382 inlen); 2383 } 2384 2385 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2386 int ndescs, u32 *in, int inlen) 2387 { 2388 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 2389 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2390 } 2391 2392 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 2393 int max_num_sg, int max_num_meta_sg, 2394 u32 *in, int inlen) 2395 { 2396 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2397 u32 psv_index[2]; 2398 void *mkc; 2399 int err; 2400 2401 mr->sig = kzalloc_obj(*mr->sig); 2402 if (!mr->sig) 2403 return -ENOMEM; 2404 2405 /* create mem & wire PSVs */ 2406 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 2407 if (err) 2408 goto err_free_sig; 2409 2410 mr->sig->psv_memory.psv_idx = psv_index[0]; 2411 mr->sig->psv_wire.psv_idx = psv_index[1]; 
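/*
 * With the PSVs in place, arm SIGERR for the next UMR and create the
 * two protection-information child MRs (KLM and MTT) that back this
 * integrity MR before allocating its own KLM descriptors.
 */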
2412 2413 mr->sig->sig_status_checked = true; 2414 mr->sig->sig_err_exists = false; 2415 /* Next UMR, Arm SIGERR */ 2416 ++mr->sig->sigerr_count; 2417 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2418 sizeof(struct mlx5_klm), 2419 MLX5_MKC_ACCESS_MODE_KLMS); 2420 if (IS_ERR(mr->klm_mr)) { 2421 err = PTR_ERR(mr->klm_mr); 2422 goto err_destroy_psv; 2423 } 2424 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 2425 sizeof(struct mlx5_mtt), 2426 MLX5_MKC_ACCESS_MODE_MTT); 2427 if (IS_ERR(mr->mtt_mr)) { 2428 err = PTR_ERR(mr->mtt_mr); 2429 goto err_free_klm_mr; 2430 } 2431 2432 /* Set bsf descriptors for mkey */ 2433 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2434 MLX5_SET(mkc, mkc, bsf_en, 1); 2435 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 2436 2437 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 2438 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 2439 if (err) 2440 goto err_free_mtt_mr; 2441 2442 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 2443 mr->sig, GFP_KERNEL)); 2444 if (err) 2445 goto err_free_descs; 2446 return 0; 2447 2448 err_free_descs: 2449 destroy_mkey(dev, mr); 2450 mlx5_free_priv_descs(mr); 2451 err_free_mtt_mr: 2452 mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL); 2453 mr->mtt_mr = NULL; 2454 err_free_klm_mr: 2455 mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL); 2456 mr->klm_mr = NULL; 2457 err_destroy_psv: 2458 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 2459 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 2460 mr->sig->psv_memory.psv_idx); 2461 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 2462 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 2463 mr->sig->psv_wire.psv_idx); 2464 err_free_sig: 2465 kfree(mr->sig); 2466 2467 return err; 2468 } 2469 2470 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 2471 enum ib_mr_type mr_type, u32 max_num_sg, 2472 u32 max_num_meta_sg) 2473 { 2474 struct mlx5_ib_dev *dev = to_mdev(pd->device); 2475 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2476 int ndescs = ALIGN(max_num_sg, 4); 2477 struct mlx5_ib_mr *mr; 2478 u32 *in; 2479 int err; 2480 2481 mr = kzalloc_obj(*mr); 2482 if (!mr) 2483 return ERR_PTR(-ENOMEM); 2484 2485 in = kzalloc(inlen, GFP_KERNEL); 2486 if (!in) { 2487 err = -ENOMEM; 2488 goto err_free; 2489 } 2490 2491 mr->ibmr.device = pd->device; 2492 mr->umem = NULL; 2493 2494 switch (mr_type) { 2495 case IB_MR_TYPE_MEM_REG: 2496 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 2497 break; 2498 case IB_MR_TYPE_SG_GAPS: 2499 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 2500 break; 2501 case IB_MR_TYPE_INTEGRITY: 2502 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 2503 max_num_meta_sg, in, inlen); 2504 break; 2505 default: 2506 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 2507 err = -EINVAL; 2508 } 2509 2510 if (err) 2511 goto err_free_in; 2512 2513 kfree(in); 2514 2515 return &mr->ibmr; 2516 2517 err_free_in: 2518 kfree(in); 2519 err_free: 2520 kfree(mr); 2521 return ERR_PTR(err); 2522 } 2523 2524 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 2525 u32 max_num_sg) 2526 { 2527 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 2528 } 2529 2530 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 2531 u32 max_num_sg, u32 max_num_meta_sg) 2532 { 2533 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 2534 max_num_meta_sg); 2535 } 2536 2537 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct 
ib_udata *udata) 2538 { 2539 struct mlx5_ib_dev *dev = to_mdev(ibmw->device); 2540 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 2541 struct mlx5_ib_mw *mw = to_mmw(ibmw); 2542 unsigned int ndescs; 2543 u32 *in = NULL; 2544 void *mkc; 2545 int err; 2546 struct mlx5_ib_alloc_mw req = {}; 2547 struct { 2548 __u32 comp_mask; 2549 __u32 response_length; 2550 } resp = {}; 2551 2552 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 2553 if (err) 2554 return err; 2555 2556 if (req.comp_mask || req.reserved1 || req.reserved2) 2557 return -EOPNOTSUPP; 2558 2559 if (udata->inlen > sizeof(req) && 2560 !ib_is_udata_cleared(udata, sizeof(req), 2561 udata->inlen - sizeof(req))) 2562 return -EOPNOTSUPP; 2563 2564 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); 2565 2566 in = kzalloc(inlen, GFP_KERNEL); 2567 if (!in) 2568 return -ENOMEM; 2569 2570 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 2571 2572 MLX5_SET(mkc, mkc, free, 1); 2573 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 2574 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); 2575 MLX5_SET(mkc, mkc, umr_en, 1); 2576 MLX5_SET(mkc, mkc, lr, 1); 2577 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 2578 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); 2579 MLX5_SET(mkc, mkc, qpn, 0xffffff); 2580 2581 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); 2582 if (err) 2583 goto free; 2584 2585 mw->mmkey.type = MLX5_MKEY_MW; 2586 ibmw->rkey = mw->mmkey.key; 2587 mw->mmkey.ndescs = ndescs; 2588 2589 resp.response_length = 2590 min(offsetofend(typeof(resp), response_length), udata->outlen); 2591 if (resp.response_length) { 2592 err = ib_copy_to_udata(udata, &resp, resp.response_length); 2593 if (err) 2594 goto free_mkey; 2595 } 2596 2597 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 2598 err = mlx5r_store_odp_mkey(dev, &mw->mmkey); 2599 if (err) 2600 goto free_mkey; 2601 } 2602 2603 kfree(in); 2604 return 0; 2605 2606 free_mkey: 2607 mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key); 2608 free: 2609 kfree(in); 2610 return err; 2611 } 2612 2613 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 2614 { 2615 struct mlx5_ib_dev *dev = to_mdev(mw->device); 2616 struct mlx5_ib_mw *mmw = to_mmw(mw); 2617 2618 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && 2619 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) 2620 /* 2621 * pagefault_single_data_segment() may be accessing mmw 2622 * if the user bound an ODP MR to this MW. 
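 * Waiting below ensures any such in-flight access finishes before the
 * mkey is destroyed.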
2623 */ 2624 mlx5r_deref_wait_odp_mkey(&mmw->mmkey); 2625 2626 return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key); 2627 } 2628 2629 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 2630 struct ib_mr_status *mr_status) 2631 { 2632 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 2633 int ret = 0; 2634 2635 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 2636 pr_err("Invalid status check mask\n"); 2637 ret = -EINVAL; 2638 goto done; 2639 } 2640 2641 mr_status->fail_status = 0; 2642 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 2643 if (!mmr->sig) { 2644 ret = -EINVAL; 2645 pr_err("signature status check requested on a non-signature enabled MR\n"); 2646 goto done; 2647 } 2648 2649 mmr->sig->sig_status_checked = true; 2650 if (!mmr->sig->sig_err_exists) 2651 goto done; 2652 2653 if (ibmr->lkey == mmr->sig->err_item.key) 2654 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 2655 sizeof(mr_status->sig_err)); 2656 else { 2657 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 2658 mr_status->sig_err.sig_err_offset = 0; 2659 mr_status->sig_err.key = mmr->sig->err_item.key; 2660 } 2661 2662 mmr->sig->sig_err_exists = false; 2663 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2664 } 2665 2666 done: 2667 return ret; 2668 } 2669 2670 static int 2671 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2672 int data_sg_nents, unsigned int *data_sg_offset, 2673 struct scatterlist *meta_sg, int meta_sg_nents, 2674 unsigned int *meta_sg_offset) 2675 { 2676 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2677 unsigned int sg_offset = 0; 2678 int n = 0; 2679 2680 mr->meta_length = 0; 2681 if (data_sg_nents == 1) { 2682 n++; 2683 mr->mmkey.ndescs = 1; 2684 if (data_sg_offset) 2685 sg_offset = *data_sg_offset; 2686 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2687 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2688 if (meta_sg_nents == 1) { 2689 n++; 2690 mr->meta_ndescs = 1; 2691 if (meta_sg_offset) 2692 sg_offset = *meta_sg_offset; 2693 else 2694 sg_offset = 0; 2695 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2696 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2697 } 2698 ibmr->length = mr->data_length + mr->meta_length; 2699 } 2700 2701 return n; 2702 } 2703 2704 static int 2705 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2706 struct scatterlist *sgl, 2707 unsigned short sg_nents, 2708 unsigned int *sg_offset_p, 2709 struct scatterlist *meta_sgl, 2710 unsigned short meta_sg_nents, 2711 unsigned int *meta_sg_offset_p) 2712 { 2713 struct scatterlist *sg = sgl; 2714 struct mlx5_klm *klms = mr->descs; 2715 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2716 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2717 int i, j = 0; 2718 2719 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2720 mr->ibmr.length = 0; 2721 2722 for_each_sg(sgl, sg, sg_nents, i) { 2723 if (unlikely(i >= mr->max_descs)) 2724 break; 2725 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2726 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2727 klms[i].key = cpu_to_be32(lkey); 2728 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2729 2730 sg_offset = 0; 2731 } 2732 2733 if (sg_offset_p) 2734 *sg_offset_p = sg_offset; 2735 2736 mr->mmkey.ndescs = i; 2737 mr->data_length = mr->ibmr.length; 2738 2739 if (meta_sg_nents) { 2740 sg = meta_sgl; 2741 sg_offset = meta_sg_offset_p ? 
*meta_sg_offset_p : 0; 2742 for_each_sg(meta_sgl, sg, meta_sg_nents, j) { 2743 if (unlikely(i + j >= mr->max_descs)) 2744 break; 2745 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + 2746 sg_offset); 2747 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - 2748 sg_offset); 2749 klms[i + j].key = cpu_to_be32(lkey); 2750 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2751 2752 sg_offset = 0; 2753 } 2754 if (meta_sg_offset_p) 2755 *meta_sg_offset_p = sg_offset; 2756 2757 mr->meta_ndescs = j; 2758 mr->meta_length = mr->ibmr.length - mr->data_length; 2759 } 2760 2761 return i + j; 2762 } 2763 2764 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 2765 { 2766 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2767 __be64 *descs; 2768 2769 if (unlikely(mr->mmkey.ndescs == mr->max_descs)) 2770 return -ENOMEM; 2771 2772 descs = mr->descs; 2773 descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2774 2775 return 0; 2776 } 2777 2778 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) 2779 { 2780 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2781 __be64 *descs; 2782 2783 if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs)) 2784 return -ENOMEM; 2785 2786 descs = mr->descs; 2787 descs[mr->mmkey.ndescs + mr->meta_ndescs++] = 2788 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); 2789 2790 return 0; 2791 } 2792 2793 static int 2794 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2795 int data_sg_nents, unsigned int *data_sg_offset, 2796 struct scatterlist *meta_sg, int meta_sg_nents, 2797 unsigned int *meta_sg_offset) 2798 { 2799 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2800 struct mlx5_ib_mr *pi_mr = mr->mtt_mr; 2801 int n; 2802 2803 pi_mr->mmkey.ndescs = 0; 2804 pi_mr->meta_ndescs = 0; 2805 pi_mr->meta_length = 0; 2806 2807 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2808 pi_mr->desc_size * pi_mr->max_descs, 2809 DMA_TO_DEVICE); 2810 2811 pi_mr->ibmr.page_size = ibmr->page_size; 2812 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, 2813 mlx5_set_page); 2814 if (n != data_sg_nents) 2815 return n; 2816 2817 pi_mr->data_iova = pi_mr->ibmr.iova; 2818 pi_mr->data_length = pi_mr->ibmr.length; 2819 pi_mr->ibmr.length = pi_mr->data_length; 2820 ibmr->length = pi_mr->data_length; 2821 2822 if (meta_sg_nents) { 2823 u64 page_mask = ~((u64)ibmr->page_size - 1); 2824 u64 iova = pi_mr->data_iova; 2825 2826 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, 2827 meta_sg_offset, mlx5_set_page_pi); 2828 2829 pi_mr->meta_length = pi_mr->ibmr.length; 2830 /* 2831 * PI address for the HW is the offset of the metadata address 2832 * relative to the first data page address. 2833 * It equals to first data page address + size of data pages + 2834 * metadata offset at the first metadata page 2835 */ 2836 pi_mr->pi_iova = (iova & page_mask) + 2837 pi_mr->mmkey.ndescs * ibmr->page_size + 2838 (pi_mr->ibmr.iova & ~page_mask); 2839 /* 2840 * In order to use one MTT MR for data and metadata, we register 2841 * also the gaps between the end of the data and the start of 2842 * the metadata (the sig MR will verify that the HW will access 2843 * to right addresses). This mapping is safe because we use 2844 * internal mkey for the registration. 
2845 */ 2846 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; 2847 pi_mr->ibmr.iova = iova; 2848 ibmr->length += pi_mr->meta_length; 2849 } 2850 2851 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2852 pi_mr->desc_size * pi_mr->max_descs, 2853 DMA_TO_DEVICE); 2854 2855 return n; 2856 } 2857 2858 static int 2859 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2860 int data_sg_nents, unsigned int *data_sg_offset, 2861 struct scatterlist *meta_sg, int meta_sg_nents, 2862 unsigned int *meta_sg_offset) 2863 { 2864 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2865 struct mlx5_ib_mr *pi_mr = mr->klm_mr; 2866 int n; 2867 2868 pi_mr->mmkey.ndescs = 0; 2869 pi_mr->meta_ndescs = 0; 2870 pi_mr->meta_length = 0; 2871 2872 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, 2873 pi_mr->desc_size * pi_mr->max_descs, 2874 DMA_TO_DEVICE); 2875 2876 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, 2877 meta_sg, meta_sg_nents, meta_sg_offset); 2878 2879 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, 2880 pi_mr->desc_size * pi_mr->max_descs, 2881 DMA_TO_DEVICE); 2882 2883 /* This is zero-based memory region */ 2884 pi_mr->data_iova = 0; 2885 pi_mr->ibmr.iova = 0; 2886 pi_mr->pi_iova = pi_mr->data_length; 2887 ibmr->length = pi_mr->ibmr.length; 2888 2889 return n; 2890 } 2891 2892 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2893 int data_sg_nents, unsigned int *data_sg_offset, 2894 struct scatterlist *meta_sg, int meta_sg_nents, 2895 unsigned int *meta_sg_offset) 2896 { 2897 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2898 struct mlx5_ib_mr *pi_mr = NULL; 2899 int n; 2900 2901 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); 2902 2903 mr->mmkey.ndescs = 0; 2904 mr->data_length = 0; 2905 mr->data_iova = 0; 2906 mr->meta_ndescs = 0; 2907 mr->pi_iova = 0; 2908 /* 2909 * As a performance optimization, if possible, there is no need to 2910 * perform UMR operation to register the data/metadata buffers. 2911 * First try to map the sg lists to PA descriptors with local_dma_lkey. 2912 * Fallback to UMR only in case of a failure. 2913 */ 2914 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2915 data_sg_offset, meta_sg, meta_sg_nents, 2916 meta_sg_offset); 2917 if (n == data_sg_nents + meta_sg_nents) 2918 goto out; 2919 /* 2920 * As a performance optimization, if possible, there is no need to map 2921 * the sg lists to KLM descriptors. First try to map the sg lists to MTT 2922 * descriptors and fallback to KLM only in case of a failure. 2923 * It's more efficient for the HW to work with MTT descriptors 2924 * (especially in high load). 2925 * Use KLM (indirect access) only if it's mandatory. 
2926 */ 2927 pi_mr = mr->mtt_mr; 2928 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2929 data_sg_offset, meta_sg, meta_sg_nents, 2930 meta_sg_offset); 2931 if (n == data_sg_nents + meta_sg_nents) 2932 goto out; 2933 2934 pi_mr = mr->klm_mr; 2935 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, 2936 data_sg_offset, meta_sg, meta_sg_nents, 2937 meta_sg_offset); 2938 if (unlikely(n != data_sg_nents + meta_sg_nents)) 2939 return -ENOMEM; 2940 2941 out: 2942 /* This is zero-based memory region */ 2943 ibmr->iova = 0; 2944 mr->pi_mr = pi_mr; 2945 if (pi_mr) 2946 ibmr->sig_attrs->meta_length = pi_mr->meta_length; 2947 else 2948 ibmr->sig_attrs->meta_length = mr->meta_length; 2949 2950 return 0; 2951 } 2952 2953 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, 2954 unsigned int *sg_offset) 2955 { 2956 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2957 int n; 2958 2959 mr->mmkey.ndescs = 0; 2960 2961 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, 2962 mr->desc_size * mr->max_descs, 2963 DMA_TO_DEVICE); 2964 2965 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) 2966 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, 2967 NULL); 2968 else 2969 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 2970 mlx5_set_page); 2971 2972 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 2973 mr->desc_size * mr->max_descs, 2974 DMA_TO_DEVICE); 2975 2976 return n; 2977 } 2978