// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */

#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel module memory.
 */
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

static __be64 get_umr_enable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_KEY |
		 MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

static __be64 get_umr_disable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev)
{
	u64 result;

	result = MLX5_MKEY_MASK_LEN |
		 MLX5_MKEY_MASK_PAGE_SIZE |
		 MLX5_MKEY_MASK_START_ADDR;
	if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
		result |= MLX5_MKEY_MASK_PAGE_SIZE_5;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
	u64 result;

	result = MLX5_MKEY_MASK_LR |
		 MLX5_MKEY_MASK_LW |
		 MLX5_MKEY_MASK_RR |
		 MLX5_MKEY_MASK_RW;

	if (MLX5_CAP_GEN(dev->mdev, atomic))
		result |= MLX5_MKEY_MASK_A;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;

	return cpu_to_be64(result);
}

static __be64 get_umr_update_pd_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_PD;

	return cpu_to_be64(result);
}

static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_A &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		return -EPERM;

	return 0;
}

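/*
 * Maximum number of outstanding UMR WRs; sizes the UMR QP send queue and the
 * semaphore that throttles posters.
 */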
enum {
	MAX_UMR_WR = 128,
};

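/*
 * Transition the UMR QP through RESET -> INIT -> RTR -> RTS so it is ready to
 * post UMR WQEs on port 1.
 */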
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};
	int ret;

	attr.qp_state = IB_QPS_INIT;
	attr.port_num = 1;
	ret = ib_modify_qp(qp, &attr,
			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		return ret;
	}

	return 0;
}

int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_cq *cq;
	struct ib_qp *qp;
	int ret = 0;

	/*
	 * UMR qp is set once, never changed until device unload.
	 * Avoid taking the mutex if initialization is already done.
	 */
	if (dev->umrc.qp)
		return 0;

	mutex_lock(&dev->umrc.init_lock);
	/* First user allocates the UMR resources. Skip if already allocated. */
	if (dev->umrc.qp)
		goto unlock;

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		ret = PTR_ERR(cq);
		goto unlock;
	}

	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr.cap.max_send_wr = MAX_UMR_WR;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
	init_attr.port_num = 1;
	qp = ib_create_qp(dev->umrc.pd, &init_attr);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		ret = PTR_ERR(qp);
		goto destroy_cq;
	}

	ret = mlx5r_umr_qp_rst2rts(dev, qp);
	if (ret)
		goto destroy_qp;

	dev->umrc.cq = cq;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);
	mutex_init(&dev->umrc.lock);
	dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
	dev->umrc.qp = qp;

	mutex_unlock(&dev->umrc.init_lock);
	return 0;

destroy_qp:
	ib_destroy_qp(qp);
destroy_cq:
	ib_free_cq(cq);
unlock:
	mutex_unlock(&dev->umrc.init_lock);
	return ret;
}

void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
	if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
		return;
	mutex_destroy(&dev->umrc.lock);
	/* After device init, the UMR cq/qp are not unset during the lifetime. */
	ib_destroy_qp(dev->umrc.qp);
	ib_free_cq(dev->umrc.cq);
}

int mlx5r_umr_init(struct mlx5_ib_dev *dev)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		return PTR_ERR(pd);
	}
	dev->umrc.pd = pd;

	mutex_init(&dev->umrc.init_lock);

	return 0;
}

void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
{
	if (!dev->umrc.pd)
		return;

	mutex_destroy(&dev->umrc.init_lock);
	ib_dealloc_pd(dev->umrc.pd);
}

static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	unsigned int wqe_size =
		with_data ? sizeof(struct mlx5r_umr_wqe) :
			    sizeof(struct mlx5r_umr_wqe) -
				    sizeof(struct mlx5_wqe_data_seg);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
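	/*
	 * The caller's ib_cqe pointer is carried as the WQE's wr_id so that
	 * the completion handler can recover it from wc->wr_cqe.
	 */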
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}

static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
			     struct mlx5r_umr_context *umr_context,
			     struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_qp_attr attr;
	int err;

	mutex_lock(&umrc->lock);
	/* Prevent any further WRs from being sent from now on */
	if (umrc->state != MLX5_UMR_STATE_RECOVER) {
		mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
			     umrc->state);
		umrc->state = MLX5_UMR_STATE_RECOVER;
	}
	mutex_unlock(&umrc->lock);

	/* Send a final/barrier WR (the failed one) and wait for its completion.
	 * This ensures that all previous WRs get a completion before we set
	 * the QP state to RESET.
	 */
	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
				  with_data);
	if (err) {
		mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
		goto err;
	}

	/* Since the QP is in an error state, it will only receive
	 * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier,
	 * we don't care about its status.
	 */
	wait_for_completion(&umr_context->done);

	attr.qp_state = IB_QPS_RESET;
	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
	if (err) {
		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
		goto err;
	}

	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
	if (err) {
		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
		goto err;
	}

	umrc->state = MLX5_UMR_STATE_ACTIVE;
	return 0;

err:
	umrc->state = MLX5_UMR_STATE_ERR;
	return err;
}

static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
	context->cqe.done = mlx5r_umr_done;
	init_completion(&context->done);
}

static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
				    struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5r_umr_context umr_context;
	int err;

	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
	if (WARN_ON(err))
		return err;

	mlx5r_umr_init_context(&umr_context);

	down(&umrc->sem);
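	/*
	 * Post the WR and wait for its completion. If the WR is flushed
	 * (IB_WC_WR_FLUSH_ERR) or the UMR QP is in recovery, keep retrying
	 * until the WR completes successfully or recovery fails.
	 */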
	while (true) {
		mutex_lock(&umrc->lock);
		if (umrc->state == MLX5_UMR_STATE_ERR) {
			mutex_unlock(&umrc->lock);
			err = -EFAULT;
			break;
		}

		if (umrc->state == MLX5_UMR_STATE_RECOVER) {
			mutex_unlock(&umrc->lock);
			usleep_range(3000, 5000);
			continue;
		}

		err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
					  with_data);
		mutex_unlock(&umrc->lock);
		if (err) {
			mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
				     err);
			break;
		}

		wait_for_completion(&umr_context.done);

		if (umr_context.status == IB_WC_SUCCESS)
			break;

		if (umr_context.status == IB_WC_WR_FLUSH_ERR)
			continue;

		WARN_ON_ONCE(1);
		mlx5_ib_warn(dev,
			     "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
			     umr_context.status, mkey);
		err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
		if (err)
			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
				     err);
		err = -EFAULT;
		break;
	}
	up(&umrc->sem);
	return err;
}

/**
 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
 * @mr: The MR to fence
 *
 * Upon return the NIC will not be doing any DMA to the pages under the MR,
 * and any DMA in progress will be completed. Failure of this function
 * indicates the HW has failed catastrophically.
 */
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};

	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}

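/*
 * Translate IB access flags into mkey context access bits. Relaxed ordering
 * for reads is set only when requested and either the device reports
 * relaxed_ordering_read or PCIe relaxed ordering is enabled.
 */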
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
				       struct mlx5_mkey_seg *seg,
				       unsigned int access_flags)
{
	bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
		       (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
			pcie_relaxed_ordering_enabled(dev->mdev->pdev));

	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, seg, lr, 1);
	MLX5_SET(mkc, seg, relaxed_ordering_write,
		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
	MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
}

int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
			      int access_flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};
	int err;

	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
	if (err)
		return err;

	mr->access_flags = access_flags;
	return 0;
}

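/*
 * MLX5_MAX_UMR_CHUNK is the largest XLT buffer used for a single UMR post
 * (about 1M); MLX5_SPARE_UMR_CHUNK (64K) is the smaller size tried when a
 * high-order allocation of the full chunk is not available.
 */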
#define MLX5_MAX_UMR_CHUNK \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 */
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context, just that
	 * the allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = size / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}

static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	if (xlt == xlt_emergency_page) {
		mutex_unlock(&xlt_emergency_page_mutex);
		return;
	}

	free_pages((unsigned long)xlt, get_order(length));
}

static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
				     struct ib_sge *sg)
{
	struct device *ddev = &dev->mdev->pdev->dev;

	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
	mlx5r_umr_free_xlt(xlt, sg->length);
}

/*
 * Create an XLT buffer ready for submission.
 */
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
				  size_t nents, size_t ent_size,
				  unsigned int flags)
{
	struct device *ddev = &dev->mdev->pdev->dev;
	dma_addr_t dma;
	void *xlt;

	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
				  flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
								   GFP_KERNEL);
	sg->length = nents * ent_size;
	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		mlx5r_umr_free_xlt(xlt, sg->length);
		return NULL;
	}
	sg->addr = dma;
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	return xlt;
}

static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				  unsigned int flags, struct ib_sge *sg)
{
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		/* fail if free */
		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
	else
		/* fail if not free */
		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
	ctrl_seg->xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}

static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
					      struct mlx5_mkey_seg *mkey_seg,
					      struct mlx5_ib_mr *mr,
					      unsigned int page_shift)
{
	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}

static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
				  struct ib_sge *sg)
{
	data_seg->byte_count = cpu_to_be32(sg->length);
	data_seg->lkey = cpu_to_be32(sg->lkey);
	data_seg->addr = cpu_to_be64(sg->addr);
}

static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				    u64 offset)
{
	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);

	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}

static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
				       struct mlx5r_umr_wqe *wqe,
				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
				       unsigned int flags)
{
	bool update_pd_access, update_translation;

	if (flags & MLX5_IB_UPD_XLT_ENABLE)
		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();

	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
			   flags & MLX5_IB_UPD_XLT_PD ||
			   flags & MLX5_IB_UPD_XLT_ACCESS;

	if (update_pd_access) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	}

	update_translation =
		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;

	if (update_translation) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev);
		if (!mr->ibmr.length)
			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
		if (flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)
			wqe->ctrl_seg.mkey_mask &=
				cpu_to_be64(~MLX5_MKEY_MASK_PAGE_SIZE);
	}

	wqe->ctrl_seg.xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}

static void
_mlx5r_umr_init_wqe(struct mlx5_ib_mr *mr, struct mlx5r_umr_wqe *wqe,
		    struct ib_sge *sg, unsigned int flags,
		    unsigned int page_shift, bool dd)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe->ctrl_seg, flags, sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe->mkey_seg, mr, page_shift);
	if (dd) /* Use the data direct internal kernel PD */
		MLX5_SET(mkc, &wqe->mkey_seg, pd, dev->ddr.pdn);
	mlx5r_umr_set_update_xlt_data_seg(&wqe->data_seg, sg);
}

static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd,
			 size_t start_block, size_t nblocks)
{
	size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	size_t processed_blocks = 0;
	struct ib_block_iter biter;
	size_t cur_block_idx = 0;
	struct mlx5_ksm *cur_ksm;
	struct mlx5_mtt *cur_mtt;
	size_t orig_sg_length;
	size_t total_blocks;
	size_t final_size;
	void *curr_entry;
	struct ib_sge sg;
	void *entry;
	u64 offset;
	int err = 0;

	total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
	if (start_block > total_blocks)
		return -EINVAL;

	/* nblocks 0 means update all blocks starting from start_block */
	if (nblocks)
		total_blocks = nblocks;

	entry = mlx5r_umr_create_xlt(dev, &sg, total_blocks, ent_size, flags);
	if (!entry)
		return -ENOMEM;

	orig_sg_length = sg.length;

	_mlx5r_umr_init_wqe(mr, &wqe, &sg, flags, mr->page_shift, dd);

	/* Set initial translation offset to start_block */
	offset = (u64)start_block * ent_size;
	mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);

	if (dd)
		cur_ksm = entry;
	else
		cur_mtt = entry;

	curr_entry = entry;

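	/*
	 * Walk the umem in blocks of 1 << mr->page_shift bytes: skip the first
	 * start_block blocks, and whenever the XLT buffer fills up, post it to
	 * HW and continue filling it from its start at the next translation
	 * offset.
	 */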
	rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
		if (cur_block_idx < start_block) {
			cur_block_idx++;
			continue;
		}

		if (nblocks && processed_blocks >= nblocks)
			break;

		if (curr_entry == entry + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);

			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
						       true);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			offset += sg.length;
			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
			if (dd)
				cur_ksm = entry;
			else
				cur_mtt = entry;
		}

		if (dd) {
			cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
			cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
			if (mr->umem->is_dmabuf &&
			    (flags & MLX5_IB_UPD_XLT_ZAP)) {
				cur_ksm->va = 0;
				cur_ksm->key = 0;
			}
			cur_ksm++;
			curr_entry = cur_ksm;
		} else {
			cur_mtt->ptag =
				cpu_to_be64(rdma_block_iter_dma_address(&biter) |
					    MLX5_IB_MTT_PRESENT);
			if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
				cur_mtt->ptag = 0;
			cur_mtt++;
			curr_entry = cur_mtt;
		}

		processed_blocks++;
	}

	final_size = curr_entry - entry;
	sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
	memset(curr_entry, 0, sg.length - final_size);
	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

err:
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
	return err;
}

int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
					       unsigned int flags,
					       size_t start_block,
					       size_t nblocks)
{
	/* No invalidation flow is expected */
	if (WARN_ON(!mr->umem->is_dmabuf) || ((flags & MLX5_IB_UPD_XLT_ZAP) &&
					      !(flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, true, start_block, nblocks);
}

int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr,
					 unsigned int flags)
{
	return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags, 0, 0);
}

int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
				  size_t start_block, size_t nblocks)
{
	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, false, start_block, nblocks);
}

/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 */
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	return mlx5r_umr_update_mr_pas_range(mr, flags, 0, 0);
}

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	const int page_mask = page_align - 1;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t size_to_map = 0;
	size_t orig_sg_length;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

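	/*
	 * Populate and post the XLT one buffer's worth of entries at a time.
	 * The last iteration also applies mlx5r_umr_final_update_xlt() so the
	 * final WQE carries the mkey enable/translation updates.
	 */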
	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		/*
		 * npages is the maximum number of pages to map, but we
		 * can't guarantee that all pages are actually mapped.
		 *
		 * For example, if a page is a P2P page of a type that is not
		 * supported for mapping, the number of pages mapped will be
		 * less than requested.
		 */
		err = mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		if (err)
			return err;
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
	}
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}

/*
 * Update only the page-size (log_page_size) field of an existing memory key
 * using UMR. This is useful when the MR's physical layout stays the same
 * but the optimal page shift has changed (e.g. dmabuf after pages are
 * pinned and the HW can switch from 4K to huge-page alignment).
 */
int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
				   unsigned int page_shift,
				   bool dd)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};
	int err;

	/* Build UMR wqe: we touch only PAGE_SIZE, so use the dedicated mask */
	wqe.ctrl_seg.mkey_mask = get_umr_update_translation_mask(dev);

	/* MR must be free while page size is modified */
	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE | MLX5_UMR_INLINE;

	/* Fill mkey segment with the new page size, keep the rest unchanged */
	MLX5_SET(mkc, &wqe.mkey_seg, log_page_size, page_shift);

	if (dd)
		MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
	else
		MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);

	MLX5_SET64(mkc, &wqe.mkey_seg, start_addr, mr->ibmr.iova);
	MLX5_SET64(mkc, &wqe.mkey_seg, len, mr->ibmr.length);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
	if (!err)
		mr->page_shift = page_shift;

	return err;
}

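/*
 * Dispatch a PAS update to either the data-direct (KSM) flow or the regular
 * (MTT) flow.
 */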
static inline int
_mlx5r_dmabuf_umr_update_pas(struct mlx5_ib_mr *mr, unsigned int flags,
			     size_t start_block, size_t nblocks, bool dd)
{
	if (dd)
		return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags,
								  start_block,
								  nblocks);
	else
		return mlx5r_umr_update_mr_pas_range(mr, flags, start_block,
						     nblocks);
}

/**
 * _mlx5r_umr_zap_mkey - Make an mkey non-present by zapping its translations
 * @mr: The memory region to update
 * @flags: Translation table update flags
 * @page_shift: The new (optimized) page shift the caller intends to use
 * @nblocks: Returns the number of entries that were zapped
 * @dd: Whether this is a data-direct (KSM) MR
 *
 * Zap (zero out) the first N translation entries of the mkey, where N is
 * determined by the largest page size supported by the device and the MR
 * length, and then update the mkey's page size to the largest possible value,
 * ensuring the MR is completely non-present and safe for further updates.
 * This is useful when updating the page size of a dmabuf MR on a page fault.
 *
 * Return: 0 on success, with the number of zapped entries returned in
 * @nblocks (0 means all entries were zapped at the current page size); a
 * negative error code on failure.
 */
static int _mlx5r_umr_zap_mkey(struct mlx5_ib_mr *mr,
			       unsigned int flags,
			       unsigned int page_shift,
			       size_t *nblocks,
			       bool dd)
{
	unsigned int old_page_shift = mr->page_shift;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	unsigned int max_page_shift;
	size_t page_shift_nblocks;
	unsigned int max_log_size;
	int access_mode;
	int err;

	access_mode = dd ? MLX5_MKC_ACCESS_MODE_KSM : MLX5_MKC_ACCESS_MODE_MTT;
	flags |= MLX5_IB_UPD_XLT_KEEP_PGSZ | MLX5_IB_UPD_XLT_ZAP |
		 MLX5_IB_UPD_XLT_ATOMIC;
	max_log_size = get_max_log_entity_size_cap(dev, access_mode);
	max_page_shift = order_base_2(mr->ibmr.length);
	max_page_shift = min(max(max_page_shift, page_shift), max_log_size);
	/* Count blocks in units of max_page_shift; we will zap exactly this
	 * many to make the whole MR non-present.
	 * Block size must be aligned to MLX5_UMR_FLEX_ALIGNMENT since it may
	 * be used as offset into the XLT later on.
	 */
	*nblocks = ib_umem_num_dma_blocks(mr->umem, 1UL << max_page_shift);
	if (dd)
		*nblocks = ALIGN(*nblocks, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
	else
		*nblocks = ALIGN(*nblocks, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT);
	page_shift_nblocks = ib_umem_num_dma_blocks(mr->umem,
						    1UL << page_shift);
	/* If the number of blocks at the max possible page shift is greater
	 * than or equal to the number of blocks at the new page size, we
	 * should just go over the whole mkey entries.
	 */
	if (*nblocks >= page_shift_nblocks)
		*nblocks = 0;

	/* Make the first nblocks entries non-present without changing
	 * page size yet.
	 */
	if (*nblocks)
		mr->page_shift = max_page_shift;
	err = _mlx5r_dmabuf_umr_update_pas(mr, flags, 0, *nblocks, dd);
	if (err) {
		mr->page_shift = old_page_shift;
		return err;
	}

	/* Change page size to the max page size now that the MR is completely
	 * non-present.
	 */
	if (*nblocks) {
		err = mlx5r_umr_update_mr_page_shift(mr, max_page_shift, dd);
		if (err) {
			mr->page_shift = old_page_shift;
			return err;
		}
	}

	return 0;
}

/**
 * mlx5r_umr_dmabuf_update_pgsz - Safely update DMABUF MR page size and its
 * entries accordingly
 * @mr: The memory region to update
 * @xlt_flags: Translation table update flags
 * @page_shift: The new (optimized) page shift to use
 *
 * This function updates the page size and mkey translation entries for a DMABUF
 * MR in a safe, multi-step process to avoid exposing partially updated
 * mappings. The update is performed in 5 steps:
 * 1. Make the first X entries non-present, where X is calculated to be
 *    minimal according to a large page shift that can be used to cover the
 *    MR length.
 * 2. Update the page size to the large supported page size
 * 3. Load the remaining N-X entries according to the (optimized) page_shift
 * 4. Update the page size according to the (optimized) page_shift
 * 5. Load the first X entries with the correct translations
 *
 * This ensures that at no point is the MR accessible with a partially updated
 * translation table, maintaining correctness and preventing access to stale or
 * inconsistent mappings.
 *
 * Returns 0 on success or a negative error code on failure.
 */
int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
				 unsigned int page_shift)
{
	unsigned int old_page_shift = mr->page_shift;
	size_t zapped_blocks;
	size_t total_blocks;
	int err;

	err = _mlx5r_umr_zap_mkey(mr, xlt_flags, page_shift, &zapped_blocks,
				  mr->data_direct);
	if (err)
		return err;

	/* _mlx5r_umr_zap_mkey already enables the mkey */
	xlt_flags &= ~MLX5_IB_UPD_XLT_ENABLE;
	mr->page_shift = page_shift;
	total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
	if (zapped_blocks && zapped_blocks < total_blocks) {
		/* Update PAS according to the new page size but don't update
		 * the page size in the mkey yet.
		 */
		err = _mlx5r_dmabuf_umr_update_pas(
			mr,
			xlt_flags | MLX5_IB_UPD_XLT_KEEP_PGSZ,
			zapped_blocks,
			total_blocks - zapped_blocks,
			mr->data_direct);
		if (err)
			goto err;
	}

	err = mlx5r_umr_update_mr_page_shift(mr, mr->page_shift,
					     mr->data_direct);
	if (err)
		goto err;
	err = _mlx5r_dmabuf_umr_update_pas(mr, xlt_flags, 0, zapped_blocks,
					   mr->data_direct);
	if (err)
		goto err;

	return 0;
err:
	mr->page_shift = old_page_shift;
	return err;
}