// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *query_cap = NULL, *cap;
	int ret;

	query_cap = kzalloc(query_sz, GFP_KERNEL);
	if (!query_cap)
		return -ENOMEM;

	ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap,
					    MLX5_CAP_GENERAL_2);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
	if (!MLX5_GET(cmd_hca_cap_2, cap, migratable))
		ret = -EOPNOTSUPP;
out:
	kfree(query_cap);
	return ret;
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only when there is no active save command.
	 * Running both in parallel might end up with a failure in the save
	 * command once it tries to turn on 'tracking' on a suspended device.
	 */
	if (migf) {
		err = wait_for_completion_interruptible(&migf->save_comp);
		if (err)
			return err;
	}

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

	err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
	if (migf)
		complete(&migf->save_comp);

	return err;
}

int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
					  size_t *state_size, u64 *total_size,
					  u8 query_flags)
{
	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
	bool inc = query_flags & MLX5VF_QUERY_INC;
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only when there is no active save command.
	 * Running both in parallel might end up with a failure in the
	 * incremental query command on an un-tracked vhca.
	 */
	if (inc) {
		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
		if (ret)
			return ret;
		/* Upon cleanup, ignore previous pre_copy error state */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
		    !(query_flags & MLX5VF_QUERY_CLEANUP)) {
			/*
			 * In case we had a PRE_COPY error, only query the full
			 * image for the final image.
			 */
			if (!(query_flags & MLX5VF_QUERY_FINAL)) {
				*state_size = 0;
				complete(&mvdev->saving_migf->save_comp);
				return 0;
			}
			query_flags &= ~MLX5VF_QUERY_INC;
		}
		/* Block the incremental query, which is state-dependent */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) {
			complete(&mvdev->saving_migf->save_comp);
			return -ENODEV;
		}
	}

	MLX5_SET(query_vhca_migration_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
	MLX5_SET(query_vhca_migration_state_in, in, incremental,
		 query_flags & MLX5VF_QUERY_INC);
	MLX5_SET(query_vhca_migration_state_in, in, chunk, mvdev->chunk_mode);

	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
				  out);
	if (inc)
		complete(&mvdev->saving_migf->save_comp);

	if (ret)
		return ret;

	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
			       required_umem_size);
	if (total_size)
		*total_size = mvdev->chunk_mode ?
			MLX5_GET64(query_vhca_migration_state_out, out,
				   remaining_total_size) : *state_size;

	return 0;
}

static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev)
{
	mvdev->tracker.object_changed = true;
	complete(&mvdev->tracker_comp);
}

static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker as being in error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}

static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}

	return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev, NULL);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}

void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;
225 226 if (!pdev->is_virtfn) 227 return; 228 229 mvdev->mdev = mlx5_vf_get_core_dev(pdev); 230 if (!mvdev->mdev) 231 return; 232 233 if (!MLX5_CAP_GEN(mvdev->mdev, migration)) 234 goto end; 235 236 if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && 237 MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) 238 goto end; 239 240 mvdev->vf_id = pci_iov_vf_id(pdev); 241 if (mvdev->vf_id < 0) 242 goto end; 243 244 ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1); 245 if (ret) 246 goto end; 247 248 if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1, 249 &mvdev->vhca_id)) 250 goto end; 251 252 mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0); 253 if (!mvdev->cb_wq) 254 goto end; 255 256 mutex_init(&mvdev->state_mutex); 257 spin_lock_init(&mvdev->reset_lock); 258 mvdev->nb.notifier_call = mlx5fv_vf_event; 259 ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id, 260 &mvdev->nb); 261 if (ret) { 262 destroy_workqueue(mvdev->cb_wq); 263 goto end; 264 } 265 266 mvdev->migrate_cap = 1; 267 mvdev->core_device.vdev.migration_flags = 268 VFIO_MIGRATION_STOP_COPY | 269 VFIO_MIGRATION_P2P | 270 VFIO_MIGRATION_PRE_COPY; 271 272 mvdev->core_device.vdev.mig_ops = mig_ops; 273 init_completion(&mvdev->tracker_comp); 274 if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) 275 mvdev->core_device.vdev.log_ops = log_ops; 276 277 if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) 278 mvdev->chunk_mode = 1; 279 280 end: 281 mlx5_vf_put_core_dev(mvdev->mdev); 282 } 283 284 static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, 285 u16 *vhca_id) 286 { 287 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 288 int out_size; 289 void *out; 290 int ret; 291 292 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 293 out = kzalloc(out_size, GFP_KERNEL); 294 if (!out) 295 return -ENOMEM; 296 297 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 298 MLX5_SET(query_hca_cap_in, in, other_function, 1); 299 MLX5_SET(query_hca_cap_in, in, function_id, function_id); 300 MLX5_SET(query_hca_cap_in, in, op_mod, 301 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | 302 HCA_CAP_OPMOD_GET_CUR); 303 304 ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); 305 if (ret) 306 goto err_exec; 307 308 *vhca_id = MLX5_GET(query_hca_cap_out, out, 309 capability.cmd_hca_cap.vhca_id); 310 311 err_exec: 312 kfree(out); 313 return ret; 314 } 315 316 static u32 *alloc_mkey_in(u32 npages, u32 pdn) 317 { 318 int inlen; 319 void *mkc; 320 u32 *in; 321 322 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 323 sizeof(__be64) * round_up(npages, 2); 324 325 in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT); 326 if (!in) 327 return NULL; 328 329 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 330 DIV_ROUND_UP(npages, 2)); 331 332 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 333 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 334 MLX5_SET(mkc, mkc, lr, 1); 335 MLX5_SET(mkc, mkc, lw, 1); 336 MLX5_SET(mkc, mkc, rr, 1); 337 MLX5_SET(mkc, mkc, rw, 1); 338 MLX5_SET(mkc, mkc, pd, pdn); 339 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 340 MLX5_SET(mkc, mkc, qpn, 0xffffff); 341 MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); 342 MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); 343 MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE); 344 345 return in; 346 } 347 348 static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in, 349 u32 *mkey) 350 { 351 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 352 sizeof(__be64) * 
round_up(npages, 2); 353 354 return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen); 355 } 356 357 static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages, 358 u32 *mkey_in, struct dma_iova_state *state, 359 enum dma_data_direction dir) 360 { 361 dma_addr_t addr; 362 __be64 *mtt; 363 int i; 364 365 if (dma_use_iova(state)) { 366 dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir, 367 0); 368 } else { 369 mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, 370 klm_pas_mtt); 371 for (i = npages - 1; i >= 0; i--) { 372 addr = be64_to_cpu(mtt[i]); 373 dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir); 374 } 375 } 376 } 377 378 static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages, 379 struct page **page_list, u32 *mkey_in, 380 struct dma_iova_state *state, 381 enum dma_data_direction dir) 382 { 383 dma_addr_t addr; 384 size_t mapped = 0; 385 __be64 *mtt; 386 int i, err; 387 388 mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt); 389 390 if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) { 391 addr = state->addr; 392 for (i = 0; i < npages; i++) { 393 err = dma_iova_link(mdev->device, state, 394 page_to_phys(page_list[i]), mapped, 395 PAGE_SIZE, dir, 0); 396 if (err) 397 goto error; 398 *mtt++ = cpu_to_be64(addr); 399 addr += PAGE_SIZE; 400 mapped += PAGE_SIZE; 401 } 402 err = dma_iova_sync(mdev->device, state, 0, mapped); 403 if (err) 404 goto error; 405 } else { 406 for (i = 0; i < npages; i++) { 407 addr = dma_map_page(mdev->device, page_list[i], 0, 408 PAGE_SIZE, dir); 409 err = dma_mapping_error(mdev->device, addr); 410 if (err) 411 goto error; 412 *mtt++ = cpu_to_be64(addr); 413 } 414 } 415 return 0; 416 417 error: 418 unregister_dma_pages(mdev, i, mkey_in, state, dir); 419 return err; 420 } 421 422 static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf) 423 { 424 struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev; 425 struct mlx5_core_dev *mdev = mvdev->mdev; 426 int ret; 427 428 lockdep_assert_held(&mvdev->state_mutex); 429 if (mvdev->mdev_detach) 430 return -ENOTCONN; 431 432 if (buf->mkey_in || !buf->npages) 433 return -EINVAL; 434 435 buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn); 436 if (!buf->mkey_in) 437 return -ENOMEM; 438 439 ret = register_dma_pages(mdev, buf->npages, buf->page_list, 440 buf->mkey_in, &buf->state, buf->dma_dir); 441 if (ret) 442 goto err_register_dma; 443 444 ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey); 445 if (ret) 446 goto err_create_mkey; 447 448 return 0; 449 450 err_create_mkey: 451 unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state, 452 buf->dma_dir); 453 err_register_dma: 454 kvfree(buf->mkey_in); 455 buf->mkey_in = NULL; 456 return ret; 457 } 458 459 static void free_page_list(u32 npages, struct page **page_list) 460 { 461 int i; 462 463 /* Undo alloc_pages_bulk() */ 464 for (i = npages - 1; i >= 0; i--) 465 __free_page(page_list[i]); 466 467 kvfree(page_list); 468 } 469 470 void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) 471 { 472 struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev; 473 struct mlx5_core_dev *mdev = mvdev->mdev; 474 475 lockdep_assert_held(&mvdev->state_mutex); 476 WARN_ON(mvdev->mdev_detach); 477 478 if (buf->mkey_in) { 479 mlx5_core_destroy_mkey(mdev, buf->mkey); 480 unregister_dma_pages(mdev, buf->npages, buf->mkey_in, 481 &buf->state, buf->dma_dir); 482 kvfree(buf->mkey_in); 483 } 484 485 free_page_list(buf->npages, buf->page_list); 486 kfree(buf); 487 } 488 489 static 
int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
{
	unsigned int filled, done = 0;
	int i;

	*page_list =
		kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
	if (!*page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
					  *page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	return 0;

err:
	for (i = 0; i < done; i++)
		__free_page((*page_list)[i]);

	kvfree(*page_list);
	*page_list = NULL;
	return -ENOMEM;
}

struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
			 enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf;
	int ret;

	buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	buf->dma_dir = dma_dir;
	buf->migf = migf;
	if (npages) {
		ret = mlx5vf_add_pages(&buf->page_list, npages);
		if (ret)
			goto end;

		buf->npages = npages;

		if (dma_dir != DMA_NONE) {
			ret = mlx5vf_dma_data_buffer(buf);
			if (ret)
				goto end;
		}
	}

	return buf;
end:
	mlx5vf_free_data_buffer(buf);
	return ERR_PTR(ret);
}

void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	spin_lock_irq(&buf->migf->list_lock);
	buf->stop_copy_chunk_num = 0;
	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
	spin_unlock_irq(&buf->migf->list_lock);
}

struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
		       enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf, *temp_buf;
	struct list_head free_list;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return ERR_PTR(-ENOTCONN);

	INIT_LIST_HEAD(&free_list);

	spin_lock_irq(&migf->list_lock);
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		if (buf->dma_dir == dma_dir) {
			list_del_init(&buf->buf_elm);
			if (buf->npages >= npages) {
				spin_unlock_irq(&migf->list_lock);
				goto found;
			}
			/*
			 * Prevent holding redundant buffers. Put them on a
			 * local free list and free them at the end, outside
			 * the spin lock (&migf->list_lock), since
			 * mlx5vf_free_data_buffer() might sleep.
			 */
			list_add(&buf->buf_elm, &free_list);
		}
	}
	spin_unlock_irq(&migf->list_lock);
	buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);

found:
	while ((temp_buf = list_first_entry_or_null(&free_list,
				struct mlx5_vhca_data_buffer, buf_elm))) {
		list_del(&temp_buf->buf_elm);
		mlx5vf_free_data_buffer(temp_buf);
	}

	return buf;
}

static void
mlx5vf_save_callback_complete(struct mlx5_vf_migration_file *migf,
			      struct mlx5vf_async_data *async_data)
{
	kvfree(async_data->out);
	complete(&migf->save_comp);
	fput(migf->filp);
}

void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	mutex_lock(&migf->lock);
	if (async_data->status) {
		mlx5vf_put_data_buffer(async_data->buf);
		if (async_data->header_buf)
			mlx5vf_put_data_buffer(async_data->header_buf);
		if (!async_data->stop_copy_chunk &&
		    async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
			migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
		else
			migf->state = MLX5_MIGF_STATE_ERROR;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);
	mlx5vf_save_callback_complete(migf, async_data);
}

static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
			  size_t image_size, bool initial_pre_copy)
{
	struct mlx5_vf_migration_file *migf = header_buf->migf;
	struct mlx5_vf_migration_header header = {};
	unsigned long flags;
	struct page *page;
	u8 *to_buff;

	header.record_size = cpu_to_le64(image_size);
	header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
	header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
	page = mlx5vf_get_migration_page(header_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &header, sizeof(header));
	kunmap_local(to_buff);
	header_buf->length = sizeof(header);
	header_buf->start_pos = header_buf->migf->max_pos;
	migf->max_pos += header_buf->length;
	spin_lock_irqsave(&migf->list_lock, flags);
	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
	spin_unlock_irqrestore(&migf->list_lock, flags);
	if (initial_pre_copy)
		migf->pre_copy_initial_bytes += sizeof(header);
	return 0;
}

static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
			struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
			struct mlx5_vf_migration_file, async_data);

	if (!status) {
		size_t next_required_umem_size = 0;
		bool stop_copy_last_chunk;
		size_t image_size;
		unsigned long flags;
		bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
				!async_data->stop_copy_chunk;

		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
				      actual_image_size);
		if (async_data->buf->stop_copy_chunk_num)
			next_required_umem_size = MLX5_GET(save_vhca_state_out,
					async_data->out, next_required_umem_size);
		stop_copy_last_chunk = async_data->stop_copy_chunk &&
				!next_required_umem_size;
		if (async_data->header_buf) {
			status = add_buf_header(async_data->header_buf, image_size,
						initial_pre_copy);
			if (status)
				goto err;
		}
		async_data->buf->length = image_size;
		async_data->buf->start_pos = migf->max_pos;
		migf->max_pos += async_data->buf->length;
		spin_lock_irqsave(&migf->list_lock, flags);
		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
		if (async_data->buf->stop_copy_chunk_num) {
			migf->num_ready_chunks++;
			if (next_required_umem_size &&
			    migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
				/* Delay the next SAVE until one chunk is consumed */
				migf->next_required_umem_size = next_required_umem_size;
				next_required_umem_size = 0;
			}
		}
		spin_unlock_irqrestore(&migf->list_lock, flags);
		if (initial_pre_copy) {
			migf->pre_copy_initial_bytes += image_size;
			migf->state = MLX5_MIGF_STATE_PRE_COPY;
		}
		if (stop_copy_last_chunk)
			migf->state = MLX5_MIGF_STATE_COMPLETE;
		wake_up_interruptible(&migf->poll_wait);
		if (next_required_umem_size)
			mlx5vf_mig_file_set_save_work(migf,
				/* Picking up the next chunk num */
				(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
				next_required_umem_size);
		mlx5vf_save_callback_complete(migf, async_data);
		return;
	}

err:
	/* The error flow can't run from an interrupt context */
	if (status == -EREMOTEIO) {
		status = MLX5_GET(save_vhca_state_out, async_data->out, status);
		/* Failed in FW, print cmd out failure details */
		mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0,
				 async_data->out);
	}

	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}

int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf,
			       struct mlx5_vhca_data_buffer *buf, bool inc,
			       bool track)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5_vhca_data_buffer *header_buf = NULL;
	struct mlx5vf_async_data *async_data;
	bool pre_copy_cleanup = false;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	err = wait_for_completion_interruptible(&migf->save_comp);
	if (err)
		return err;

	if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
	     migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
		pre_copy_cleanup = true;

	if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
		/*
		 * In case we had a PRE_COPY error, SAVE is triggered only for
		 * the final image, so read the device's full image.
		 */
		inc = false;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
	MLX5_SET(save_vhca_state_in, in, incremental, inc);
	MLX5_SET(save_vhca_state_in, in, set_track, track);

	async_data = &migf->async_data;
	async_data->buf = buf;
	async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	if (async_data->stop_copy_chunk) {
		u8 header_idx = buf->stop_copy_chunk_num ?
789 buf->stop_copy_chunk_num - 1 : 0; 790 791 header_buf = migf->buf_header[header_idx]; 792 migf->buf_header[header_idx] = NULL; 793 } 794 795 if (!header_buf) { 796 header_buf = mlx5vf_get_data_buffer( 797 migf, 798 DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header), 799 PAGE_SIZE), 800 DMA_NONE); 801 if (IS_ERR(header_buf)) { 802 err = PTR_ERR(header_buf); 803 goto err_free; 804 } 805 } 806 807 if (async_data->stop_copy_chunk) 808 migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK; 809 810 async_data->header_buf = header_buf; 811 get_file(migf->filp); 812 err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), 813 async_data->out, 814 out_size, mlx5vf_save_callback, 815 &async_data->cb_work); 816 if (err) 817 goto err_exec; 818 819 return 0; 820 821 err_exec: 822 if (header_buf) 823 mlx5vf_put_data_buffer(header_buf); 824 fput(migf->filp); 825 err_free: 826 kvfree(async_data->out); 827 err_out: 828 complete(&migf->save_comp); 829 return err; 830 } 831 832 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, 833 struct mlx5_vf_migration_file *migf, 834 struct mlx5_vhca_data_buffer *buf) 835 { 836 u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; 837 u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; 838 int err; 839 840 lockdep_assert_held(&mvdev->state_mutex); 841 if (mvdev->mdev_detach) 842 return -ENOTCONN; 843 844 if (!buf->mkey_in) { 845 err = mlx5vf_dma_data_buffer(buf); 846 if (err) 847 return err; 848 } 849 850 MLX5_SET(load_vhca_state_in, in, opcode, 851 MLX5_CMD_OP_LOAD_VHCA_STATE); 852 MLX5_SET(load_vhca_state_in, in, op_mod, 0); 853 MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id); 854 MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey); 855 MLX5_SET(load_vhca_state_in, in, size, buf->length); 856 return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out); 857 } 858 859 int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf) 860 { 861 int err; 862 863 lockdep_assert_held(&migf->mvdev->state_mutex); 864 if (migf->mvdev->mdev_detach) 865 return -ENOTCONN; 866 867 err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn); 868 return err; 869 } 870 871 void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf) 872 { 873 lockdep_assert_held(&migf->mvdev->state_mutex); 874 if (migf->mvdev->mdev_detach) 875 return; 876 877 mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn); 878 } 879 880 void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) 881 { 882 struct mlx5_vhca_data_buffer *entry; 883 int i; 884 885 lockdep_assert_held(&migf->mvdev->state_mutex); 886 WARN_ON(migf->mvdev->mdev_detach); 887 888 for (i = 0; i < MAX_NUM_CHUNKS; i++) { 889 if (migf->buf[i]) { 890 mlx5vf_free_data_buffer(migf->buf[i]); 891 migf->buf[i] = NULL; 892 } 893 894 if (migf->buf_header[i]) { 895 mlx5vf_free_data_buffer(migf->buf_header[i]); 896 migf->buf_header[i] = NULL; 897 } 898 } 899 900 list_splice(&migf->avail_list, &migf->buf_list); 901 902 while ((entry = list_first_entry_or_null(&migf->buf_list, 903 struct mlx5_vhca_data_buffer, buf_elm))) { 904 list_del(&entry->buf_elm); 905 mlx5vf_free_data_buffer(entry); 906 } 907 908 mlx5vf_cmd_dealloc_pd(migf); 909 } 910 911 static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, 912 struct mlx5vf_pci_core_device *mvdev, 913 struct rb_root_cached *ranges, u32 nnodes) 914 { 915 int max_num_range = 916 MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range); 917 struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; 918 int record_size = MLX5_ST_SZ_BYTES(page_track_range); 919 u32 
out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; 920 struct interval_tree_node *node = NULL; 921 u64 total_ranges_len = 0; 922 u32 num_ranges = nnodes; 923 u8 log_addr_space_size; 924 void *range_list_ptr; 925 void *obj_context; 926 void *cmd_hdr; 927 int inlen; 928 void *in; 929 int err; 930 int i; 931 932 if (num_ranges > max_num_range) { 933 vfio_combine_iova_ranges(ranges, nnodes, max_num_range); 934 num_ranges = max_num_range; 935 } 936 937 inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) + 938 record_size * num_ranges; 939 in = kzalloc(inlen, GFP_KERNEL); 940 if (!in) 941 return -ENOMEM; 942 943 cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in, 944 general_obj_in_cmd_hdr); 945 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, 946 MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 947 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, 948 MLX5_OBJ_TYPE_PAGE_TRACK); 949 obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context); 950 MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id); 951 MLX5_SET(page_track, obj_context, track_type, 1); 952 MLX5_SET(page_track, obj_context, log_page_size, 953 ilog2(tracker->host_qp->tracked_page_size)); 954 MLX5_SET(page_track, obj_context, log_msg_size, 955 ilog2(tracker->host_qp->max_msg_size)); 956 MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn); 957 MLX5_SET(page_track, obj_context, num_ranges, num_ranges); 958 959 range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range); 960 node = interval_tree_iter_first(ranges, 0, ULONG_MAX); 961 for (i = 0; i < num_ranges; i++) { 962 void *addr_range_i_base = range_list_ptr + record_size * i; 963 unsigned long length = node->last - node->start + 1; 964 965 MLX5_SET64(page_track_range, addr_range_i_base, start_address, 966 node->start); 967 MLX5_SET64(page_track_range, addr_range_i_base, length, length); 968 total_ranges_len += length; 969 node = interval_tree_iter_next(node, 0, ULONG_MAX); 970 } 971 972 WARN_ON(node); 973 log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len)); 974 if (log_addr_space_size < 975 (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) || 976 log_addr_space_size > 977 (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) { 978 err = -EOPNOTSUPP; 979 goto out; 980 } 981 982 MLX5_SET(page_track, obj_context, log_addr_space_size, 983 log_addr_space_size); 984 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 985 if (err) 986 goto out; 987 988 tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 989 out: 990 kfree(in); 991 return err; 992 } 993 994 static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev, 995 u32 tracker_id) 996 { 997 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; 998 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; 999 1000 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1001 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); 1002 MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id); 1003 1004 return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); 1005 } 1006 1007 static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev, 1008 u32 tracker_id, unsigned long iova, 1009 unsigned long length, u32 tracker_state) 1010 { 1011 u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {}; 1012 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; 1013 void *obj_context; 1014 void *cmd_hdr; 1015 1016 cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); 1017 
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1018 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); 1019 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id); 1020 1021 obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context); 1022 MLX5_SET64(page_track, obj_context, modify_field_select, 0x3); 1023 MLX5_SET64(page_track, obj_context, range_start_address, iova); 1024 MLX5_SET64(page_track, obj_context, length, length); 1025 MLX5_SET(page_track, obj_context, state, tracker_state); 1026 1027 return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); 1028 } 1029 1030 static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev, 1031 struct mlx5_vhca_page_tracker *tracker) 1032 { 1033 u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {}; 1034 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; 1035 void *obj_context; 1036 void *cmd_hdr; 1037 int err; 1038 1039 cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); 1040 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1041 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); 1042 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id); 1043 1044 err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); 1045 if (err) 1046 return err; 1047 1048 obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context); 1049 tracker->status = MLX5_GET(page_track, obj_context, state); 1050 return 0; 1051 } 1052 1053 static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev, 1054 struct mlx5_vhca_cq_buf *buf, int nent, 1055 int cqe_size) 1056 { 1057 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 1058 u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); 1059 u8 log_wq_sz = ilog2(cqe_size); 1060 int err; 1061 1062 err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf, 1063 mdev->priv.numa_node); 1064 if (err) 1065 return err; 1066 1067 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 1068 buf->cqe_size = cqe_size; 1069 buf->nent = nent; 1070 return 0; 1071 } 1072 1073 static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf) 1074 { 1075 struct mlx5_cqe64 *cqe64; 1076 void *cqe; 1077 int i; 1078 1079 for (i = 0; i < buf->nent; i++) { 1080 cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i); 1081 cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64; 1082 cqe64->op_own = MLX5_CQE_INVALID << 4; 1083 } 1084 } 1085 1086 static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev, 1087 struct mlx5_vhca_cq *cq) 1088 { 1089 mlx5_core_destroy_cq(mdev, &cq->mcq); 1090 mlx5_frag_buf_free(mdev, &cq->buf.frag_buf); 1091 mlx5_db_free(mdev, &cq->db); 1092 } 1093 1094 static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) 1095 { 1096 if (type != MLX5_EVENT_TYPE_CQ_ERROR) 1097 return; 1098 1099 set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device, 1100 tracker.cq.mcq)); 1101 } 1102 1103 static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, 1104 void *data) 1105 { 1106 struct mlx5_vhca_page_tracker *tracker = 1107 mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb); 1108 struct mlx5vf_pci_core_device *mvdev = container_of( 1109 tracker, struct mlx5vf_pci_core_device, tracker); 1110 struct mlx5_eqe_obj_change *object; 1111 struct mlx5_eqe *eqe = data; 1112 u8 event_type = (u8)type; 1113 u8 queue_type; 1114 u32 obj_id; 1115 int qp_num; 1116 1117 switch (event_type) { 1118 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: 1119 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: 1120 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: 1121 queue_type = eqe->data.qp_srq.type; 1122 if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP) 1123 break; 1124 qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; 1125 if (qp_num != tracker->host_qp->qpn && 1126 qp_num != tracker->fw_qp->qpn) 1127 break; 1128 set_tracker_error(mvdev); 1129 break; 1130 case MLX5_EVENT_TYPE_OBJECT_CHANGE: 1131 object = &eqe->data.obj_change; 1132 obj_id = be32_to_cpu(object->obj_id); 1133 if (obj_id == tracker->id) 1134 set_tracker_change_event(mvdev); 1135 break; 1136 default: 1137 break; 1138 } 1139 1140 return NOTIFY_OK; 1141 } 1142 1143 static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq, 1144 struct mlx5_eqe *eqe) 1145 { 1146 struct mlx5vf_pci_core_device *mvdev = 1147 container_of(mcq, struct mlx5vf_pci_core_device, 1148 tracker.cq.mcq); 1149 1150 complete(&mvdev->tracker_comp); 1151 } 1152 1153 static int mlx5vf_create_cq(struct mlx5_core_dev *mdev, 1154 struct mlx5_vhca_page_tracker *tracker, 1155 size_t ncqe) 1156 { 1157 int cqe_size = cache_line_size() == 128 ? 
128 : 64; 1158 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 1159 struct mlx5_vhca_cq *cq; 1160 int inlen, err, eqn; 1161 void *cqc, *in; 1162 __be64 *pas; 1163 int vector; 1164 1165 cq = &tracker->cq; 1166 ncqe = roundup_pow_of_two(ncqe); 1167 err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node); 1168 if (err) 1169 return err; 1170 1171 cq->ncqe = ncqe; 1172 cq->mcq.set_ci_db = cq->db.db; 1173 cq->mcq.arm_db = cq->db.db + 1; 1174 cq->mcq.cqe_sz = cqe_size; 1175 err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size); 1176 if (err) 1177 goto err_db_free; 1178 1179 init_cq_frag_buf(&cq->buf); 1180 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 1181 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * 1182 cq->buf.frag_buf.npages; 1183 in = kvzalloc(inlen, GFP_KERNEL); 1184 if (!in) { 1185 err = -ENOMEM; 1186 goto err_buff; 1187 } 1188 1189 vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); 1190 err = mlx5_comp_eqn_get(mdev, vector, &eqn); 1191 if (err) 1192 goto err_vec; 1193 1194 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 1195 MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); 1196 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 1197 MLX5_SET(cqc, cqc, uar_page, tracker->uar->index); 1198 MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift - 1199 MLX5_ADAPTER_PAGE_SHIFT); 1200 MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); 1201 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 1202 mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); 1203 cq->mcq.comp = mlx5vf_cq_complete; 1204 cq->mcq.event = mlx5vf_cq_event; 1205 err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); 1206 if (err) 1207 goto err_vec; 1208 1209 mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map, 1210 cq->mcq.cons_index); 1211 kvfree(in); 1212 return 0; 1213 1214 err_vec: 1215 kvfree(in); 1216 err_buff: 1217 mlx5_frag_buf_free(mdev, &cq->buf.frag_buf); 1218 err_db_free: 1219 mlx5_db_free(mdev, &cq->db); 1220 return err; 1221 } 1222 1223 static struct mlx5_vhca_qp * 1224 mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev, 1225 struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr) 1226 { 1227 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 1228 struct mlx5_vhca_qp *qp; 1229 u8 log_rq_stride; 1230 u8 log_rq_sz; 1231 void *qpc; 1232 int inlen; 1233 void *in; 1234 int err; 1235 1236 qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT); 1237 if (!qp) 1238 return ERR_PTR(-ENOMEM); 1239 1240 err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node); 1241 if (err) 1242 goto err_free; 1243 1244 if (max_recv_wr) { 1245 qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); 1246 log_rq_stride = ilog2(MLX5_SEND_WQE_DS); 1247 log_rq_sz = ilog2(qp->rq.wqe_cnt); 1248 err = mlx5_frag_buf_alloc_node(mdev, 1249 wq_get_byte_sz(log_rq_sz, log_rq_stride), 1250 &qp->buf, mdev->priv.numa_node); 1251 if (err) 1252 goto err_db_free; 1253 mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc); 1254 } 1255 1256 qp->rq.db = &qp->db.db[MLX5_RCV_DBR]; 1257 inlen = MLX5_ST_SZ_BYTES(create_qp_in) + 1258 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * 1259 qp->buf.npages; 1260 in = kvzalloc(inlen, GFP_KERNEL); 1261 if (!in) { 1262 err = -ENOMEM; 1263 goto err_in; 1264 } 1265 1266 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 1267 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 1268 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 1269 MLX5_SET(qpc, qpc, pd, tracker->pdn); 1270 MLX5_SET(qpc, qpc, uar_page, tracker->uar->index); 1271 MLX5_SET(qpc, qpc, log_page_size, 1272 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 1273 MLX5_SET(qpc, qpc, 
ts_format, mlx5_get_qp_default_ts(mdev)); 1274 if (MLX5_CAP_GEN(mdev, cqe_version) == 1) 1275 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); 1276 MLX5_SET(qpc, qpc, no_sq, 1); 1277 if (max_recv_wr) { 1278 MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn); 1279 MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4); 1280 MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz); 1281 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 1282 MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); 1283 mlx5_fill_page_frag_array(&qp->buf, 1284 (__be64 *)MLX5_ADDR_OF(create_qp_in, 1285 in, pas)); 1286 } else { 1287 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 1288 } 1289 1290 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); 1291 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 1292 kvfree(in); 1293 if (err) 1294 goto err_in; 1295 1296 qp->qpn = MLX5_GET(create_qp_out, out, qpn); 1297 return qp; 1298 1299 err_in: 1300 if (max_recv_wr) 1301 mlx5_frag_buf_free(mdev, &qp->buf); 1302 err_db_free: 1303 mlx5_db_free(mdev, &qp->db); 1304 err_free: 1305 kfree(qp); 1306 return ERR_PTR(err); 1307 } 1308 1309 static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp) 1310 { 1311 struct mlx5_wqe_data_seg *data; 1312 unsigned int ix; 1313 1314 WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt); 1315 ix = qp->rq.pc & (qp->rq.wqe_cnt - 1); 1316 data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix); 1317 data->byte_count = cpu_to_be32(qp->max_msg_size); 1318 data->lkey = cpu_to_be32(qp->recv_buf.mkey); 1319 data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset); 1320 qp->rq.pc++; 1321 /* Make sure that descriptors are written before doorbell record. */ 1322 dma_wmb(); 1323 *qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff); 1324 } 1325 1326 static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev, 1327 struct mlx5_vhca_qp *qp, u32 remote_qpn, 1328 bool host_qp) 1329 { 1330 u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; 1331 u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; 1332 u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; 1333 void *qpc; 1334 int ret; 1335 1336 /* Init */ 1337 qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc); 1338 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); 1339 MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); 1340 MLX5_SET(qpc, qpc, rre, 1); 1341 MLX5_SET(qpc, qpc, rwe, 1); 1342 MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP); 1343 MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn); 1344 ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in); 1345 if (ret) 1346 return ret; 1347 1348 if (host_qp) { 1349 struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; 1350 int i; 1351 1352 for (i = 0; i < qp->rq.wqe_cnt; i++) { 1353 mlx5vf_post_recv(qp); 1354 recv_buf->next_rq_offset += qp->max_msg_size; 1355 } 1356 } 1357 1358 /* RTR */ 1359 qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc); 1360 MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn); 1361 MLX5_SET(qpc, qpc, mtu, IB_MTU_4096); 1362 MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg)); 1363 MLX5_SET(qpc, qpc, remote_qpn, remote_qpn); 1364 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); 1365 MLX5_SET(qpc, qpc, primary_address_path.fl, 1); 1366 MLX5_SET(qpc, qpc, min_rnr_nak, 1); 1367 MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP); 1368 MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn); 1369 ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in); 1370 if (ret || host_qp) 1371 return ret; 1372 1373 /* RTS */ 1374 qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc); 1375 MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn); 1376 
MLX5_SET(qpc, qpc, retry_count, 7); 1377 MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ 1378 MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ 1379 MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP); 1380 MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn); 1381 1382 return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in); 1383 } 1384 1385 static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev, 1386 struct mlx5_vhca_qp *qp) 1387 { 1388 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; 1389 1390 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 1391 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); 1392 mlx5_cmd_exec_in(mdev, destroy_qp, in); 1393 1394 mlx5_frag_buf_free(mdev, &qp->buf); 1395 mlx5_db_free(mdev, &qp->db); 1396 kfree(qp); 1397 } 1398 1399 static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev, 1400 struct mlx5_vhca_qp *qp) 1401 { 1402 struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; 1403 1404 mlx5_core_destroy_mkey(mdev, recv_buf->mkey); 1405 unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in, 1406 &recv_buf->state, DMA_FROM_DEVICE); 1407 kvfree(recv_buf->mkey_in); 1408 free_page_list(recv_buf->npages, recv_buf->page_list); 1409 } 1410 1411 static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev, 1412 struct mlx5_vhca_qp *qp, u32 pdn, 1413 u64 rq_size) 1414 { 1415 unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE); 1416 struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; 1417 int err; 1418 1419 err = mlx5vf_add_pages(&recv_buf->page_list, npages); 1420 if (err) 1421 return err; 1422 1423 recv_buf->npages = npages; 1424 1425 recv_buf->mkey_in = alloc_mkey_in(npages, pdn); 1426 if (!recv_buf->mkey_in) { 1427 err = -ENOMEM; 1428 goto end; 1429 } 1430 1431 err = register_dma_pages(mdev, npages, recv_buf->page_list, 1432 recv_buf->mkey_in, &recv_buf->state, 1433 DMA_FROM_DEVICE); 1434 if (err) 1435 goto err_register_dma; 1436 1437 err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey); 1438 if (err) 1439 goto err_create_mkey; 1440 1441 return 0; 1442 1443 err_create_mkey: 1444 unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state, 1445 DMA_FROM_DEVICE); 1446 err_register_dma: 1447 kvfree(recv_buf->mkey_in); 1448 recv_buf->mkey_in = NULL; 1449 end: 1450 free_page_list(npages, recv_buf->page_list); 1451 return err; 1452 } 1453 1454 static void 1455 _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev) 1456 { 1457 struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; 1458 struct mlx5_core_dev *mdev = mvdev->mdev; 1459 1460 lockdep_assert_held(&mvdev->state_mutex); 1461 1462 if (!mvdev->log_active) 1463 return; 1464 1465 WARN_ON(mvdev->mdev_detach); 1466 1467 mlx5_eq_notifier_unregister(mdev, &tracker->nb); 1468 mlx5vf_cmd_destroy_tracker(mdev, tracker->id); 1469 mlx5vf_destroy_qp(mdev, tracker->fw_qp); 1470 mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp); 1471 mlx5vf_destroy_qp(mdev, tracker->host_qp); 1472 mlx5vf_destroy_cq(mdev, &tracker->cq); 1473 mlx5_core_dealloc_pd(mdev, tracker->pdn); 1474 mlx5_put_uars_page(mdev, tracker->uar); 1475 mvdev->log_active = false; 1476 } 1477 1478 int mlx5vf_stop_page_tracker(struct vfio_device *vdev) 1479 { 1480 struct mlx5vf_pci_core_device *mvdev = container_of( 1481 vdev, struct mlx5vf_pci_core_device, core_device.vdev); 1482 1483 mutex_lock(&mvdev->state_mutex); 1484 if (!mvdev->log_active) 1485 goto end; 1486 1487 _mlx5vf_free_page_tracker_resources(mvdev); 1488 mvdev->log_active = false; 
1489 end: 1490 mlx5vf_state_mutex_unlock(mvdev); 1491 return 0; 1492 } 1493 1494 int mlx5vf_start_page_tracker(struct vfio_device *vdev, 1495 struct rb_root_cached *ranges, u32 nnodes, 1496 u64 *page_size) 1497 { 1498 struct mlx5vf_pci_core_device *mvdev = container_of( 1499 vdev, struct mlx5vf_pci_core_device, core_device.vdev); 1500 struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; 1501 u8 log_tracked_page = ilog2(*page_size); 1502 struct mlx5_vhca_qp *host_qp; 1503 struct mlx5_vhca_qp *fw_qp; 1504 struct mlx5_core_dev *mdev; 1505 u32 log_max_msg_size; 1506 u32 max_msg_size; 1507 u64 rq_size = SZ_2M; 1508 u32 max_recv_wr; 1509 int err; 1510 1511 mutex_lock(&mvdev->state_mutex); 1512 if (mvdev->mdev_detach) { 1513 err = -ENOTCONN; 1514 goto end; 1515 } 1516 1517 if (mvdev->log_active) { 1518 err = -EINVAL; 1519 goto end; 1520 } 1521 1522 mdev = mvdev->mdev; 1523 log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size); 1524 max_msg_size = (1ULL << log_max_msg_size); 1525 /* The RQ must hold at least 4 WQEs/messages for successful QP creation */ 1526 if (rq_size < 4 * max_msg_size) 1527 rq_size = 4 * max_msg_size; 1528 1529 memset(tracker, 0, sizeof(*tracker)); 1530 tracker->uar = mlx5_get_uars_page(mdev); 1531 if (IS_ERR(tracker->uar)) { 1532 err = PTR_ERR(tracker->uar); 1533 goto end; 1534 } 1535 1536 err = mlx5_core_alloc_pd(mdev, &tracker->pdn); 1537 if (err) 1538 goto err_uar; 1539 1540 max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size); 1541 err = mlx5vf_create_cq(mdev, tracker, max_recv_wr); 1542 if (err) 1543 goto err_dealloc_pd; 1544 1545 host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr); 1546 if (IS_ERR(host_qp)) { 1547 err = PTR_ERR(host_qp); 1548 goto err_cq; 1549 } 1550 1551 host_qp->max_msg_size = max_msg_size; 1552 if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev, 1553 pg_track_log_min_page_size)) { 1554 log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev, 1555 pg_track_log_min_page_size); 1556 } else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev, 1557 pg_track_log_max_page_size)) { 1558 log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev, 1559 pg_track_log_max_page_size); 1560 } 1561 1562 host_qp->tracked_page_size = (1ULL << log_tracked_page); 1563 err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn, 1564 rq_size); 1565 if (err) 1566 goto err_host_qp; 1567 1568 fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0); 1569 if (IS_ERR(fw_qp)) { 1570 err = PTR_ERR(fw_qp); 1571 goto err_recv_resources; 1572 } 1573 1574 err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true); 1575 if (err) 1576 goto err_activate; 1577 1578 err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false); 1579 if (err) 1580 goto err_activate; 1581 1582 tracker->host_qp = host_qp; 1583 tracker->fw_qp = fw_qp; 1584 err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes); 1585 if (err) 1586 goto err_activate; 1587 1588 MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY); 1589 mlx5_eq_notifier_register(mdev, &tracker->nb); 1590 *page_size = host_qp->tracked_page_size; 1591 mvdev->log_active = true; 1592 mlx5vf_state_mutex_unlock(mvdev); 1593 return 0; 1594 1595 err_activate: 1596 mlx5vf_destroy_qp(mdev, fw_qp); 1597 err_recv_resources: 1598 mlx5vf_free_qp_recv_resources(mdev, host_qp); 1599 err_host_qp: 1600 mlx5vf_destroy_qp(mdev, host_qp); 1601 err_cq: 1602 mlx5vf_destroy_cq(mdev, &tracker->cq); 1603 err_dealloc_pd: 1604 mlx5_core_dealloc_pd(mdev, tracker->pdn); 1605 err_uar: 1606 mlx5_put_uars_page(mdev, tracker->uar); 1607 end: 
1608 mlx5vf_state_mutex_unlock(mvdev); 1609 return err; 1610 } 1611 1612 static void 1613 set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp, 1614 struct iova_bitmap *dirty) 1615 { 1616 u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry); 1617 u32 nent = size / entry_size; 1618 u32 nent_in_page; 1619 u32 nent_to_set; 1620 struct page *page; 1621 u32 page_offset; 1622 u32 page_index; 1623 u32 buf_offset; 1624 void *kaddr; 1625 u64 addr; 1626 u64 *buf; 1627 int i; 1628 1629 buf_offset = index * qp->max_msg_size; 1630 if (WARN_ON(buf_offset + size >= qp->recv_buf.npages * PAGE_SIZE || 1631 (nent > qp->max_msg_size / entry_size))) 1632 return; 1633 1634 do { 1635 page_index = buf_offset / PAGE_SIZE; 1636 page_offset = buf_offset % PAGE_SIZE; 1637 nent_in_page = (PAGE_SIZE - page_offset) / entry_size; 1638 page = qp->recv_buf.page_list[page_index]; 1639 kaddr = kmap_local_page(page); 1640 buf = kaddr + page_offset; 1641 nent_to_set = min(nent, nent_in_page); 1642 for (i = 0; i < nent_to_set; i++) { 1643 addr = MLX5_GET(page_track_report_entry, buf + i, 1644 dirty_address_low); 1645 addr |= (u64)MLX5_GET(page_track_report_entry, buf + i, 1646 dirty_address_high) << 32; 1647 iova_bitmap_set(dirty, addr, qp->tracked_page_size); 1648 } 1649 kunmap_local(kaddr); 1650 buf_offset += (nent_to_set * entry_size); 1651 nent -= nent_to_set; 1652 } while (nent); 1653 } 1654 1655 static void 1656 mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe, 1657 struct iova_bitmap *dirty, int *tracker_status) 1658 { 1659 u32 size; 1660 int ix; 1661 1662 qp->rq.cc++; 1663 *tracker_status = be32_to_cpu(cqe->immediate) >> 28; 1664 size = be32_to_cpu(cqe->byte_cnt); 1665 ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1); 1666 1667 /* zero length CQE, no data */ 1668 WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING); 1669 if (size) 1670 set_report_output(size, ix, qp, dirty); 1671 1672 qp->recv_buf.next_rq_offset = ix * qp->max_msg_size; 1673 mlx5vf_post_recv(qp); 1674 } 1675 1676 static void *get_cqe(struct mlx5_vhca_cq *cq, int n) 1677 { 1678 return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n); 1679 } 1680 1681 static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n) 1682 { 1683 void *cqe = get_cqe(cq, n & (cq->ncqe - 1)); 1684 struct mlx5_cqe64 *cqe64; 1685 1686 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; 1687 1688 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 1689 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) { 1690 return cqe64; 1691 } else { 1692 return NULL; 1693 } 1694 } 1695 1696 static int 1697 mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp, 1698 struct iova_bitmap *dirty, int *tracker_status) 1699 { 1700 struct mlx5_cqe64 *cqe; 1701 u8 opcode; 1702 1703 cqe = get_sw_cqe(cq, cq->mcq.cons_index); 1704 if (!cqe) 1705 return CQ_EMPTY; 1706 1707 ++cq->mcq.cons_index; 1708 /* 1709 * Make sure we read CQ entry contents after we've checked the 1710 * ownership bit. 
1711 */ 1712 rmb(); 1713 opcode = get_cqe_opcode(cqe); 1714 switch (opcode) { 1715 case MLX5_CQE_RESP_SEND_IMM: 1716 mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status); 1717 return CQ_OK; 1718 default: 1719 return CQ_POLL_ERR; 1720 } 1721 } 1722 1723 int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, 1724 unsigned long length, 1725 struct iova_bitmap *dirty) 1726 { 1727 struct mlx5vf_pci_core_device *mvdev = container_of( 1728 vdev, struct mlx5vf_pci_core_device, core_device.vdev); 1729 struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; 1730 struct mlx5_vhca_cq *cq = &tracker->cq; 1731 struct mlx5_core_dev *mdev; 1732 int poll_err, err; 1733 1734 mutex_lock(&mvdev->state_mutex); 1735 if (!mvdev->log_active) { 1736 err = -EINVAL; 1737 goto end; 1738 } 1739 1740 if (mvdev->mdev_detach) { 1741 err = -ENOTCONN; 1742 goto end; 1743 } 1744 1745 if (tracker->is_err) { 1746 err = -EIO; 1747 goto end; 1748 } 1749 1750 mdev = mvdev->mdev; 1751 err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length, 1752 MLX5_PAGE_TRACK_STATE_REPORTING); 1753 if (err) 1754 goto end; 1755 1756 tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING; 1757 while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING && 1758 !tracker->is_err) { 1759 poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty, 1760 &tracker->status); 1761 if (poll_err == CQ_EMPTY) { 1762 mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map, 1763 cq->mcq.cons_index); 1764 poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, 1765 dirty, &tracker->status); 1766 if (poll_err == CQ_EMPTY) { 1767 wait_for_completion(&mvdev->tracker_comp); 1768 if (tracker->object_changed) { 1769 tracker->object_changed = false; 1770 err = mlx5vf_cmd_query_tracker(mdev, tracker); 1771 if (err) 1772 goto end; 1773 } 1774 continue; 1775 } 1776 } 1777 if (poll_err == CQ_POLL_ERR) { 1778 err = -EIO; 1779 goto end; 1780 } 1781 mlx5_cq_set_ci(&cq->mcq); 1782 } 1783 1784 if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR) 1785 tracker->is_err = true; 1786 1787 if (tracker->is_err) 1788 err = -EIO; 1789 end: 1790 mlx5vf_state_mutex_unlock(mvdev); 1791 return err; 1792 } 1793