// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved.
 */

#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eq.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "devlink.h"
#include "en_accel/ipsec.h"

enum {
	MLX5_EQE_OWNER_INIT_VAL = 0x1,
};

enum {
	MLX5_EQ_STATE_ARMED = 0x9,
	MLX5_EQ_STATE_FIRED = 0xa,
	MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
};

enum {
	MLX5_EQ_DOORBEL_OFFSET = 0x40,
};

/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
 * the ci before we poll all the entries in the EQ. MLX5_NUM_SPARE_EQE is
 * used to set the EQ size; budget must be smaller than the EQ size.
 */
enum {
	MLX5_EQ_POLLING_BUDGET = 128,
};

static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);

struct mlx5_eq_table {
	struct xarray comp_eqs;
	struct mlx5_eq_async pages_eq;
	struct mlx5_eq_async cmd_eq;
	struct mlx5_eq_async async_eq;

	struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];

	/* Since CQ DB is stored in async_eq */
	struct mlx5_nb cq_err_nb;

	struct mutex lock; /* sync async eqs creations */
	struct mutex comp_lock; /* sync comp eqs creations */
	int curr_comp_eqs;
	int max_comp_eqs;
	struct mlx5_irq_table *irq_table;
	struct xarray comp_irqs;
	struct mlx5_irq *ctrl_irq;
	struct cpu_rmap *rmap;
	struct cpumask used_cpus;
};

#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
			       (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))

static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};

	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
	return mlx5_cmd_exec_in(dev, destroy_eq, in);
}

/* caller must eventually call mlx5_cq_put on the returned cq */
static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *cq = NULL;

	rcu_read_lock();
	cq = radix_tree_lookup(&table->tree, cqn);
	if (likely(cq))
		mlx5_cq_hold(cq);
	rcu_read_unlock();

	return cq;
}

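/* Completion EQ interrupt handler: poll up to MLX5_EQ_POLLING_BUDGET EQEs,
 * dispatch each completion to its CQ via the per-EQ CQ radix tree, then
 * update the consumer index and re-arm the EQ.
 */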
static int mlx5_eq_comp_int(struct notifier_block *nb,
			    __always_unused unsigned long action,
			    __always_unused void *data)
{
	struct mlx5_eq_comp *eq_comp =
		container_of(nb, struct mlx5_eq_comp, irq_nb);
	struct mlx5_eq *eq = &eq_comp->core;
	struct mlx5_eqe *eqe;
	int num_eqes = 0;

	while ((eqe = next_eqe_sw(eq))) {
		struct mlx5_core_cq *cq;
		u32 cqn;

		/* Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();
		/* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
		cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;

		cq = mlx5_eq_cq_get(eq, cqn);
		if (likely(cq)) {
			++cq->arm_sn;
			cq->comp(cq, eqe);
			mlx5_cq_put(cq);
		} else {
			dev_dbg_ratelimited(eq->dev->device,
					    "Completion event for bogus CQ 0x%x\n", cqn);
		}

		++eq->cons_index;

		if (++num_eqes >= MLX5_EQ_POLLING_BUDGET)
			break;
	}

	eq_update_ci(eq, 1);

	return 0;
}

/* Some architectures don't latch interrupts when they are disabled, so using
 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
 * avoid losing them. It is not recommended to use it, unless this is the last
 * resort.
 */
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
	u32 count_eqe;

	disable_irq(eq->core.irqn);
	count_eqe = eq->core.cons_index;
	mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
	count_eqe = eq->core.cons_index - count_eqe;
	enable_irq(eq->core.irqn);

	return count_eqe;
}

static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
				   unsigned long *flags)
	__acquires(&eq->lock)
{
	if (!recovery)
		spin_lock(&eq->lock);
	else
		spin_lock_irqsave(&eq->lock, *flags);
}

static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
				     unsigned long *flags)
	__releases(&eq->lock)
{
	if (!recovery)
		spin_unlock(&eq->lock);
	else
		spin_unlock_irqrestore(&eq->lock, *flags);
}

enum async_eq_nb_action {
	ASYNC_EQ_IRQ_HANDLER = 0,
	ASYNC_EQ_RECOVER = 1,
};

static int mlx5_eq_async_int(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	struct mlx5_eq_async *eq_async =
		container_of(nb, struct mlx5_eq_async, irq_nb);
	struct mlx5_eq *eq = &eq_async->core;
	struct mlx5_eq_table *eqt;
	struct mlx5_core_dev *dev;
	struct mlx5_eqe *eqe;
	unsigned long flags;
	int num_eqes = 0;
	bool recovery;

	dev = eq->dev;
	eqt = dev->priv.eq_table;

	recovery = action == ASYNC_EQ_RECOVER;
	mlx5_eq_async_int_lock(eq_async, recovery, &flags);

	while ((eqe = next_eqe_sw(eq))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
		atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);

		++eq->cons_index;

		if (++num_eqes >= MLX5_EQ_POLLING_BUDGET)
			break;
	}

	eq_update_ci(eq, 1);
	mlx5_eq_async_int_unlock(eq_async, recovery, &flags);

	return unlikely(recovery) ? num_eqes : 0;
}

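/* Poll the command EQ in recovery mode (IRQ-safe locking) so that command
 * completions whose interrupt was missed can still be reclaimed.
 */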
void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
	int eqes;

	eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
	if (eqes)
		mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
}

static void init_eq_buf(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	int i;

	for (i = 0; i < eq_get_size(eq); i++) {
		eqe = get_eqe(eq, i);
		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
	}
}

static int
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
	      struct mlx5_eq_param *param)
{
	u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
	struct mlx5_cq_table *cq_table = &eq->cq_table;
	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
	struct mlx5_priv *priv = &dev->priv;
	__be64 *pas;
	u16 vecidx;
	void *eqc;
	int inlen;
	u32 *in;
	int err;
	int i;

	/* Init CQ table */
	memset(cq_table, 0, sizeof(*cq_table));
	spin_lock_init(&cq_table->lock);
	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);

	eq->cons_index = 0;

	err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
				       &eq->frag_buf, dev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
	init_eq_buf(eq);

	eq->irq = param->irq;
	vecidx = mlx5_irq_get_index(eq->irq);

	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
	mlx5_fill_page_frag_array(&eq->frag_buf, pas);

	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
	if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
		MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);

	for (i = 0; i < 4; i++)
		MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
				 param->mask[i]);

	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
	MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
	MLX5_SET(eqc, eqc, intr, vecidx);
	MLX5_SET(eqc, eqc, log_page_size,
		 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err)
		goto err_in;

	eq->vecidx = vecidx;
	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
	eq->dev = dev;
	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;

	err = mlx5_debug_eq_add(dev, eq);
	if (err)
		goto err_eq;

	kvfree(in);
	return 0;

err_eq:
	mlx5_cmd_destroy_eq(dev, eq->eqn);

err_in:
	kvfree(in);

err_buf:
	mlx5_frag_buf_free(dev, &eq->frag_buf);
	return err;
}

/**
 * mlx5_eq_enable - Enable EQ for receiving EQEs
 * @dev : Device which owns the eq
 * @eq : EQ to enable
 * @nb : Notifier call block
 *
 * Must be called after EQ is created in device.
 *
 * @return: 0 if no error
 */
int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
		   struct notifier_block *nb)
{
	int err;

	err = mlx5_irq_attach_nb(eq->irq, nb);
	if (!err)
		eq_update_ci(eq, 1);

	return err;
}
EXPORT_SYMBOL(mlx5_eq_enable);

/**
 * mlx5_eq_disable - Disable EQ for receiving EQEs
 * @dev : Device which owns the eq
 * @eq : EQ to disable
 * @nb : Notifier call block
 *
 * Must be called before EQ is destroyed.
 */
void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
		     struct notifier_block *nb)
{
	mlx5_irq_detach_nb(eq->irq, nb);
}
EXPORT_SYMBOL(mlx5_eq_disable);

static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	int err;

	mlx5_debug_eq_remove(dev, eq);

	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
	if (err)
		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
			       eq->eqn);

	mlx5_frag_buf_free(dev, &eq->frag_buf);
	return err;
}

int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	int err;

	spin_lock(&table->lock);
	err = radix_tree_insert(&table->tree, cq->cqn, cq);
	spin_unlock(&table->lock);

	return err;
}

void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *tmp;

	spin_lock(&table->lock);
	tmp = radix_tree_delete(&table->tree, cq->cqn);
	spin_unlock(&table->lock);

	if (!tmp) {
		mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
			      cq->cqn, eq->eqn);
		return;
	}

	if (tmp != cq)
		mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
			      cq->cqn, eq->eqn);
}

int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table;
	int i;

	eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
				 dev->priv.numa_node);
	if (!eq_table)
		return -ENOMEM;

	dev->priv.eq_table = eq_table;

	mlx5_eq_debugfs_init(dev);

	mutex_init(&eq_table->lock);
	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
		ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);

	eq_table->irq_table = mlx5_irq_table_get(dev);
	cpumask_clear(&eq_table->used_cpus);
	xa_init(&eq_table->comp_eqs);
	xa_init(&eq_table->comp_irqs);
	mutex_init(&eq_table->comp_lock);
	eq_table->curr_comp_eqs = 0;
	return 0;
}

void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;

	mlx5_eq_debugfs_cleanup(dev);
	xa_destroy(&table->comp_irqs);
	xa_destroy(&table->comp_eqs);
	kvfree(table);
}

/* Async EQs */

static int create_async_eq(struct mlx5_core_dev *dev,
			   struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int err;

	mutex_lock(&eq_table->lock);
	err = create_map_eq(dev, eq, param);
	mutex_unlock(&eq_table->lock);
	return err;
}

static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int err;

	mutex_lock(&eq_table->lock);
	err = destroy_unmap_eq(dev, eq);
	mutex_unlock(&eq_table->lock);
	return err;
}

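/* CQ error events arrive on the async EQ; look up the affected CQ and
 * forward the event to its owner via cq->event().
 */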
static int cq_err_event_notifier(struct notifier_block *nb,
				 unsigned long type, void *data)
{
	struct mlx5_eq_table *eqt;
	struct mlx5_core_cq *cq;
	struct mlx5_eqe *eqe;
	struct mlx5_eq *eq;
	u32 cqn;

	/* type == MLX5_EVENT_TYPE_CQ_ERROR */

	eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
	eq = &eqt->async_eq.core;
	eqe = data;

	cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
	mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
		       cqn, eqe->data.cq_err.syndrome);

	cq = mlx5_eq_cq_get(eq, cqn);
	if (unlikely(!cq)) {
		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
		return NOTIFY_OK;
	}

	if (cq->event)
		cq->event(cq, type);

	mlx5_cq_put(cq);

	return NOTIFY_OK;
}

static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
{
	__be64 *user_unaffiliated_events;
	__be64 *user_affiliated_events;
	int i;

	user_affiliated_events =
		MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
	user_unaffiliated_events =
		MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);

	for (i = 0; i < 4; i++)
		mask[i] |= be64_to_cpu(user_affiliated_events[i] |
				       user_unaffiliated_events[i]);
}

static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
{
	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;

	if (MLX5_VPORT_MANAGER(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);

	if (MLX5_CAP_GEN(dev, general_notification_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);

	if (MLX5_CAP_GEN(dev, port_module_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
	else
		mlx5_core_dbg(dev, "port_module_event is not set\n");

	if (MLX5_PPS_CAP(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);

	if (MLX5_CAP_GEN(dev, fpga))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
				    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
	if (MLX5_CAP_GEN_MAX(dev, dct))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);

	if (MLX5_CAP_GEN(dev, temp_warn_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);

	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);

	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);

	if (mlx5_eswitch_is_funcs_handler(dev))
		async_event_mask |=
			(1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);

	if (MLX5_CAP_GEN_MAX(dev, vhca_state))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);

	if (MLX5_CAP_MACSEC(dev, log_max_macsec_offload))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);

	if (mlx5_ipsec_device_caps(dev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
		async_event_mask |=
			(1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);

	mask[0] = async_event_mask;

	if (MLX5_CAP_GEN(dev, event_cap))
		gather_user_async_events(dev, mask);
}

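/* Create and enable one async EQ (cmd/async/pages); paired with
 * cleanup_async_eq() on the error and teardown paths.
 */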
static int
setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
	       struct mlx5_eq_param *param, const char *name)
{
	int err;

	eq->irq_nb.notifier_call = mlx5_eq_async_int;
	spin_lock_init(&eq->lock);

	err = create_async_eq(dev, &eq->core, param);
	if (err) {
		mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
		return err;
	}
	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
	if (err) {
		mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
		destroy_async_eq(dev, &eq->core);
	}
	return err;
}

static void cleanup_async_eq(struct mlx5_core_dev *dev,
			     struct mlx5_eq_async *eq, const char *name)
{
	int err;

	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
	err = destroy_async_eq(dev, &eq->core);
	if (err)
		mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
			      name, err);
}

static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	union devlink_param_value val;
	int err;

	err = devl_param_driverinit_value_get(devlink,
					      DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
					      &val);
	if (!err)
		return val.vu32;
	mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
	return MLX5_NUM_ASYNC_EQE;
}

static int create_async_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_param param = {};
	int err;

	/* All the async_eqs use a single IRQ; request one IRQ and share its
	 * index among all the async_eqs of this device.
	 */
	table->ctrl_irq = mlx5_ctrl_irq_request(dev);
	if (IS_ERR(table->ctrl_irq))
		return PTR_ERR(table->ctrl_irq);

	MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
	mlx5_eq_notifier_register(dev, &table->cq_err_nb);

	param = (struct mlx5_eq_param) {
		.irq = table->ctrl_irq,
		.nent = MLX5_NUM_CMD_EQE,
		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
	};
	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
	err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
	if (err)
		goto err1;

	mlx5_cmd_use_events(dev);
	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);

	param = (struct mlx5_eq_param) {
		.irq = table->ctrl_irq,
		.nent = async_eq_depth_devlink_param_get(dev),
	};

	gather_async_events_mask(dev, param.mask);
	err = setup_async_eq(dev, &table->async_eq, &param, "async");
	if (err)
		goto err2;

	/* Skip page EQ creation when the device does not issue page requests */
	if (MLX5_CAP_GEN(dev, page_request_disable)) {
		mlx5_core_dbg(dev, "Skip page EQ creation\n");
		return 0;
	}

	param = (struct mlx5_eq_param) {
		.irq = table->ctrl_irq,
		.nent = /* TODO: sriov max_vf + */ 1,
		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
	};

	err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
	if (err)
		goto err3;

	return 0;

err3:
	cleanup_async_eq(dev, &table->async_eq, "async");
err2:
	mlx5_cmd_use_polling(dev);
	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
err1:
	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
	mlx5_ctrl_irq_release(dev, table->ctrl_irq);
	return err;
}

static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;

	if (!MLX5_CAP_GEN(dev, page_request_disable))
		cleanup_async_eq(dev, &table->pages_eq, "pages");
	cleanup_async_eq(dev, &table->async_eq, "async");
	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
	mlx5_cmd_use_polling(dev);
	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
	mlx5_ctrl_irq_release(dev, table->ctrl_irq);
}

struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
	return &dev->priv.eq_table->async_eq.core;
}

void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
	synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
}

void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
	synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
}

/* Generic EQ API for mlx5_core consumers
 * Needed For RDMA ODP EQ for now
 */
struct mlx5_eq *
mlx5_eq_create_generic(struct mlx5_core_dev *dev,
		       struct mlx5_eq_param *param)
{
	struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
					   dev->priv.numa_node);
	int err;

	if (!eq)
		return ERR_PTR(-ENOMEM);

	param->irq = dev->priv.eq_table->ctrl_irq;
	err = create_async_eq(dev, eq, param);
	if (err) {
		kvfree(eq);
		eq = ERR_PTR(err);
	}

	return eq;
}
EXPORT_SYMBOL(mlx5_eq_create_generic);

int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	int err;

	if (IS_ERR(eq))
		return -EINVAL;

	err = destroy_async_eq(dev, eq);
	if (err)
		goto out;

	kvfree(eq);
out:
	return err;
}
EXPORT_SYMBOL(mlx5_eq_destroy_generic);

struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
	u32 ci = eq->cons_index + cc;
	u32 nent = eq_get_size(eq);
	struct mlx5_eqe *eqe;

	eqe = get_eqe(eq, ci & (nent - 1));
	eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
	/* Make sure we read EQ entry contents after we've
	 * checked the ownership bit.
	 */
	if (eqe)
		dma_rmb();

	return eqe;
}
EXPORT_SYMBOL(mlx5_eq_get_eqe);

void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
{
	eq->cons_index += cc;
	eq_update_ci(eq, arm);
}
EXPORT_SYMBOL(mlx5_eq_update_ci);

static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_irq *irq;

	irq = xa_load(&table->comp_irqs, vecidx);
	if (!irq)
		return;

	xa_erase(&table->comp_irqs, vecidx);
	mlx5_irq_release_vector(irq);
}

static int mlx5_cpumask_default_spread(struct mlx5_core_dev *dev, int index)
{
	return cpumask_local_spread(index, dev->priv.numa_node);
}

static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_RFS_ACCEL
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.eq_table->rmap;
#endif
	return dev->priv.eq_table->rmap;
#else
	return NULL;
#endif
}

static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct cpu_rmap *rmap;
	struct mlx5_irq *irq;
	int cpu;

	rmap = mlx5_eq_table_get_pci_rmap(dev);
	cpu = mlx5_cpumask_default_spread(dev, vecidx);
	irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap);
	if (IS_ERR(irq))
		return PTR_ERR(irq);

	return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
}

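/* SF completion IRQs come from a shared pool; used_cpus tracks which CPUs
 * already serve an IRQ of this device so that new requests prefer CPUs
 * that are still unused.
 */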
static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_irq *irq;
	int cpu;

	irq = xa_load(&table->comp_irqs, vecidx);
	if (!irq)
		return;

	cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
	cpumask_clear_cpu(cpu, &table->used_cpus);
	xa_erase(&table->comp_irqs, vecidx);
	mlx5_irq_affinity_irq_release(dev, irq);
}

static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
	struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev);
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct irq_affinity_desc af_desc = {};
	struct mlx5_irq *irq;

	/* In case the SF IRQ pool does not exist, fall back to the PF IRQs */
	if (!mlx5_irq_pool_is_sf_pool(pool))
		return comp_irq_request_pci(dev, vecidx);

	af_desc.is_managed = false;
	cpumask_copy(&af_desc.mask, cpu_online_mask);
	cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
	irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
	if (IS_ERR(irq))
		return PTR_ERR(irq);

	cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq));
	mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
		      cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
		      mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);

	return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
}

static void comp_irq_release(struct mlx5_core_dev *dev, u16 vecidx)
{
	mlx5_core_is_sf(dev) ? comp_irq_release_sf(dev, vecidx) :
			       comp_irq_release_pci(dev, vecidx);
}

static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx)
{
	return mlx5_core_is_sf(dev) ? comp_irq_request_sf(dev, vecidx) :
				      comp_irq_request_pci(dev, vecidx);
}

#ifdef CONFIG_RFS_ACCEL
static int alloc_rmap(struct mlx5_core_dev *mdev)
{
	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;

	/* rmap is a mapping between irq number and queue number.
	 * Each irq can be assigned only to a single rmap.
	 * Since SFs share IRQs, rmap mapping cannot function correctly
	 * for irqs that are shared between different core/netdev RX rings.
	 * Hence we don't allow netdev rmap for SFs.
	 */
	if (mlx5_core_is_sf(mdev))
		return 0;

	eq_table->rmap = alloc_irq_cpu_rmap(eq_table->max_comp_eqs);
	if (!eq_table->rmap)
		return -ENOMEM;
	return 0;
}

static void free_rmap(struct mlx5_core_dev *mdev)
{
	struct mlx5_eq_table *eq_table = mdev->priv.eq_table;

	if (eq_table->rmap) {
		free_irq_cpu_rmap(eq_table->rmap);
		eq_table->rmap = NULL;
	}
}
#else
static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
static void free_rmap(struct mlx5_core_dev *mdev) {}
#endif

static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;

	xa_erase(&table->comp_eqs, vecidx);
	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
	if (destroy_unmap_eq(dev, &eq->core))
		mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
			       eq->core.eqn);
	tasklet_disable(&eq->tasklet_ctx.task);
	kfree(eq);
	comp_irq_release(dev, vecidx);
	table->curr_comp_eqs--;
}

static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	union devlink_param_value val;
	int err;

	err = devl_param_driverinit_value_get(devlink,
					      DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
					      &val);
	if (!err)
		return val.vu32;
	mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
	return MLX5_COMP_EQ_SIZE;
}

/* Must be called with EQ table comp_lock held */
static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_param param = {};
	struct mlx5_eq_comp *eq;
	struct mlx5_irq *irq;
	int nent;
	int err;

	lockdep_assert_held(&table->comp_lock);
	if (table->curr_comp_eqs == table->max_comp_eqs) {
		mlx5_core_err(dev, "maximum number of vectors is allocated, %d\n",
			      table->max_comp_eqs);
		return -ENOMEM;
	}

	err = comp_irq_request(dev, vecidx);
	if (err)
		return err;

	nent = comp_eq_depth_devlink_param_get(dev);

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
	if (!eq) {
		err = -ENOMEM;
		goto clean_irq;
	}

	INIT_LIST_HEAD(&eq->tasklet_ctx.list);
	INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
	spin_lock_init(&eq->tasklet_ctx.lock);
	tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);

	irq = xa_load(&table->comp_irqs, vecidx);
	eq->irq_nb.notifier_call = mlx5_eq_comp_int;
	param = (struct mlx5_eq_param) {
		.irq = irq,
		.nent = nent,
	};

	err = create_map_eq(dev, &eq->core, &param);
	if (err)
		goto clean_eq;
	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
	if (err) {
		destroy_unmap_eq(dev, &eq->core);
		goto clean_eq;
	}

	mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
	err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL));
	if (err)
		goto disable_eq;

	table->curr_comp_eqs++;
	return eq->core.eqn;

disable_eq:
	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
clean_eq:
	kfree(eq);
clean_irq:
	comp_irq_release(dev, vecidx);
	return err;
}

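/* Completion EQs are created on demand: return the EQN of an already
 * existing EQ for this vector, or create and enable one under comp_lock.
 */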
int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;
	int ret = 0;

	if (vecidx >= table->max_comp_eqs) {
		mlx5_core_dbg(dev, "Requested vector index %u should be less than %u",
			      vecidx, table->max_comp_eqs);
		return -EINVAL;
	}

	mutex_lock(&table->comp_lock);
	eq = xa_load(&table->comp_eqs, vecidx);
	if (eq) {
		*eqn = eq->core.eqn;
		goto out;
	}

	ret = create_comp_eq(dev, vecidx);
	if (ret < 0) {
		mutex_unlock(&table->comp_lock);
		return ret;
	}

	*eqn = ret;
out:
	mutex_unlock(&table->comp_lock);
	return 0;
}
EXPORT_SYMBOL(mlx5_comp_eqn_get);

int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;
	int eqn;
	int err;

	/* Allocate the EQ if not allocated yet */
	err = mlx5_comp_eqn_get(dev, vector, &eqn);
	if (err)
		return err;

	eq = xa_load(&table->comp_eqs, vector);
	*irqn = eq->core.irqn;
	return 0;
}

unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev)
{
	return dev->priv.eq_table->max_comp_eqs;
}
EXPORT_SYMBOL(mlx5_comp_vectors_max);

static struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;

	eq = xa_load(&table->comp_eqs, vector);
	if (eq)
		return mlx5_irq_get_affinity_mask(eq->core.irq);

	return NULL;
}

int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector)
{
	struct cpumask *mask;
	int cpu;

	mask = mlx5_comp_irq_get_affinity_mask(dev, vector);
	if (mask)
		cpu = cpumask_first(mask);
	else
		cpu = mlx5_cpumask_default_spread(dev, vector);

	return cpu;
}
EXPORT_SYMBOL(mlx5_comp_vector_get_cpu);

#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
	return dev->priv.eq_table->rmap;
}
#endif

struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;
	unsigned long index;

	xa_for_each(&table->comp_eqs, index, eq)
		if (eq->core.eqn == eqn)
			return eq;

	return ERR_PTR(-ENOENT);
}

/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
	mlx5_irq_table_free_irqs(dev);
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
#define MLX5_MAX_ASYNC_EQS 4
#else
#define MLX5_MAX_ASYNC_EQS 3
#endif

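/* Bound the number of completion EQs by the available completion IRQ
 * vectors and by the device EQ capability minus the EQs reserved for
 * async use; SFs are further capped by their own limits.
 */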
static int get_num_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int max_dev_eqs;
	int num_eqs;

	/* If ethernet is disabled we use just a single completion vector to
	 * have the other vectors available for other drivers using mlx5_core.
	 * For example, mlx5_vdpa
	 */
	if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
		return 1;

	max_dev_eqs = mlx5_max_eq_cap_get(dev);

	num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
			max_dev_eqs - MLX5_MAX_ASYNC_EQS);
	if (mlx5_core_is_sf(dev)) {
		int max_eqs_sf = MLX5_CAP_GEN_2(dev, sf_eq_usage) ?
				 MLX5_CAP_GEN_2(dev, max_num_eqs_24b) :
				 MLX5_COMP_EQS_PER_SF;

		max_eqs_sf = min_t(int, max_eqs_sf,
				   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
		num_eqs = min_t(int, num_eqs, max_eqs_sf);
	}

	return num_eqs;
}

int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
	int err;

	eq_table->max_comp_eqs = get_num_eqs(dev);
	err = create_async_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to create async EQs\n");
		goto err_async_eqs;
	}

	err = alloc_rmap(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to allocate rmap\n");
		goto err_rmap;
	}

	return 0;

err_rmap:
	destroy_async_eqs(dev);
err_async_eqs:
	return err;
}

void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = dev->priv.eq_table;
	struct mlx5_eq_comp *eq;
	unsigned long index;

	xa_for_each(&table->comp_eqs, index, eq)
		destroy_comp_eq(dev, eq, index);

	free_rmap(dev);
	destroy_async_eqs(dev);
}

int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
	struct mlx5_eq_table *eqt = dev->priv.eq_table;

	return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
}
EXPORT_SYMBOL(mlx5_eq_notifier_register);

int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
	struct mlx5_eq_table *eqt = dev->priv.eq_table;

	return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
EXPORT_SYMBOL(mlx5_eq_notifier_unregister);