// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
 */
#include <rdma/ib_verbs.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)

static int __counter_set_mode(struct rdma_port_counter *port_counter,
			      enum rdma_nl_counter_mode new_mode,
			      enum rdma_nl_counter_mask new_mask,
			      bool bind_opcnt)
{
	if (new_mode == RDMA_COUNTER_MODE_AUTO) {
		if (new_mask & (~ALL_AUTO_MODE_MASKS))
			return -EINVAL;
		if (port_counter->num_counters)
			return -EBUSY;
	}

	port_counter->mode.mode = new_mode;
	port_counter->mode.mask = new_mask;
	port_counter->mode.bind_opcnt = bind_opcnt;
	return 0;
}

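/*
 * Editorial note (illustrative, not part of the original file): when
 * switching to auto mode, only the bits in ALL_AUTO_MODE_MASKS are
 * accepted, and the switch is refused while any counter exists on the
 * port, e.g.:
 *
 *	__counter_set_mode(pc, RDMA_COUNTER_MODE_AUTO,
 *			   RDMA_COUNTER_MASK_QP_TYPE, true);	returns 0
 *	__counter_set_mode(pc, RDMA_COUNTER_MODE_AUTO,
 *			   ~0, true);				returns -EINVAL
 *	(any auto switch while pc->num_counters != 0)		returns -EBUSY
 */
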
/*
 * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
 *
 * @dev: Device to operate on
 * @port: Port to use
 * @mask: Mask to configure
 * @bind_opcnt: Whether to bind optional counters as well
 * @extack: Message to the user
 *
 * Return 0 on success; an unchanged counter mode is also considered
 * success. Return -EBUSY when changing to auto mode while counters are
 * still bound.
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
			       enum rdma_nl_counter_mask mask,
			       bool bind_opcnt,
			       struct netlink_ext_ack *extack)
{
	struct rdma_port_counter *port_counter;
	enum rdma_nl_counter_mode mode;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (mask)
		mode = RDMA_COUNTER_MODE_AUTO;
	else
		mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
						      RDMA_COUNTER_MODE_NONE;

	if (port_counter->mode.mode == mode &&
	    port_counter->mode.mask == mask &&
	    port_counter->mode.bind_opcnt == bind_opcnt) {
		ret = 0;
		goto out;
	}

	ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);

out:
	mutex_unlock(&port_counter->lock);
	if (ret == -EBUSY)
		NL_SET_ERR_MSG(
			extack,
			"Modifying auto mode is not allowed when there is a bound QP");
	return ret;
}

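/*
 * Usage sketch (hypothetical caller, for illustration only): a netlink
 * handler enabling per-PID auto binding on port 1 might do:
 *
 *	ret = rdma_counter_set_auto_mode(dev, 1, RDMA_COUNTER_MASK_PID,
 *					 true, extack);
 *
 * Passing mask == 0 turns auto mode off again; repeating the current
 * configuration is a no-op that returns 0.
 */
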
static void auto_mode_init_counter(struct rdma_counter *counter,
				   const struct ib_qp *qp,
				   enum rdma_nl_counter_mask new_mask)
{
	struct auto_mode_param *param = &counter->mode.param;

	counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
	counter->mode.mask = new_mask;

	if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
		param->qp_type = qp->qp_type;
}

static int __rdma_counter_bind_qp(struct rdma_counter *counter,
				  struct ib_qp *qp, u32 port)
{
	int ret;

	if (qp->counter)
		return -EINVAL;

	if (!qp->device->ops.counter_bind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_bind_qp(counter, qp, port);
	mutex_unlock(&counter->lock);

	return ret;
}

int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *stats;
	int ret = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	stats = ib_get_hw_stats_port(dev, port);
	if (!stats || index >= stats->num_counters ||
	    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&stats->lock);

	if (enable != test_bit(index, stats->is_disabled))
		goto out;

	ret = dev->ops.modify_hw_stat(dev, port, index, enable);
	if (ret)
		goto out;

	if (enable)
		clear_bit(index, stats->is_disabled);
	else
		set_bit(index, stats->is_disabled);
out:
	mutex_unlock(&stats->lock);
	return ret;
}

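/*
 * Usage sketch (illustrative, assumes @dev implements modify_hw_stat):
 * enabling optional counter index 5 on port 1 would be:
 *
 *	ret = rdma_counter_modify(dev, 1, 5, true);
 *
 * Only counters whose descriptor carries IB_STAT_FLAG_OPTIONAL can be
 * toggled; requesting the already-current state returns 0 without
 * calling into the driver.
 */
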
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
					   struct ib_qp *qp,
					   enum rdma_nl_counter_mode mode,
					   bool bind_opcnt)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	int ret;

	if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
		return NULL;

	counter = rdma_zalloc_drv_obj(dev, rdma_counter);
	if (!counter)
		return NULL;

	counter->device = dev;
	counter->port = port;

	dev->ops.counter_init(counter);

	rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
	counter->stats = dev->ops.counter_alloc_stats(counter);
	if (!counter->stats)
		goto err_stats;

	port_counter = &dev->port_data[port].port_counter;
	mutex_lock(&port_counter->lock);
	switch (mode) {
	case RDMA_COUNTER_MODE_MANUAL:
		ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
					 0, bind_opcnt);
		if (ret) {
			mutex_unlock(&port_counter->lock);
			goto err_mode;
		}
		break;
	case RDMA_COUNTER_MODE_AUTO:
		auto_mode_init_counter(counter, qp, port_counter->mode.mask);
		break;
	default:
		ret = -EOPNOTSUPP;
		mutex_unlock(&port_counter->lock);
		goto err_mode;
	}

	port_counter->num_counters++;
	mutex_unlock(&port_counter->lock);

	counter->mode.mode = mode;
	counter->mode.bind_opcnt = bind_opcnt;
	kref_init(&counter->kref);
	mutex_init(&counter->lock);

	ret = __rdma_counter_bind_qp(counter, qp, port);
	if (ret)
		goto err_mode;

	rdma_restrack_parent_name(&counter->res, &qp->res);
	rdma_restrack_add(&counter->res);
	return counter;

err_mode:
	rdma_free_hw_stats_struct(counter->stats);
err_stats:
	rdma_restrack_put(&counter->res);
	kfree(counter);
	return NULL;
}

static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
				   false);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	rdma_free_hw_stats_struct(counter->stats);
	kfree(counter);
}

static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
			    enum rdma_nl_counter_mask auto_mask)
{
	struct auto_mode_param *param = &counter->mode.param;
	bool match = true;

	if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
		match &= (param->qp_type == qp->qp_type);

	if (auto_mask & RDMA_COUNTER_MASK_PID)
		match &= (task_pid_nr(counter->res.task) ==
			  task_pid_nr(qp->res.task));

	return match;
}

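/*
 * Matching semantics, by example (editorial illustration): with
 * RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID set, a counter
 * created for an RC QP owned by PID 1234 matches only further RC QPs
 * of PID 1234; with the PID bit cleared it matches RC QPs of every
 * process, and an empty mask matches any QP on the port.
 */
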
static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!qp->device->ops.counter_unbind_qp)
		return -EOPNOTSUPP;

	mutex_lock(&counter->lock);
	ret = qp->device->ops.counter_unbind_qp(qp, port);
	mutex_unlock(&counter->lock);

	return ret;
}

static void counter_history_stat_update(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	rdma_counter_query_stats(counter);

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}

/*
 * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
 * to in auto mode
 *
 * Return: The counter (with its ref-count increased) if found, NULL otherwise
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u32 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}

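/*
 * Editorial note: kref_get_unless_zero() is used above rather than
 * kref_get() because the xarray walk can race with counter_release();
 * a counter whose last reference is already gone is skipped instead of
 * being resurrected.
 */
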
static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}

/*
 * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
 * the auto-mode rule
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp, port);
		if (ret) {
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
					 port_counter->mode.bind_opcnt);
		if (!counter)
			return -ENOMEM;
	}

	return 0;
}

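/*
 * Usage sketch (hypothetical caller, loosely mirroring the core QP
 * modify path):
 *
 *	ret = rdma_counter_bind_qp_auto(qp, attr->port_num);
 *
 * Kernel-owned and untracked QPs return 0 immediately, as does any
 * port that is not in RDMA_COUNTER_MODE_AUTO, so callers need no mode
 * checks of their own.
 */
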
/*
 * rdma_counter_unbind_qp - Unbind a QP from a counter
 * @force:
 *   true - Decrease the counter ref-count even if unbinding fails
 *          (e.g., on QP destroy)
 */
int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
{
	struct rdma_counter *counter = qp->counter;
	int ret;

	if (!counter)
		return -EINVAL;

	ret = __rdma_counter_unbind_qp(qp, port);
	if (ret && !force)
		return ret;

	kref_put(&counter->kref, counter_release);
	return 0;
}

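/*
 * Usage sketch (illustrative): on QP destroy the binding is dropped
 * unconditionally by passing force = true:
 *
 *	rdma_counter_unbind_qp(qp, qp->port, true);
 *
 * so the counter reference is released even when the driver's
 * counter_unbind_qp() callback fails.
 */
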
int rdma_counter_query_stats(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	int ret;

	if (!dev->ops.counter_update_stats)
		return -EINVAL;

	mutex_lock(&counter->lock);
	ret = dev->ops.counter_update_stats(counter);
	mutex_unlock(&counter->lock);

	return ret;
}

static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u32 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}

/*
 * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
 * specific port, including the running ones and history data
 */
u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
{
	struct rdma_port_counter *port_counter;
	u64 sum;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return 0;

	sum = get_running_counters_hwstat_sum(dev, port, index);
	sum += port_counter->hstats->value[index];

	return sum;
}

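/*
 * Worked example (editorial illustration): if two live counters on the
 * port currently read 100 and 250 at @index, and 40 more were folded
 * into the port history by already-released counters, this returns
 * 100 + 250 + 40 = 390.
 */
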
static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
{
	struct rdma_restrack_entry *res = NULL;
	struct ib_qp *qp = NULL;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
	if (IS_ERR(res))
		return NULL;

	qp = container_of(res, struct ib_qp, res);
	if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
		goto err;

	return qp;

err:
	rdma_restrack_put(res);
	return NULL;
}

static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
						   u32 counter_id)
{
	struct rdma_restrack_entry *res;
	struct rdma_counter *counter;

	res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
	if (IS_ERR(res))
		return NULL;

	counter = container_of(res, struct rdma_counter, res);
	kref_get(&counter->kref);
	rdma_restrack_put(res);

	return counter;
}

/*
 * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	if ((counter->device != qp->device) || (counter->port != qp->port)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = __rdma_counter_bind_qp(counter, qp, port);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}

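/*
 * Usage sketch (hypothetical values): binding QP number 0x12 to the
 * existing manual-mode counter with ID 7 on port 1:
 *
 *	ret = rdma_counter_bind_qpn(dev, 1, 0x12, 7);
 *
 * The call fails with -EINVAL while the port is in auto mode, and the
 * QP and counter must share a device, port and ownership domain
 * (kernel vs. user).
 */
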
/*
 * rdma_counter_bind_qpn_alloc() - Allocate a counter and bind QP @qp_num to it
 * The ID of the new counter is returned in @counter_id
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	if (counter_id)
		*counter_id = counter->id;

	rdma_restrack_put(&qp->res);
	return 0;

err:
	rdma_restrack_put(&qp->res);
	return ret;
}

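/*
 * Usage sketch (hypothetical values): allocating a fresh manual-mode
 * counter for QP number 0x12 on port 1 and reading back its ID:
 *
 *	u32 cntn;
 *
 *	ret = rdma_counter_bind_qpn_alloc(dev, 1, 0x12, &cntn);
 *
 * On success, cntn holds the restrack ID of the new counter, which can
 * later be passed to rdma_counter_unbind_qpn().
 */
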
/*
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, port, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}

int rdma_counter_get_mode(struct ib_device *dev, u32 port,
			  enum rdma_nl_counter_mode *mode,
			  enum rdma_nl_counter_mask *mask,
			  bool *opcnt)
{
	struct rdma_port_counter *port_counter;

	port_counter = &dev->port_data[port].port_counter;
	*mode = port_counter->mode.mode;
	*mask = port_counter->mode.mask;
	*opcnt = port_counter->mode.bind_opcnt;

	return 0;
}

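/*
 * Usage sketch (illustrative): a netlink dump handler reading back the
 * current per-port configuration:
 *
 *	enum rdma_nl_counter_mode mode;
 *	enum rdma_nl_counter_mask mask;
 *	bool opcnt;
 *
 *	rdma_counter_get_mode(dev, port, &mode, &mask, &opcnt);
 */
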
void rdma_counter_init(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port, i;

	if (!dev->port_data)
		return;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
		mutex_init(&port_counter->lock);

		if (!dev->ops.alloc_hw_port_stats)
			continue;

		port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
		if (!port_counter->hstats)
			goto fail;
	}

	return;

fail:
	/* Unwind the ports initialized so far, in reverse order. */
	for (i = port; i >= rdma_start_port(dev); i--) {
		port_counter = &dev->port_data[i].port_counter;
		rdma_free_hw_stats_struct(port_counter->hstats);
		port_counter->hstats = NULL;
		mutex_destroy(&port_counter->lock);
	}
}

void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		rdma_free_hw_stats_struct(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}