// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd */

#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "dev.h"
#include "sf/vhca_event.h"
#include "sf/sf.h"
#include "sf/mlx5_ifc_vhca_event.h"
#include "ecpf.h"
#define CREATE_TRACE_POINTS
#include "diag/dev_tracepoint.h"

struct mlx5_sf_dev_table {
        struct xarray devices;
        phys_addr_t base_address;
        u64 sf_bar_length;
        struct notifier_block nb;
        struct workqueue_struct *active_wq;
        struct work_struct work;
        u8 stop_active_wq:1;
        struct mlx5_core_dev *dev;
};

struct mlx5_sf_dev_active_work_ctx {
        struct work_struct work;
        struct mlx5_vhca_state_event event;
        struct mlx5_sf_dev_table *table;
        int sf_index;
};

static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev)
{
        return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev);
}

bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        return table && !xa_empty(&table->devices);
}

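/* sysfs "sfnum" attribute: expose the SF number of the SF auxiliary device. */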
static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
        struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);

        return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
}
static DEVICE_ATTR_RO(sfnum);

static struct attribute *sf_device_attrs[] = {
        &dev_attr_sfnum.attr,
        NULL,
};

static const struct attribute_group sf_attr_group = {
        .attrs = sf_device_attrs,
};

static const struct attribute_group *sf_attr_groups[2] = {
        &sf_attr_group,
        NULL
};

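/* Release callback of the auxiliary device: free the adev index and the SF device. */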
static void mlx5_sf_dev_release(struct device *device)
{
        struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev);
        struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);

        mlx5_adev_idx_free(adev->id);
        kfree(sf_dev);
}

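/* Unregister and uninit the SF auxiliary device; freeing is done by the release callback. */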
static void mlx5_sf_dev_remove(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev)
{
        int id;

        id = sf_dev->adev.id;
        trace_mlx5_sf_dev_del(dev, sf_dev, id);

        auxiliary_device_delete(&sf_dev->adev);
        auxiliary_device_uninit(&sf_dev->adev);
}

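/* Allocate an SF auxiliary device, set up its BAR base address and sysfs
 * attributes, register it on the auxiliary bus and track it in the table.
 */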
static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u16 fn_id, u32 sfnum)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;
        struct mlx5_sf_dev *sf_dev;
        struct pci_dev *pdev;
        int err;
        int id;

        id = mlx5_adev_idx_alloc();
        if (id < 0) {
                err = id;
                goto add_err;
        }

        sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL);
        if (!sf_dev) {
                mlx5_adev_idx_free(id);
                err = -ENOMEM;
                goto add_err;
        }
        pdev = dev->pdev;
        sf_dev->adev.id = id;
        sf_dev->adev.name = MLX5_SF_DEV_ID_NAME;
        sf_dev->adev.dev.release = mlx5_sf_dev_release;
        sf_dev->adev.dev.parent = &pdev->dev;
        sf_dev->adev.dev.groups = sf_attr_groups;
        sf_dev->sfnum = sfnum;
        sf_dev->parent_mdev = dev;
        sf_dev->fn_id = fn_id;

        sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length);

        trace_mlx5_sf_dev_add(dev, sf_dev, id);

        err = auxiliary_device_init(&sf_dev->adev);
        if (err) {
                mlx5_adev_idx_free(id);
                kfree(sf_dev);
                goto add_err;
        }

        err = auxiliary_device_add(&sf_dev->adev);
        if (err) {
                auxiliary_device_uninit(&sf_dev->adev);
                goto add_err;
        }

        err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL);
        if (err)
                goto xa_err;
        return;

xa_err:
        mlx5_sf_dev_remove(dev, sf_dev);
add_err:
        mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n",
                      sf_index, sfnum, err);
}

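/* Drop the SF from the table and remove its auxiliary device. */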
static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        xa_erase(&table->devices, sf_index);
        mlx5_sf_dev_remove(dev, sf_dev);
}

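/* VHCA state change notifier: add the SF auxiliary device when the function
 * becomes ACTIVE and delete it when the function is torn down or goes back to
 * the allocated/invalid state.
 */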
static int
mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data)
{
        struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
        const struct mlx5_vhca_state_event *event = data;
        struct mlx5_sf_dev *sf_dev;
        u16 max_functions;
        u16 sf_index;
        u16 base_id;

        max_functions = mlx5_sf_max_functions(table->dev);
        if (!max_functions)
                return 0;

        base_id = mlx5_sf_start_function_id(table->dev);
        if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
                return 0;

        sf_index = event->function_id - base_id;
        sf_dev = xa_load(&table->devices, sf_index);
        switch (event->new_vhca_state) {
        case MLX5_VHCA_STATE_INVALID:
        case MLX5_VHCA_STATE_ALLOCATED:
                if (sf_dev)
                        mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
                break;
        case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
                if (sf_dev)
                        mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
                else
                        mlx5_core_err(table->dev,
                                      "SF DEV: teardown state for invalid dev index=%d sfnum=0x%x\n",
                                      sf_index, event->sw_function_id);
                break;
        case MLX5_VHCA_STATE_ACTIVE:
                if (!sf_dev)
                        mlx5_sf_dev_add(table->dev, sf_index, event->function_id,
                                        event->sw_function_id);
                break;
        default:
                break;
        }
        return 0;
}

static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
{
        struct mlx5_core_dev *dev = table->dev;
        u16 max_functions;
        u16 function_id;
        int err = 0;
        int i;

        max_functions = mlx5_sf_max_functions(dev);
        function_id = mlx5_sf_start_function_id(dev);
        /* Arm the vhca context of each SF so that vhca state change events are generated */
        for (i = 0; i < max_functions; i++) {
                err = mlx5_vhca_event_arm(dev, function_id);
                if (err)
                        return err;

                function_id++;
        }
        return 0;
}

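/* Work item that probes a single externally created SF which was found active
 * when the table was created.
 */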
static void mlx5_sf_dev_add_active_work(struct work_struct *_work)
{
        struct mlx5_sf_dev_active_work_ctx *work_ctx;

        work_ctx = container_of(_work, struct mlx5_sf_dev_active_work_ctx, work);
        if (work_ctx->table->stop_active_wq)
                goto out;
        /* Don't probe a device which is already probed */
        if (!xa_load(&work_ctx->table->devices, work_ctx->sf_index))
                mlx5_sf_dev_add(work_ctx->table->dev, work_ctx->sf_index,
                                work_ctx->event.function_id, work_ctx->event.sw_function_id);
        /* There is a race where the SF got inactivated after the query
         * above, e.g. the query returns that the state of the SF is
         * active, and after that the eswitch manager sets it to
         * inactive.
         * This case cannot be managed in SW, since the probing of the
         * SF is on one system, and the inactivation is on a different
         * system.
         * If the inactivation is done after the SF performs init_hca(),
         * the SF will fully probe and then be removed. If it was done
         * before init_hca(), the SF probe will fail.
         */
out:
        kfree(work_ctx);
}

/* In case SFs are generated externally, probe active SFs */
static void mlx5_sf_dev_queue_active_works(struct work_struct *_work)
{
        struct mlx5_sf_dev_table *table = container_of(_work, struct mlx5_sf_dev_table, work);
        u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
        struct mlx5_sf_dev_active_work_ctx *work_ctx;
        struct mlx5_core_dev *dev = table->dev;
        u16 max_functions;
        u16 function_id;
        u16 sw_func_id;
        int err = 0;
        int wq_idx;
        u8 state;
        int i;

        max_functions = mlx5_sf_max_functions(dev);
        function_id = mlx5_sf_start_function_id(dev);
        for (i = 0; i < max_functions; i++, function_id++) {
                if (table->stop_active_wq)
                        return;
                err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
                if (err)
                        /* A failure of a specific vhca doesn't mean others
                         * will fail as well.
                         */
                        continue;
                state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
                if (state != MLX5_VHCA_STATE_ACTIVE)
                        continue;

                sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
                work_ctx = kzalloc(sizeof(*work_ctx), GFP_KERNEL);
                if (!work_ctx)
                        return;

                INIT_WORK(&work_ctx->work, &mlx5_sf_dev_add_active_work);
                work_ctx->event.function_id = function_id;
                work_ctx->event.sw_function_id = sw_func_id;
                work_ctx->table = table;
                work_ctx->sf_index = i;
                wq_idx = work_ctx->event.function_id % MLX5_DEV_MAX_WQS;
                mlx5_vhca_events_work_enqueue(dev, wq_idx, &work_ctx->work);
        }
}

/* In case SFs are generated externally, probe active SFs */
static int mlx5_sf_dev_create_active_works(struct mlx5_sf_dev_table *table)
{
        if (MLX5_CAP_GEN(table->dev, eswitch_manager))
                return 0; /* the table is local */

        /* Use a workqueue to probe active SFs, which are in large
         * quantity and may take up to minutes to probe.
         */
        table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
        if (!table->active_wq)
                return -ENOMEM;
        INIT_WORK(&table->work, &mlx5_sf_dev_queue_active_works);
        queue_work(table->active_wq, &table->work);
        return 0;
}

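/* Ask the queued work to stop and drain the workqueue used to probe active SFs. */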
static void mlx5_sf_dev_destroy_active_works(struct mlx5_sf_dev_table *table)
{
        if (table->active_wq) {
                table->stop_active_wq = true;
                destroy_workqueue(table->active_wq);
        }
}

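/* Create the SF device table, register for vhca state change events, probe
 * externally created SFs that are already active and arm all vhca contexts.
 */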
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table;
        int err;

        if (!mlx5_sf_dev_supported(dev))
                return;

        table = kzalloc(sizeof(*table), GFP_KERNEL);
        if (!table) {
                err = -ENOMEM;
                goto table_err;
        }

        table->nb.notifier_call = mlx5_sf_dev_state_change_handler;
        table->dev = dev;
        table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12);
        table->base_address = pci_resource_start(dev->pdev, 2);
        xa_init(&table->devices);
        dev->priv.sf_dev_table = table;

        err = mlx5_vhca_event_notifier_register(dev, &table->nb);
        if (err)
                goto vhca_err;

        err = mlx5_sf_dev_create_active_works(table);
        if (err)
                goto add_active_err;

        err = mlx5_sf_dev_vhca_arm_all(table);
        if (err)
                goto arm_err;
        return;

arm_err:
        mlx5_sf_dev_destroy_active_works(table);
add_active_err:
        mlx5_vhca_event_notifier_unregister(dev, &table->nb);
        mlx5_vhca_event_work_queues_flush(dev);
vhca_err:
        kfree(table);
        dev->priv.sf_dev_table = NULL;
table_err:
        mlx5_core_err(dev, "SF DEV table create err = %d\n", err);
}

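/* Remove every SF auxiliary device still tracked in the table. */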
static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table)
{
        struct mlx5_sf_dev *sf_dev;
        unsigned long index;

        xa_for_each(&table->devices, index, sf_dev) {
                xa_erase(&table->devices, index);
                mlx5_sf_dev_remove(table->dev, sf_dev);
        }
}

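/* Tear down the SF device table: stop the active-SF worker, unregister the
 * vhca event notifier, remove all SF devices and free the table.
 */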
void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
{
        struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table;

        if (!table)
                return;

        mlx5_sf_dev_destroy_active_works(table);
        mlx5_vhca_event_notifier_unregister(dev, &table->nb);
        mlx5_vhca_event_work_queues_flush(dev);

        /* Now that the event handler is not running, it is safe to destroy
         * the sf devices without a race.
         */
        mlx5_sf_dev_destroy_all(table);

        WARN_ON(!xa_empty(&table->devices));
        kfree(table);
        dev->priv.sf_dev_table = NULL;
}