// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */

#include "internal.h"
#include "lib/clock.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

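/* Reserve the next dependent WQE slot on the send ring and clear its match
 * tag so stale data from a previously queued rule cannot leak into the new
 * WQE.
 */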
struct mlx5hws_send_ring_dep_wqe *
mlx5hws_send_add_new_dep_wqe(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
	unsigned int idx = send_sq->head_dep_idx++ & (queue->num_entries - 1);

	memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5HWS_MATCH_TAG_SZ);

	return &send_sq->dep_wqe[idx];
}

void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue)
{
	queue->send_ring.send_sq.head_dep_idx--;
}

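/* Post all queued dependent WQEs as STE table-access WQEs. The first WQE in
 * the batch is fenced against previously posted work, and HW notification
 * (doorbell) is requested only for the last WQE.
 */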
void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
	struct mlx5hws_send_ste_attr ste_attr = {0};
	struct mlx5hws_send_ring_dep_wqe *dep_wqe;

	ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
	ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
	ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA;
	ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE;

	/* Fence the first WQE against previously posted dependent WQEs */
	ste_attr.send_attr.fence = 1;

	while (send_sq->head_dep_idx != send_sq->tail_dep_idx) {
		dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)];

		/* Notify HW on the last WQE */
		ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx);
		ste_attr.send_attr.user_data = dep_wqe->user_data;
		ste_attr.send_attr.rule = dep_wqe->rule;

		ste_attr.rtc_0 = dep_wqe->rtc_0;
		ste_attr.rtc_1 = dep_wqe->rtc_1;
		ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0;
		ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1;
		ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0;
		ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1;
		ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl;
		ste_attr.wqe_data = &dep_wqe->wqe_data;
		ste_attr.direct_index = dep_wqe->direct_index;

		mlx5hws_send_ste(queue, &ste_attr);

		/* Fencing is done only on the first WQE */
		ste_attr.send_attr.fence = 0;
	}
}

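/* WQE posting is split into three steps: post_start() grabs the send ring,
 * post_req_wqe() hands out one WQE_BB at a time, and post_end() fills the
 * control segment and optionally rings the doorbell.
 */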
struct mlx5hws_send_engine_post_ctrl
mlx5hws_send_engine_post_start(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_engine_post_ctrl ctrl;

	ctrl.queue = queue;
	/* Currently only one send ring is supported */
	ctrl.send_ring = &queue->send_ring;
	ctrl.num_wqebbs = 0;

	return ctrl;
}

void mlx5hws_send_engine_post_req_wqe(struct mlx5hws_send_engine_post_ctrl *ctrl,
				      char **buf, size_t *len)
{
	struct mlx5hws_send_ring_sq *send_sq = &ctrl->send_ring->send_sq;
	unsigned int idx;

	idx = (send_sq->cur_post + ctrl->num_wqebbs) & send_sq->buf_mask;

	/* Note that *buf is a single MLX5_SEND_WQE_BB. It cannot be used as a
	 * buffer spanning more than one WQE_BB, since two consecutive
	 * MLX5_SEND_WQE_BBs can reside on two different kernel memory pages.
	 */
	*buf = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
	*len = MLX5_SEND_WQE_BB;

	if (!ctrl->num_wqebbs) {
		*buf += sizeof(struct mlx5hws_wqe_ctrl_seg);
		*len -= sizeof(struct mlx5hws_wqe_ctrl_seg);
	}

	ctrl->num_wqebbs++;
}

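/* Publish posted WQEs to the device: make the WQE contents visible, update
 * the doorbell record with the new producer index, then ring the doorbell
 * by writing the control segment to the UAR page.
 */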
static void hws_send_engine_post_ring(struct mlx5hws_send_ring_sq *sq,
				      struct mlx5hws_wqe_ctrl_seg *doorbell_cseg)
{
	/* ensure wqe is visible to device before updating doorbell record */
	dma_wmb();

	*sq->wq.db = cpu_to_be32(sq->cur_post);

	/* ensure doorbell record is visible to device before ringing the
	 * doorbell
	 */
	wmb();

	mlx5_write64((__be32 *)doorbell_cseg, sq->uar_map);

	/* Ensure doorbell is written on uar_page before poll_cq */
	WRITE_ONCE(doorbell_cseg, NULL);
}

static void
hws_send_wqe_set_tag(struct mlx5hws_wqe_gta_data_seg_ste *wqe_data,
		     struct mlx5hws_rule_match_tag *tag,
		     bool is_jumbo)
{
	if (is_jumbo) {
		/* Clear previous possibly dirty control */
		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ);
		memcpy(wqe_data->jumbo, tag->jumbo, MLX5HWS_JUMBO_TAG_SZ);
	} else {
		/* Clear previous possibly dirty control and actions */
		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ + MLX5HWS_ACTIONS_SZ);
		memcpy(wqe_data->tag, tag->match, MLX5HWS_MATCH_TAG_SZ);
	}
}

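/* Finalize a posted WQE: build the control segment (opcode, producer index,
 * SQ number, completion/fence flags), record the per-WQE private data used
 * when its completion arrives, and ring the doorbell if notify_hw was
 * requested.
 */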
void mlx5hws_send_engine_post_end(struct mlx5hws_send_engine_post_ctrl *ctrl,
				  struct mlx5hws_send_engine_post_attr *attr)
{
	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_ring_sq *sq;
	unsigned int idx;
	u32 flags = 0;

	sq = &ctrl->send_ring->send_sq;
	idx = sq->cur_post & sq->buf_mask;
	sq->last_idx = idx;

	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, idx);

	wqe_ctrl->opmod_idx_opcode =
		cpu_to_be32((attr->opmod << 24) |
			    ((sq->cur_post & 0xffff) << 8) |
			    attr->opcode);
	wqe_ctrl->qpn_ds =
		cpu_to_be32((attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16 |
			    sq->sqn << 8);
	wqe_ctrl->imm = cpu_to_be32(attr->id);

	flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	flags |= attr->fence ? MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0;
	wqe_ctrl->flags = cpu_to_be32(flags);

	sq->wr_priv[idx].id = attr->id;
	sq->wr_priv[idx].retry_id = attr->retry_id;

	sq->wr_priv[idx].rule = attr->rule;
	sq->wr_priv[idx].user_data = attr->user_data;
	sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs;

	if (attr->rule) {
		sq->wr_priv[idx].rule->pending_wqes++;
		sq->wr_priv[idx].used_id = attr->used_id;
	}

	sq->cur_post += ctrl->num_wqebbs;

	if (attr->notify_hw)
		hws_send_engine_post_ring(sq, wqe_ctrl);
}

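/* Build and post a single GTA WQE: one WQE_BB for the GTA control segment
 * and one for the STE data segment, filled either from a caller-provided
 * data segment or from the rule match tag.
 */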
static void hws_send_wqe(struct mlx5hws_send_engine *queue,
			 struct mlx5hws_send_engine_post_attr *send_attr,
			 struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
			 void *send_wqe_data,
			 void *send_wqe_tag,
			 bool is_jumbo,
			 u8 gta_opcode,
			 u32 direct_index)
{
	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_engine_post_ctrl ctrl;
	size_t wqe_len;

	ctrl = mlx5hws_send_engine_post_start(queue);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	wqe_ctrl->op_dirix = cpu_to_be32(gta_opcode << 28 | direct_index);
	memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix,
	       sizeof(send_wqe_ctrl->stc_ix));

	if (send_wqe_data)
		memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data));
	else
		hws_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo);

	mlx5hws_send_engine_post_end(&ctrl, send_attr);
}

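/* Post the STE WQE(s) for a rule. When both RTCs are in use, the WQE toward
 * RTC_1 is posted first and carries the fence, while the WQE toward RTC_0 is
 * posted last and carries the HW notification, so at most one fence and one
 * doorbell are used per call.
 */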
void mlx5hws_send_ste(struct mlx5hws_send_engine *queue,
		      struct mlx5hws_send_ste_attr *ste_attr)
{
	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	u8 notify_hw = send_attr->notify_hw;
	u8 fence = send_attr->fence;

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		send_attr->fence = fence;
		send_attr->notify_hw = notify_hw && !ste_attr->rtc_0;
		hws_send_wqe(queue, send_attr,
			     ste_attr->wqe_ctrl,
			     ste_attr->wqe_data,
			     ste_attr->wqe_tag,
			     ste_attr->wqe_tag_is_jumbo,
			     ste_attr->gta_opcode,
			     ste_attr->direct_index);
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		send_attr->fence = fence && !ste_attr->rtc_1;
		send_attr->notify_hw = notify_hw;
		hws_send_wqe(queue, send_attr,
			     ste_attr->wqe_ctrl,
			     ste_attr->wqe_data,
			     ste_attr->wqe_tag,
			     ste_attr->wqe_tag_is_jumbo,
			     ste_attr->gta_opcode,
			     ste_attr->direct_index);
	}

	/* Restore to original requested values */
	send_attr->notify_hw = notify_hw;
	send_attr->fence = fence;
}

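/* Re-post a WQE that completed with an error toward its retry (collision)
 * RTC. The original GTA control and data segments are copied back out of the
 * SQ buffer at the failed WQE index and posted again using retry_id.
 */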
static void hws_send_engine_retry_post_send(struct mlx5hws_send_engine *queue,
					    struct mlx5hws_send_ring_priv *priv,
					    u16 wqe_cnt)
{
	struct mlx5hws_send_engine_post_attr send_attr = {0};
	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
	struct mlx5hws_send_engine_post_ctrl ctrl;
	struct mlx5hws_send_ring_sq *send_sq;
	unsigned int idx;
	size_t wqe_len;
	char *p;

	send_attr.rule = priv->rule;
	send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
	send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
	send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5hws_wqe_ctrl_seg);
	send_attr.notify_hw = 1;
	send_attr.fence = 0;
	send_attr.user_data = priv->user_data;
	send_attr.id = priv->retry_id;
	send_attr.used_id = priv->used_id;

	ctrl = mlx5hws_send_engine_post_start(queue);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);

	send_sq = &ctrl.send_ring->send_sq;
	idx = wqe_cnt & send_sq->buf_mask;
	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);

	/* Copy old gta ctrl */
	memcpy(wqe_ctrl, p + sizeof(struct mlx5hws_wqe_ctrl_seg),
	       MLX5_SEND_WQE_BB - sizeof(struct mlx5hws_wqe_ctrl_seg));

	idx = (wqe_cnt + 1) & send_sq->buf_mask;
	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);

	/* Copy old gta data */
	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);

	mlx5hws_send_engine_post_end(&ctrl, &send_attr);
}

void mlx5hws_send_engine_flush_queue(struct mlx5hws_send_engine *queue)
{
	struct mlx5hws_send_ring_sq *sq = &queue->send_ring.send_sq;
	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;

	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, sq->last_idx);
	wqe_ctrl->flags |= cpu_to_be32(MLX5_WQE_CTRL_CQ_UPDATE);

	hws_send_engine_post_ring(sq, wqe_ctrl);
}

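/* Handle the last completion of a rule that is being moved between matchers
 * (resize). On a failed write the rule is pointed back at its original RTCs
 * so it can still be deleted; once the delete phase completes the rule is
 * attached to the destination matcher and returned to the CREATED state.
 */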
static void
hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue,
				   struct mlx5hws_send_ring_priv *priv,
				   enum mlx5hws_flow_op_status *status)
{
	switch (priv->rule->resize_info->state) {
	case MLX5HWS_RULE_RESIZE_STATE_WRITING:
		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
			/* Backup original RTCs */
			u32 orig_rtc_0 = priv->rule->resize_info->rtc_0;
			u32 orig_rtc_1 = priv->rule->resize_info->rtc_1;

			/* Delete partially failed move rule using resize_info */
			priv->rule->resize_info->rtc_0 = priv->rule->rtc_0;
			priv->rule->resize_info->rtc_1 = priv->rule->rtc_1;

			/* Move rule to original RTC for future delete */
			priv->rule->rtc_0 = orig_rtc_0;
			priv->rule->rtc_1 = orig_rtc_1;
		}
		/* Clean leftovers */
		mlx5hws_rule_move_hws_remove(priv->rule, queue, priv->user_data);
		break;

	case MLX5HWS_RULE_RESIZE_STATE_DELETING:
		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
			*status = MLX5HWS_FLOW_OP_ERROR;
		} else {
			*status = MLX5HWS_FLOW_OP_SUCCESS;
			priv->rule->matcher = priv->rule->matcher->resize_dst;
		}
		priv->rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_IDLE;
		priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
		break;

	default:
		break;
	}
}

static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue,
					   struct mlx5hws_send_ring_priv *priv,
					   struct mlx5_cqe64 *cqe)
{
	u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0;
	struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx;
	u32 opcode = cqe ? get_cqe_opcode(cqe) : 0;
	struct mlx5hws_rule *rule = priv->rule;

	/* If something bad happens and lots of rules are failing, we don't
	 * want to pollute dmesg. Print only the first bad cqe per engine,
	 * the one that started the avalanche.
	 */
	if (queue->error_cqe_printed)
		return;

	queue->error_cqe_printed = true;

	if (mlx5hws_rule_move_in_progress(rule))
		mlx5hws_err(ctx,
			    "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n",
			    HWS_PTR_TO_ID(rule),
			    rule->resize_info->state ==
			    MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" :
			    rule->resize_info->state ==
			    MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" :
			    "UNKNOWN",
			    rule->pending_wqes);
	else
		mlx5hws_err(ctx,
			    "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n",
			    HWS_PTR_TO_ID(rule),
			    rule->status ==
			    MLX5HWS_RULE_STATUS_CREATING ? "CREATING" :
			    rule->status ==
			    MLX5HWS_RULE_STATUS_DELETING ? "DELETING" :
			    rule->status ==
			    MLX5HWS_RULE_STATUS_FAILING ? "FAILING" :
			    rule->status ==
			    MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA",
			    rule->status,
			    rule->pending_wqes);

	mlx5hws_err(ctx, " rule 0x%08llx: matcher 0x%llx %s\n",
		    HWS_PTR_TO_ID(rule),
		    HWS_PTR_TO_ID(rule->matcher),
		    (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ?
		    "(isolated)" : "");

	if (!cqe) {
		mlx5hws_err(ctx, " rule 0x%08llx: no CQE\n",
			    HWS_PTR_TO_ID(rule));
		return;
	}

	mlx5hws_err(ctx, " rule 0x%08llx: cqe->opcode = %d %s\n",
		    HWS_PTR_TO_ID(rule), opcode,
		    opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" :
		    opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " ");

	if (opcode == MLX5_CQE_REQ_ERR) {
		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;

		mlx5hws_err(ctx,
			    " rule 0x%08llx: |--- hw_error_syndrome = 0x%x\n",
			    HWS_PTR_TO_ID(rule),
			    err_cqe->rsvd1[16]);
		mlx5hws_err(ctx,
			    " rule 0x%08llx: |--- hw_syndrome_type = 0x%x\n",
			    HWS_PTR_TO_ID(rule),
			    err_cqe->rsvd1[17] >> 4);
		mlx5hws_err(ctx,
			    " rule 0x%08llx: |--- vendor_err_synd = 0x%x\n",
			    HWS_PTR_TO_ID(rule),
			    err_cqe->vendor_err_synd);
		mlx5hws_err(ctx,
			    " rule 0x%08llx: |--- syndrome = 0x%x\n",
			    HWS_PTR_TO_ID(rule),
			    err_cqe->syndrome);
	}

	mlx5hws_err(ctx,
		    " rule 0x%08llx: cqe->byte_cnt = 0x%08x\n",
		    HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt));
	mlx5hws_err(ctx,
		    " rule 0x%08llx: |-- UPDATE STATUS = %s\n",
		    HWS_PTR_TO_ID(rule),
		    (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ?
		    "FAILURE" : "SUCCESS");
	mlx5hws_err(ctx,
		    " rule 0x%08llx: |------- SYNDROME = %s\n",
		    HWS_PTR_TO_ID(rule),
		    ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ?
		    "SET_FLOW_FAIL" :
		    ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ?
		    "DISABLE_FLOW_FAIL" : "UNKNOWN");
	mlx5hws_err(ctx,
		    " rule 0x%08llx: cqe->sop_drop_qpn = 0x%08x\n",
		    HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn));
	mlx5hws_err(ctx,
		    " rule 0x%08llx: |-send wqe opcode = 0x%02x %s\n",
		    HWS_PTR_TO_ID(rule), wqe_opcode,
		    wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ?
		    "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)");
	mlx5hws_err(ctx,
		    " rule 0x%08llx: |------------ qpn = 0x%06x\n",
		    HWS_PTR_TO_ID(rule),
		    be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff);
}

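/* Process one completion that belongs to a rule: retry toward the collision
 * RTC on error when a retry_id exists, otherwise record success or failure,
 * and once the last pending WQE of the rule completes advance the rule state
 * machine (created/updated/deleted or failed).
 */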
static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
					struct mlx5hws_send_ring_priv *priv,
					u16 wqe_cnt,
					enum mlx5hws_flow_op_status *status,
					struct mlx5_cqe64 *cqe)
{
	priv->rule->pending_wqes--;

	if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) {
		if (priv->retry_id) {
			/* If there is a retry_id, then it's not an error yet,
			 * retry to insert this rule in the collision RTC.
			 */
			hws_send_engine_retry_post_send(queue, priv, wqe_cnt);
			return;
		}
		hws_send_engine_dump_error_cqe(queue, priv, cqe);
		/* Some part of the rule failed */
		priv->rule->status = MLX5HWS_RULE_STATUS_FAILING;
		*priv->used_id = 0;
	} else {
		*priv->used_id = priv->id;
	}

	/* Update rule status for the last completion */
	if (!priv->rule->pending_wqes) {
		if (unlikely(mlx5hws_rule_move_in_progress(priv->rule))) {
			hws_send_engine_update_rule_resize(queue, priv, status);
			return;
		}

		if (unlikely(priv->rule->status == MLX5HWS_RULE_STATUS_FAILING)) {
			/* Rule completely failed and doesn't require cleanup */
			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
				priv->rule->status = MLX5HWS_RULE_STATUS_FAILED;

			*status = MLX5HWS_FLOW_OP_ERROR;
		} else {
			/* Increase the status, this only works on good flow as
			 * the enum is arranged this way:
			 * - creating -> created
			 * - updating -> updated
			 * - deleting -> deleted
			 */
			priv->rule->status++;
			*status = MLX5HWS_FLOW_OP_SUCCESS;
			if (priv->rule->status == MLX5HWS_RULE_STATUS_DELETED) {
				/* Rule was deleted, now we can safely release
				 * action STEs and clear resize info
				 */
				mlx5hws_rule_free_action_ste(&priv->rule->action_ste);
				mlx5hws_rule_clear_resize_info(priv->rule);
			} else if (priv->rule->status == MLX5HWS_RULE_STATUS_UPDATED) {
				/* Rule was updated, free the old action STEs */
				mlx5hws_rule_free_action_ste(&priv->rule->old_action_ste);
				/* Update completed - move the rule back to "created" */
				priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
			}
		}
	}
}

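/* Translate one CQE (or a skipped WQE that has no CQE of its own) into a
 * user completion: update the owning rule if there is one, then either fill
 * the caller's result array or queue the completion on the engine's
 * completed list when the array is already full.
 */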
static void hws_send_engine_update(struct mlx5hws_send_engine *queue,
				   struct mlx5_cqe64 *cqe,
				   struct mlx5hws_send_ring_priv *priv,
				   struct mlx5hws_flow_op_result res[],
				   s64 *i,
				   u32 res_nb,
				   u16 wqe_cnt)
{
	enum mlx5hws_flow_op_status status;

	if (!cqe || (likely(be32_to_cpu(cqe->byte_cnt) >> 31 == 0) &&
	    likely(get_cqe_opcode(cqe) == MLX5_CQE_REQ))) {
		status = MLX5HWS_FLOW_OP_SUCCESS;
	} else {
		status = MLX5HWS_FLOW_OP_ERROR;
	}

	if (priv->user_data) {
		if (priv->rule) {
			hws_send_engine_update_rule(queue, priv, wqe_cnt,
						    &status, cqe);
			/* Completion is provided on the last rule WQE */
			if (priv->rule->pending_wqes)
				return;
		}

		if (*i < res_nb) {
			res[*i].user_data = priv->user_data;
			res[*i].status = status;
			(*i)++;
			mlx5hws_send_engine_dec_rule(queue);
		} else {
			mlx5hws_send_engine_gen_comp(queue, priv->user_data, status);
		}
	}
}

static int mlx5hws_parse_cqe(struct mlx5hws_send_ring_cq *cq,
			     struct mlx5_cqe64 *cqe64)
{
	if (unlikely(get_cqe_opcode(cqe64) != MLX5_CQE_REQ)) {
		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe64;

		mlx5_core_err(cq->mdev, "Bad OP in HWS SQ CQE: 0x%x\n", get_cqe_opcode(cqe64));
		mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", err_cqe->vendor_err_synd);
		mlx5_core_err(cq->mdev, "syndrome=%x\n", err_cqe->syndrome);
		print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
			       16, 1, err_cqe,
			       sizeof(*err_cqe), false);
		return CQ_POLL_ERR;
	}

	return CQ_OK;
}

static int mlx5hws_cq_poll_one(struct mlx5hws_send_ring_cq *cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe64) {
		if (unlikely(cq->mdev->state ==
			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
			mlx5_core_dbg_once(cq->mdev,
					   "Polling CQ while device is shutting down\n");
			return CQ_POLL_ERR;
		}
		return CQ_EMPTY;
	}

	mlx5_cqwq_pop(&cq->wq);
	err = mlx5hws_parse_cqe(cq, cqe64);
	mlx5_cqwq_update_db_record(&cq->wq);

	return err;
}

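/* Drain one CQE from the send CQ. Since a completion is requested only for
 * the last WQE of a batch, all WQEs between the current poll position and
 * the completed wqe_counter are reported first (without a CQE), then the
 * completed WQE itself is processed and the CQ is advanced.
 */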
static void hws_send_engine_poll_cq(struct mlx5hws_send_engine *queue,
				    struct mlx5hws_flow_op_result res[],
				    s64 *polled,
				    u32 res_nb)
{
	struct mlx5hws_send_ring *send_ring = &queue->send_ring;
	struct mlx5hws_send_ring_cq *cq = &send_ring->send_cq;
	struct mlx5hws_send_ring_sq *sq = &send_ring->send_sq;
	struct mlx5hws_send_ring_priv *priv;
	struct mlx5_cqe64 *cqe;
	u8 cqe_opcode;
	u16 wqe_cnt;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return;

	cqe_opcode = get_cqe_opcode(cqe);
	if (cqe_opcode == MLX5_CQE_INVALID)
		return;

	if (unlikely(cqe_opcode != MLX5_CQE_REQ))
		queue->err = true;

	wqe_cnt = be16_to_cpu(cqe->wqe_counter) & sq->buf_mask;

	while (cq->poll_wqe != wqe_cnt) {
		priv = &sq->wr_priv[cq->poll_wqe];
		hws_send_engine_update(queue, NULL, priv, res, polled, res_nb, 0);
		cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask;
	}

	priv = &sq->wr_priv[wqe_cnt];
	cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask;
	hws_send_engine_update(queue, cqe, priv, res, polled, res_nb, wqe_cnt);
	mlx5hws_cq_poll_one(cq);
}

static void hws_send_engine_poll_list(struct mlx5hws_send_engine *queue,
				      struct mlx5hws_flow_op_result res[],
				      s64 *polled,
				      u32 res_nb)
{
	struct mlx5hws_completed_poll *comp = &queue->completed;

	while (comp->ci != comp->pi) {
		if (*polled < res_nb) {
			res[*polled].status =
				comp->entries[comp->ci].status;
			res[*polled].user_data =
				comp->entries[comp->ci].user_data;
			(*polled)++;
			comp->ci = (comp->ci + 1) & comp->mask;
			mlx5hws_send_engine_dec_rule(queue);
		} else {
			return;
		}
	}
}

static int hws_send_engine_poll(struct mlx5hws_send_engine *queue,
				struct mlx5hws_flow_op_result res[],
				u32 res_nb)
{
	s64 polled = 0;

	hws_send_engine_poll_list(queue, res, &polled, res_nb);

	if (polled >= res_nb)
		return polled;

	hws_send_engine_poll_cq(queue, res, &polled, res_nb);

	return polled;
}

int mlx5hws_send_queue_poll(struct mlx5hws_context *ctx,
			    u16 queue_id,
			    struct mlx5hws_flow_op_result res[],
			    u32 res_nb)
{
	return hws_send_engine_poll(&ctx->send_queue[queue_id], res, res_nb);
}

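/* Allocate the SQ resources for a send engine: the cyclic work queue itself,
 * the dependent-WQE array (one entry per queue entry) and the per-WQE
 * private data array sized for the worst case of MAX_WQES_PER_RULE WQEs per
 * rule.
 */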
static int hws_send_ring_alloc_sq(struct mlx5_core_dev *mdev,
				  int numa_node,
				  struct mlx5hws_send_engine *queue,
				  struct mlx5hws_send_ring_sq *sq,
				  void *sqc_data)
{
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5_wq_param param;
	size_t buf_sz;
	int err;

	sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
	sq->mdev = mdev;

	param.db_numa_node = numa_node;
	param.buf_numa_node = numa_node;
	err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
	if (err)
		return err;
	wq->db = &wq->db[MLX5_SND_DBR];

	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
	sq->dep_wqe = kcalloc(queue->num_entries, sizeof(*sq->dep_wqe), GFP_KERNEL);
	if (!sq->dep_wqe) {
		err = -ENOMEM;
		goto destroy_wq_cyc;
	}

	sq->wr_priv = kzalloc(sizeof(*sq->wr_priv) * buf_sz, GFP_KERNEL);
	if (!sq->wr_priv) {
		err = -ENOMEM;
		goto free_dep_wqe;
	}

	sq->buf_mask = (queue->num_entries * MAX_WQES_PER_RULE) - 1;

	return 0;

free_dep_wqe:
	kfree(sq->dep_wqe);
destroy_wq_cyc:
	mlx5_wq_destroy(&sq->wq_ctrl);
	return err;
}

static void hws_send_ring_free_sq(struct mlx5hws_send_ring_sq *sq)
{
	if (!sq)
		return;
	kfree(sq->wr_priv);
	kfree(sq->dep_wqe);
	mlx5_wq_destroy(&sq->wq_ctrl);
}

static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn,
				   void *sqc_data,
				   struct mlx5hws_send_engine *queue,
				   struct mlx5hws_send_ring_sq *sq,
				   struct mlx5hws_send_ring_cq *cq)
{
	void *in, *sqc, *wq;
	int inlen, err;
	u8 ts_format;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
		sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
	MLX5_SET(sqc, sqc, cqn, cq->mcq.cqn);

	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
	MLX5_SET(sqc, sqc, non_wire, 1);

	ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME :
						 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
	MLX5_SET(sqc, sqc, ts_format, ts_format);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return err;
}

static void hws_send_ring_destroy_sq(struct mlx5_core_dev *mdev,
				     struct mlx5hws_send_ring_sq *sq)
{
	mlx5_core_destroy_sq(mdev, sq->sqn);
}

static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
{
	void *in, *sqc;
	int inlen, err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);

	err = mlx5_core_modify_sq(mdev, sqn, in);

	kvfree(in);

	return err;
}

static void hws_send_ring_close_sq(struct mlx5hws_send_ring_sq *sq)
{
	mlx5_core_destroy_sq(sq->mdev, sq->sqn);
	mlx5_wq_destroy(&sq->wq_ctrl);
	kfree(sq->wr_priv);
	kfree(sq->dep_wqe);
}

static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
				       void *sqc_data,
				       struct mlx5hws_send_engine *queue,
				       struct mlx5hws_send_ring_sq *sq,
				       struct mlx5hws_send_ring_cq *cq)
{
	int err;

	err = hws_send_ring_create_sq(mdev, pdn, sqc_data, queue, sq, cq);
	if (err)
		return err;

	err = hws_send_ring_set_sq_rdy(mdev, sq->sqn);
	if (err)
		hws_send_ring_destroy_sq(mdev, sq);

	return err;
}

static int hws_send_ring_open_sq(struct mlx5hws_context *ctx,
				 int numa_node,
				 struct mlx5hws_send_engine *queue,
				 struct mlx5hws_send_ring_sq *sq,
				 struct mlx5hws_send_ring_cq *cq)
{
	size_t buf_sz, sq_log_buf_sz;
	void *sqc_data, *wq;
	int err;

	sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
	if (!sqc_data)
		return -ENOMEM;

	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
	sq_log_buf_sz = ilog2(roundup_pow_of_two(buf_sz));

	wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, ctx->pd_num);
	MLX5_SET(wq, wq, log_wq_sz, sq_log_buf_sz);

	err = hws_send_ring_alloc_sq(ctx->mdev, numa_node, queue, sq, sqc_data);
	if (err)
		goto err_free_sqc;

	err = hws_send_ring_create_sq_rdy(ctx->mdev, ctx->pd_num, sqc_data,
					  queue, sq, cq);
	if (err)
		goto err_free_sq;

	kvfree(sqc_data);

	return 0;
err_free_sq:
	hws_send_ring_free_sq(sq);
err_free_sqc:
	kvfree(sqc_data);
	return err;
}

static void hws_cq_complete(struct mlx5_core_cq *mcq,
			    struct mlx5_eqe *eqe)
{
	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
}

static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
				  int numa_node,
				  struct mlx5hws_send_engine *queue,
				  void *cqc_data,
				  struct mlx5hws_send_ring_cq *cq)
{
	struct mlx5_core_cq *mcq = &cq->mcq;
	struct mlx5_wq_param param;
	struct mlx5_cqe64 *cqe;
	int err;
	u32 i;

	param.buf_numa_node = numa_node;
	param.db_numa_node = numa_node;

	err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
	if (err)
		return err;

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	mcq->comp = hws_cq_complete;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = 0xf1;
	}

	cq->mdev = mdev;

	return 0;
}

static int hws_send_ring_create_cq(struct mlx5_core_dev *mdev,
				   struct mlx5hws_send_engine *queue,
				   void *cqc_data,
				   struct mlx5hws_send_ring_cq *cq)
{
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in, *cqc;
	int inlen, eqn;
	int err;

	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));

	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));

	kvfree(in);

	return err;
}

static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev,
				 struct mlx5hws_send_engine *queue,
				 int numa_node,
				 struct mlx5hws_send_ring_cq *cq)
{
	void *cqc_data;
	int err;

	cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
	if (!cqc_data)
		return -ENOMEM;

	MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
	MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries);
	MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries));

	err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq);
	if (err)
		goto err_out;

	err = hws_send_ring_create_cq(mdev, queue, cqc_data, cq);
	if (err)
		goto err_free_cq;

	kvfree(cqc_data);

	return 0;

err_free_cq:
	mlx5_wq_destroy(&cq->wq_ctrl);
err_out:
	kvfree(cqc_data);
	return err;
}

static void hws_send_ring_close_cq(struct mlx5hws_send_ring_cq *cq)
{
	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static void hws_send_ring_close(struct mlx5hws_send_engine *queue)
{
	hws_send_ring_close_sq(&queue->send_ring.send_sq);
	hws_send_ring_close_cq(&queue->send_ring.send_cq);
}

static int mlx5hws_send_ring_open(struct mlx5hws_context *ctx,
				  struct mlx5hws_send_engine *queue)
{
	int numa_node = dev_to_node(mlx5_core_dma_dev(ctx->mdev));
	struct mlx5hws_send_ring *ring = &queue->send_ring;
	int err;

	err = hws_send_ring_open_cq(ctx->mdev, queue, numa_node, &ring->send_cq);
	if (err)
		return err;

	err = hws_send_ring_open_sq(ctx, numa_node, queue, &ring->send_sq,
				    &ring->send_cq);
	if (err)
		goto close_cq;

	return err;

close_cq:
	hws_send_ring_close_cq(&ring->send_cq);
	return err;
}

static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue)
{
	if (!queue->num_entries)
		return; /* this queue wasn't initialized */

	hws_send_ring_close(queue);
	kfree(queue->completed.entries);
}

static int mlx5hws_send_queue_open(struct mlx5hws_context *ctx,
				   struct mlx5hws_send_engine *queue,
				   u16 queue_size)
{
	int err;

	mutex_init(&queue->lock);

	queue->num_entries = roundup_pow_of_two(queue_size);
	queue->used_entries = 0;

	queue->completed.entries = kcalloc(queue->num_entries,
					   sizeof(queue->completed.entries[0]),
					   GFP_KERNEL);
	if (!queue->completed.entries)
		return -ENOMEM;

	queue->completed.pi = 0;
	queue->completed.ci = 0;
	queue->completed.mask = queue->num_entries - 1;
	err = mlx5hws_send_ring_open(ctx, queue);
	if (err)
		goto free_completed_entries;

	return 0;

free_completed_entries:
	kfree(queue->completed.entries);
	return err;
}

static void __hws_send_queues_close(struct mlx5hws_context *ctx, u16 queues)
{
	while (queues--)
		mlx5hws_send_queue_close(&ctx->send_queue[queues]);
}

static void hws_send_queues_bwc_locks_destroy(struct mlx5hws_context *ctx)
{
	int bwc_queues = mlx5hws_bwc_queues(ctx);
	int i;

	if (!mlx5hws_context_bwc_supported(ctx))
		return;

	for (i = 0; i < bwc_queues; i++) {
		mutex_destroy(&ctx->bwc_send_queue_locks[i]);
		lockdep_unregister_key(ctx->bwc_lock_class_keys + i);
	}

	kfree(ctx->bwc_lock_class_keys);
	kfree(ctx->bwc_send_queue_locks);
}

void mlx5hws_send_queues_close(struct mlx5hws_context *ctx)
{
	hws_send_queues_bwc_locks_destroy(ctx);
	__hws_send_queues_close(ctx, ctx->queues);
	kfree(ctx->send_queue);
}

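/* Allocate the per-queue locks and lockdep keys used by the backward
 * compatible (BWC) API. One BWC queue is added for every regular data queue,
 * so ctx->queues grows accordingly.
 */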
static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx)
{
	/* Number of BWC queues is equal to the number of the usual HWS queues */
	int bwc_queues = ctx->queues - 1;
	int i;

	if (!mlx5hws_context_bwc_supported(ctx))
		return 0;

	ctx->queues += bwc_queues;

	ctx->bwc_send_queue_locks = kcalloc(bwc_queues,
					    sizeof(*ctx->bwc_send_queue_locks),
					    GFP_KERNEL);

	if (!ctx->bwc_send_queue_locks)
		return -ENOMEM;

	ctx->bwc_lock_class_keys = kcalloc(bwc_queues,
					   sizeof(*ctx->bwc_lock_class_keys),
					   GFP_KERNEL);
	if (!ctx->bwc_lock_class_keys)
		goto err_lock_class_keys;

	for (i = 0; i < bwc_queues; i++) {
		mutex_init(&ctx->bwc_send_queue_locks[i]);
		lockdep_register_key(ctx->bwc_lock_class_keys + i);
		lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i);
	}

	return 0;

err_lock_class_keys:
	kfree(ctx->bwc_send_queue_locks);
	return -ENOMEM;
}

int mlx5hws_send_queues_open(struct mlx5hws_context *ctx,
			     u16 queues,
			     u16 queue_size)
{
	int err = 0;
	int i = 0;

	/* Open one extra queue for control path */
	ctx->queues = queues + 1;

	/* open a separate set of queues and locks for bwc API */
	err = hws_bwc_send_queues_init(ctx);
	if (err)
		return err;

	ctx->send_queue = kcalloc(ctx->queues, sizeof(*ctx->send_queue), GFP_KERNEL);
	if (!ctx->send_queue) {
		err = -ENOMEM;
		goto free_bwc_locks;
	}

	/* If native API isn't supported, skip the unused native queues:
	 * initialize BWC queues and control queue only.
	 */
	if (!mlx5hws_context_native_supported(ctx))
		i = mlx5hws_bwc_get_queue_id(ctx, 0);

	for (; i < ctx->queues; i++) {
		err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size);
		if (err)
			goto close_send_queues;
	}

	return 0;

close_send_queues:
	__hws_send_queues_close(ctx, i);

	kfree(ctx->send_queue);

free_bwc_locks:
	hws_send_queues_bwc_locks_destroy(ctx);

	return err;
}

int mlx5hws_send_queue_action(struct mlx5hws_context *ctx,
			      u16 queue_id,
			      u32 actions)
{
	struct mlx5hws_send_ring_sq *send_sq;
	struct mlx5hws_send_engine *queue;
	bool wait_comp = false;
	s64 polled = 0;

	queue = &ctx->send_queue[queue_id];
	send_sq = &queue->send_ring.send_sq;

	switch (actions) {
	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC:
		wait_comp = true;
		fallthrough;
	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_ASYNC:
		if (send_sq->head_dep_idx != send_sq->tail_dep_idx)
			/* Send dependent WQEs to drain the queue */
			mlx5hws_send_all_dep_wqe(queue);
		else
			/* Signal on the last posted WQE */
			mlx5hws_send_engine_flush_queue(queue);

		/* Poll queue until empty */
		while (wait_comp && !mlx5hws_send_engine_empty(queue))
			hws_send_engine_poll_cq(queue, NULL, &polled, 0);

		break;
	default:
		return -EINVAL;
	}

	return 0;
}

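/* Write an STE through the FW GENERATE_WQE command instead of the SQ. The
 * WQE control, GTA control and GTA data segments are built on the stack,
 * executed synchronously by FW, and the returned CQE is checked inline; on
 * failure the WQE is retried once toward the retry RTC if one was provided.
 */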
static int
hws_send_wqe_fw(struct mlx5_core_dev *mdev,
		u32 pd_num,
		struct mlx5hws_send_engine_post_attr *send_attr,
		struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
		void *send_wqe_match_data,
		void *send_wqe_match_tag,
		void *send_wqe_range_data,
		void *send_wqe_range_tag,
		bool is_jumbo,
		u8 gta_opcode)
{
	bool has_range = send_wqe_range_data || send_wqe_range_tag;
	bool has_match = send_wqe_match_data || send_wqe_match_tag;
	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data0 = {0};
	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data1 = {0};
	struct mlx5hws_wqe_gta_ctrl_seg gta_wqe_ctrl = {0};
	struct mlx5hws_cmd_generate_wqe_attr attr = {0};
	struct mlx5hws_wqe_ctrl_seg wqe_ctrl = {0};
	struct mlx5_cqe64 cqe;
	u32 flags = 0;
	int ret;

	/* Set WQE control */
	wqe_ctrl.opmod_idx_opcode = cpu_to_be32((send_attr->opmod << 24) | send_attr->opcode);
	wqe_ctrl.qpn_ds = cpu_to_be32((send_attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16);
	flags |= send_attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wqe_ctrl.flags = cpu_to_be32(flags);
	wqe_ctrl.imm = cpu_to_be32(send_attr->id);

	/* Set GTA WQE CTRL */
	memcpy(gta_wqe_ctrl.stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
	gta_wqe_ctrl.op_dirix = cpu_to_be32(gta_opcode << 28);

	/* Set GTA match WQE DATA */
	if (has_match) {
		if (send_wqe_match_data)
			memcpy(&gta_wqe_data0, send_wqe_match_data, sizeof(gta_wqe_data0));
		else
			hws_send_wqe_set_tag(&gta_wqe_data0, send_wqe_match_tag, is_jumbo);

		gta_wqe_data0.rsvd1_definer = cpu_to_be32(send_attr->match_definer_id << 8);
		attr.gta_data_0 = (u8 *)&gta_wqe_data0;
	}

	/* Set GTA range WQE DATA */
	if (has_range) {
		if (send_wqe_range_data)
			memcpy(&gta_wqe_data1, send_wqe_range_data, sizeof(gta_wqe_data1));
		else
			hws_send_wqe_set_tag(&gta_wqe_data1, send_wqe_range_tag, false);

		gta_wqe_data1.rsvd1_definer = cpu_to_be32(send_attr->range_definer_id << 8);
		attr.gta_data_1 = (u8 *)&gta_wqe_data1;
	}

	attr.pdn = pd_num;
	attr.wqe_ctrl = (u8 *)&wqe_ctrl;
	attr.gta_ctrl = (u8 *)&gta_wqe_ctrl;

send_wqe:
	ret = mlx5hws_cmd_generate_wqe(mdev, &attr, &cqe);
	if (ret) {
		mlx5_core_err(mdev, "Failed to write WQE using command");
		return ret;
	}

	if ((get_cqe_opcode(&cqe) == MLX5_CQE_REQ) &&
	    (be32_to_cpu(cqe.byte_cnt) >> 31 == 0)) {
		*send_attr->used_id = send_attr->id;
		return 0;
	}

	/* Retry if rule failed */
	if (send_attr->retry_id) {
		wqe_ctrl.imm = cpu_to_be32(send_attr->retry_id);
		send_attr->id = send_attr->retry_id;
		send_attr->retry_id = 0;
		goto send_wqe;
	}

	return -1;
}

void mlx5hws_send_stes_fw(struct mlx5hws_context *ctx,
			  struct mlx5hws_send_engine *queue,
			  struct mlx5hws_send_ste_attr *ste_attr)
{
	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
	struct mlx5hws_rule *rule = send_attr->rule;
	struct mlx5_core_dev *mdev;
	u16 queue_id;
	u32 pdn;
	int ret;

	queue_id = queue - ctx->send_queue;
	mdev = ctx->mdev;
	pdn = ctx->pd_num;

	/* Writing through FW can't be HW fenced, therefore we drain the queue */
	if (send_attr->fence)
		mlx5hws_send_queue_action(ctx,
					  queue_id,
					  MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC);

	if (ste_attr->rtc_1) {
		send_attr->id = ste_attr->rtc_1;
		send_attr->used_id = ste_attr->used_id_rtc_1;
		send_attr->retry_id = ste_attr->retry_rtc_1;
		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
				      ste_attr->wqe_ctrl,
				      ste_attr->wqe_data,
				      ste_attr->wqe_tag,
				      ste_attr->range_wqe_data,
				      ste_attr->range_wqe_tag,
				      ste_attr->wqe_tag_is_jumbo,
				      ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	if (ste_attr->rtc_0) {
		send_attr->id = ste_attr->rtc_0;
		send_attr->used_id = ste_attr->used_id_rtc_0;
		send_attr->retry_id = ste_attr->retry_rtc_0;
		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
				      ste_attr->wqe_ctrl,
				      ste_attr->wqe_data,
				      ste_attr->wqe_tag,
				      ste_attr->range_wqe_data,
				      ste_attr->range_wqe_tag,
				      ste_attr->wqe_tag_is_jumbo,
				      ste_attr->gta_opcode);
		if (ret)
			goto fail_rule;
	}

	/* Increase the status, this only works on good flow as the enum
	 * is arranged this way: creating -> created -> deleting -> deleted
	 */
	if (likely(rule))
		rule->status++;

	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_SUCCESS);

	return;

fail_rule:
	if (likely(rule))
		rule->status = !rule->rtc_0 && !rule->rtc_1 ?
			       MLX5HWS_RULE_STATUS_FAILED : MLX5HWS_RULE_STATUS_FAILING;

	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_ERROR);
}