1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2024 NVIDIA Corporation & Affiliates */
3 
4 #include "internal.h"
5 #include "lib/clock.h"
6 
7 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
8 
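/* Reserve the next dependent WQE slot in the SQ dep_wqe ring and clear its
 * match tag. The slot is flushed to HW later by mlx5hws_send_all_dep_wqe()
 * or released again by mlx5hws_send_abort_new_dep_wqe().
 */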
9 struct mlx5hws_send_ring_dep_wqe *
10 mlx5hws_send_add_new_dep_wqe(struct mlx5hws_send_engine *queue)
11 {
12 	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
13 	unsigned int idx = send_sq->head_dep_idx++ & (queue->num_entries - 1);
14 
15 	memset(&send_sq->dep_wqe[idx].wqe_data.tag, 0, MLX5HWS_MATCH_TAG_SZ);
16 
17 	return &send_sq->dep_wqe[idx];
18 }
19 
20 void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue)
21 {
22 	queue->send_ring.send_sq.head_dep_idx--;
23 }
24 
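/* Post all pending dependent WQEs to the send queue. Only the first WQE is
 * fenced against previously posted work, and only the last one requests a
 * completion and rings the doorbell.
 */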
25 void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue)
26 {
27 	struct mlx5hws_send_ring_sq *send_sq = &queue->send_ring.send_sq;
28 	struct mlx5hws_send_ste_attr ste_attr = {0};
29 	struct mlx5hws_send_ring_dep_wqe *dep_wqe;
30 
31 	ste_attr.send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
32 	ste_attr.send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
33 	ste_attr.send_attr.len = MLX5HWS_WQE_SZ_GTA_CTRL + MLX5HWS_WQE_SZ_GTA_DATA;
34 	ste_attr.gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE;
35 
36 	/* Fence the first WQE from previous dependent WQEs */
37 	ste_attr.send_attr.fence = 1;
38 
39 	while (send_sq->head_dep_idx != send_sq->tail_dep_idx) {
40 		dep_wqe = &send_sq->dep_wqe[send_sq->tail_dep_idx++ & (queue->num_entries - 1)];
41 
42 		/* Notify HW on the last WQE */
43 		ste_attr.send_attr.notify_hw = (send_sq->tail_dep_idx == send_sq->head_dep_idx);
44 		ste_attr.send_attr.user_data = dep_wqe->user_data;
45 		ste_attr.send_attr.rule = dep_wqe->rule;
46 
47 		ste_attr.rtc_0 = dep_wqe->rtc_0;
48 		ste_attr.rtc_1 = dep_wqe->rtc_1;
49 		ste_attr.retry_rtc_0 = dep_wqe->retry_rtc_0;
50 		ste_attr.retry_rtc_1 = dep_wqe->retry_rtc_1;
51 		ste_attr.used_id_rtc_0 = &dep_wqe->rule->rtc_0;
52 		ste_attr.used_id_rtc_1 = &dep_wqe->rule->rtc_1;
53 		ste_attr.wqe_ctrl = &dep_wqe->wqe_ctrl;
54 		ste_attr.wqe_data = &dep_wqe->wqe_data;
55 		ste_attr.direct_index = dep_wqe->direct_index;
56 
57 		mlx5hws_send_ste(queue, &ste_attr);
58 
59 		/* Fencing is done only on the first WQE */
60 		ste_attr.send_attr.fence = 0;
61 	}
62 }
63 
64 struct mlx5hws_send_engine_post_ctrl
65 mlx5hws_send_engine_post_start(struct mlx5hws_send_engine *queue)
66 {
67 	struct mlx5hws_send_engine_post_ctrl ctrl;
68 
69 	ctrl.queue = queue;
70 	/* Currently only one send ring is supported */
71 	ctrl.send_ring = &queue->send_ring;
72 	ctrl.num_wqebbs = 0;
73 
74 	return ctrl;
75 }
76 
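/* Hand out the next WQE building block (WQEBB) of the WQE being built. For
 * the first WQEBB the control segment is skipped; it is filled in later by
 * mlx5hws_send_engine_post_end().
 */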
77 void mlx5hws_send_engine_post_req_wqe(struct mlx5hws_send_engine_post_ctrl *ctrl,
78 				      char **buf, size_t *len)
79 {
80 	struct mlx5hws_send_ring_sq *send_sq = &ctrl->send_ring->send_sq;
81 	unsigned int idx;
82 
83 	idx = (send_sq->cur_post + ctrl->num_wqebbs) & send_sq->buf_mask;
84 
85 	/* Note that *buf is a single MLX5_SEND_WQE_BB. It cannot be used
86 	 * as a buffer for more than one WQE_BB, since two consecutive
87 	 * MLX5_SEND_WQE_BBs can reside on 2 different kernel memory pages.
88 	 */
89 	*buf = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
90 	*len = MLX5_SEND_WQE_BB;
91 
92 	if (!ctrl->num_wqebbs) {
93 		*buf += sizeof(struct mlx5hws_wqe_ctrl_seg);
94 		*len -= sizeof(struct mlx5hws_wqe_ctrl_seg);
95 	}
96 
97 	ctrl->num_wqebbs++;
98 }
99 
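/* Publish the posted WQEs to HW: update the doorbell record with the new
 * producer index and ring the doorbell. The barriers order the WQE writes,
 * the doorbell record update and the doorbell write itself.
 */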
100 static void hws_send_engine_post_ring(struct mlx5hws_send_ring_sq *sq,
101 				      struct mlx5hws_wqe_ctrl_seg *doorbell_cseg)
102 {
103 	/* ensure wqe is visible to device before updating doorbell record */
104 	dma_wmb();
105 
106 	*sq->wq.db = cpu_to_be32(sq->cur_post);
107 
108 	/* ensure doorbell record is visible to device before ringing the
109 	 * doorbell
110 	 */
111 	wmb();
112 
113 	mlx5_write64((__be32 *)doorbell_cseg, sq->uar_map);
114 
115 	/* Ensure doorbell is written on uar_page before poll_cq */
116 	WRITE_ONCE(doorbell_cseg, NULL);
117 }
118 
119 static void
120 hws_send_wqe_set_tag(struct mlx5hws_wqe_gta_data_seg_ste *wqe_data,
121 		     struct mlx5hws_rule_match_tag *tag,
122 		     bool is_jumbo)
123 {
124 	if (is_jumbo) {
125 		/* Clear previous possibly dirty control */
126 		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ);
127 		memcpy(wqe_data->jumbo, tag->jumbo, MLX5HWS_JUMBO_TAG_SZ);
128 	} else {
129 		/* Clear previous possibly dirty control and actions */
130 		memset(wqe_data, 0, MLX5HWS_STE_CTRL_SZ + MLX5HWS_ACTIONS_SZ);
131 		memcpy(wqe_data->tag, tag->match, MLX5HWS_MATCH_TAG_SZ);
132 	}
133 }
134 
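/* Finalize a WQE built via mlx5hws_send_engine_post_req_wqe(): fill in the
 * control segment, save the per-WQE private data needed at completion time,
 * advance the producer index and, if requested, ring the doorbell.
 */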
135 void mlx5hws_send_engine_post_end(struct mlx5hws_send_engine_post_ctrl *ctrl,
136 				  struct mlx5hws_send_engine_post_attr *attr)
137 {
138 	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;
139 	struct mlx5hws_send_ring_sq *sq;
140 	unsigned int idx;
141 	u32 flags = 0;
142 
143 	sq = &ctrl->send_ring->send_sq;
144 	idx = sq->cur_post & sq->buf_mask;
145 	sq->last_idx = idx;
146 
147 	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, idx);
148 
149 	wqe_ctrl->opmod_idx_opcode =
150 		cpu_to_be32((attr->opmod << 24) |
151 			    ((sq->cur_post & 0xffff) << 8) |
152 			    attr->opcode);
153 	wqe_ctrl->qpn_ds =
154 		cpu_to_be32((attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16 |
155 				 sq->sqn << 8);
156 	wqe_ctrl->imm = cpu_to_be32(attr->id);
157 
158 	flags |= attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
159 	flags |= attr->fence ? MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE : 0;
160 	wqe_ctrl->flags = cpu_to_be32(flags);
161 
162 	sq->wr_priv[idx].id = attr->id;
163 	sq->wr_priv[idx].retry_id = attr->retry_id;
164 
165 	sq->wr_priv[idx].rule = attr->rule;
166 	sq->wr_priv[idx].user_data = attr->user_data;
167 	sq->wr_priv[idx].num_wqebbs = ctrl->num_wqebbs;
168 
169 	if (attr->rule) {
170 		sq->wr_priv[idx].rule->pending_wqes++;
171 		sq->wr_priv[idx].used_id = attr->used_id;
172 	}
173 
174 	sq->cur_post += ctrl->num_wqebbs;
175 
176 	if (attr->notify_hw)
177 		hws_send_engine_post_ring(sq, wqe_ctrl);
178 }
179 
180 static void hws_send_wqe(struct mlx5hws_send_engine *queue,
181 			 struct mlx5hws_send_engine_post_attr *send_attr,
182 			 struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
183 			 void *send_wqe_data,
184 			 void *send_wqe_tag,
185 			 bool is_jumbo,
186 			 u8 gta_opcode,
187 			 u32 direct_index)
188 {
189 	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
190 	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
191 	struct mlx5hws_send_engine_post_ctrl ctrl;
192 	size_t wqe_len;
193 
194 	ctrl = mlx5hws_send_engine_post_start(queue);
195 	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
196 	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);
197 
198 	wqe_ctrl->op_dirix = cpu_to_be32(gta_opcode << 28 | direct_index);
199 	memcpy(wqe_ctrl->stc_ix, send_wqe_ctrl->stc_ix,
200 	       sizeof(send_wqe_ctrl->stc_ix));
201 
202 	if (send_wqe_data)
203 		memcpy(wqe_data, send_wqe_data, sizeof(*wqe_data));
204 	else
205 		hws_send_wqe_set_tag(wqe_data, send_wqe_tag, is_jumbo);
206 
207 	mlx5hws_send_engine_post_end(&ctrl, send_attr);
208 }
209 
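/* Post a rule STE WQE to one or both RTCs. When both RTCs are used, the
 * fence is applied only to the first posted WQE and HW is notified only on
 * the last one; the caller's original attributes are restored afterwards.
 */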
210 void mlx5hws_send_ste(struct mlx5hws_send_engine *queue,
211 		      struct mlx5hws_send_ste_attr *ste_attr)
212 {
213 	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
214 	u8 notify_hw = send_attr->notify_hw;
215 	u8 fence = send_attr->fence;
216 
217 	if (ste_attr->rtc_1) {
218 		send_attr->id = ste_attr->rtc_1;
219 		send_attr->used_id = ste_attr->used_id_rtc_1;
220 		send_attr->retry_id = ste_attr->retry_rtc_1;
221 		send_attr->fence = fence;
222 		send_attr->notify_hw = notify_hw && !ste_attr->rtc_0;
223 		hws_send_wqe(queue, send_attr,
224 			     ste_attr->wqe_ctrl,
225 			     ste_attr->wqe_data,
226 			     ste_attr->wqe_tag,
227 			     ste_attr->wqe_tag_is_jumbo,
228 			     ste_attr->gta_opcode,
229 			     ste_attr->direct_index);
230 	}
231 
232 	if (ste_attr->rtc_0) {
233 		send_attr->id = ste_attr->rtc_0;
234 		send_attr->used_id = ste_attr->used_id_rtc_0;
235 		send_attr->retry_id = ste_attr->retry_rtc_0;
236 		send_attr->fence = fence && !ste_attr->rtc_1;
237 		send_attr->notify_hw = notify_hw;
238 		hws_send_wqe(queue, send_attr,
239 			     ste_attr->wqe_ctrl,
240 			     ste_attr->wqe_data,
241 			     ste_attr->wqe_tag,
242 			     ste_attr->wqe_tag_is_jumbo,
243 			     ste_attr->gta_opcode,
244 			     ste_attr->direct_index);
245 	}
246 
247 	/* Restore to original requested values */
248 	send_attr->notify_hw = notify_hw;
249 	send_attr->fence = fence;
250 }
251 
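/* Re-post a rule WQE that completed with an error, this time targeting the
 * retry (collision) RTC. The GTA control and data segments are copied from
 * the original WQE, which is still present in the SQ buffer at wqe_cnt.
 */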
252 static void hws_send_engine_retry_post_send(struct mlx5hws_send_engine *queue,
253 					    struct mlx5hws_send_ring_priv *priv,
254 					    u16 wqe_cnt)
255 {
256 	struct mlx5hws_send_engine_post_attr send_attr = {0};
257 	struct mlx5hws_wqe_gta_data_seg_ste *wqe_data;
258 	struct mlx5hws_wqe_gta_ctrl_seg *wqe_ctrl;
259 	struct mlx5hws_send_engine_post_ctrl ctrl;
260 	struct mlx5hws_send_ring_sq *send_sq;
261 	unsigned int idx;
262 	size_t wqe_len;
263 	char *p;
264 
265 	send_attr.rule = priv->rule;
266 	send_attr.opcode = MLX5HWS_WQE_OPCODE_TBL_ACCESS;
267 	send_attr.opmod = MLX5HWS_WQE_GTA_OPMOD_STE;
268 	send_attr.len = MLX5_SEND_WQE_BB * 2 - sizeof(struct mlx5hws_wqe_ctrl_seg);
269 	send_attr.notify_hw = 1;
270 	send_attr.fence = 0;
271 	send_attr.user_data = priv->user_data;
272 	send_attr.id = priv->retry_id;
273 	send_attr.used_id = priv->used_id;
274 
275 	ctrl = mlx5hws_send_engine_post_start(queue);
276 	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
277 	mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_data, &wqe_len);
278 
279 	send_sq = &ctrl.send_ring->send_sq;
280 	idx = wqe_cnt & send_sq->buf_mask;
281 	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
282 
283 	/* Copy old gta ctrl */
284 	memcpy(wqe_ctrl, p + sizeof(struct mlx5hws_wqe_ctrl_seg),
285 	       MLX5_SEND_WQE_BB - sizeof(struct mlx5hws_wqe_ctrl_seg));
286 
287 	idx = (wqe_cnt + 1) & send_sq->buf_mask;
288 	p = mlx5_wq_cyc_get_wqe(&send_sq->wq, idx);
289 
290 	/* Copy old gta data */
291 	memcpy(wqe_data, p, MLX5_SEND_WQE_BB);
292 
293 	mlx5hws_send_engine_post_end(&ctrl, &send_attr);
294 }
295 
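/* Request a completion for the last posted WQE and ring the doorbell, so
 * that the queue can be drained by polling the CQ.
 */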
296 void mlx5hws_send_engine_flush_queue(struct mlx5hws_send_engine *queue)
297 {
298 	struct mlx5hws_send_ring_sq *sq = &queue->send_ring.send_sq;
299 	struct mlx5hws_wqe_ctrl_seg *wqe_ctrl;
300 
301 	wqe_ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, sq->last_idx);
302 	wqe_ctrl->flags |= cpu_to_be32(MLX5_WQE_CTRL_CQ_UPDATE);
303 
304 	hws_send_engine_post_ring(sq, wqe_ctrl);
305 }
306 
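/* Handle a completion for a rule that is being moved to a resized matcher:
 * in the WRITING phase clean up the leftovers of the move (reverting the
 * RTC info first if the write failed), and in the DELETING phase report the
 * final result and return the rule to the CREATED state.
 */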
307 static void
308 hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue,
309 				   struct mlx5hws_send_ring_priv *priv,
310 				   enum mlx5hws_flow_op_status *status)
311 {
312 	switch (priv->rule->resize_info->state) {
313 	case MLX5HWS_RULE_RESIZE_STATE_WRITING:
314 		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
315 			/* Backup original RTCs */
316 			u32 orig_rtc_0 = priv->rule->resize_info->rtc_0;
317 			u32 orig_rtc_1 = priv->rule->resize_info->rtc_1;
318 
319 			/* Delete partially failed move rule using resize_info */
320 			priv->rule->resize_info->rtc_0 = priv->rule->rtc_0;
321 			priv->rule->resize_info->rtc_1 = priv->rule->rtc_1;
322 
323 			/* Move rule to original RTC for future delete */
324 			priv->rule->rtc_0 = orig_rtc_0;
325 			priv->rule->rtc_1 = orig_rtc_1;
326 		}
327 		/* Clean leftovers */
328 		mlx5hws_rule_move_hws_remove(priv->rule, queue, priv->user_data);
329 		break;
330 
331 	case MLX5HWS_RULE_RESIZE_STATE_DELETING:
332 		if (priv->rule->status == MLX5HWS_RULE_STATUS_FAILING) {
333 			*status = MLX5HWS_FLOW_OP_ERROR;
334 		} else {
335 			*status = MLX5HWS_FLOW_OP_SUCCESS;
336 			priv->rule->matcher = priv->rule->matcher->resize_dst;
337 		}
338 		priv->rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_IDLE;
339 		priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
340 		break;
341 
342 	default:
343 		break;
344 	}
345 }
346 
347 static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue,
348 					   struct mlx5hws_send_ring_priv *priv,
349 					   struct mlx5_cqe64 *cqe)
350 {
351 	u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0;
352 	struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx;
353 	u32 opcode = cqe ? get_cqe_opcode(cqe) : 0;
354 	struct mlx5hws_rule *rule = priv->rule;
355 
356 	/* If something bad happens and lots of rules are failing, we don't
357 	 * want to pollute dmesg. Print only the first bad cqe per engine,
358 	 * the one that started the avalanche.
359 	 */
360 	if (queue->error_cqe_printed)
361 		return;
362 
363 	queue->error_cqe_printed = true;
364 
365 	if (mlx5hws_rule_move_in_progress(rule))
366 		mlx5hws_err(ctx,
367 			    "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n",
368 			    HWS_PTR_TO_ID(rule),
369 			    rule->resize_info->state ==
370 			    MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" :
371 			    rule->resize_info->state ==
372 			    MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" :
373 			    "UNKNOWN",
374 			    rule->pending_wqes);
375 	else
376 		mlx5hws_err(ctx,
377 			    "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n",
378 			    HWS_PTR_TO_ID(rule),
379 			    rule->status ==
380 			    MLX5HWS_RULE_STATUS_CREATING ? "CREATING" :
381 			    rule->status ==
382 			    MLX5HWS_RULE_STATUS_DELETING ? "DELETING" :
383 			    rule->status ==
384 			    MLX5HWS_RULE_STATUS_FAILING ? "FAILING" :
385 			    rule->status ==
386 			    MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA",
387 			    rule->status,
388 			    rule->pending_wqes);
389 
390 	mlx5hws_err(ctx, "    rule 0x%08llx: matcher 0x%llx %s\n",
391 		    HWS_PTR_TO_ID(rule),
392 		    HWS_PTR_TO_ID(rule->matcher),
393 		    (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ?
394 		    "(isolated)" : "");
395 
396 	if (!cqe) {
397 		mlx5hws_err(ctx, "    rule 0x%08llx: no CQE\n",
398 			    HWS_PTR_TO_ID(rule));
399 		return;
400 	}
401 
402 	mlx5hws_err(ctx, "    rule 0x%08llx: cqe->opcode      = %d %s\n",
403 		    HWS_PTR_TO_ID(rule), opcode,
404 		    opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" :
405 		    opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " ");
406 
407 	if (opcode == MLX5_CQE_REQ_ERR) {
408 		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
409 
410 		mlx5hws_err(ctx,
411 			    "    rule 0x%08llx:  |--- hw_error_syndrome = 0x%x\n",
412 			    HWS_PTR_TO_ID(rule),
413 			    err_cqe->rsvd1[16]);
414 		mlx5hws_err(ctx,
415 			    "    rule 0x%08llx:  |--- hw_syndrome_type = 0x%x\n",
416 			    HWS_PTR_TO_ID(rule),
417 			    err_cqe->rsvd1[17] >> 4);
418 		mlx5hws_err(ctx,
419 			    "    rule 0x%08llx:  |--- vendor_err_synd = 0x%x\n",
420 			    HWS_PTR_TO_ID(rule),
421 			    err_cqe->vendor_err_synd);
422 		mlx5hws_err(ctx,
423 			    "    rule 0x%08llx:  |--- syndrome = 0x%x\n",
424 			    HWS_PTR_TO_ID(rule),
425 			    err_cqe->syndrome);
426 	}
427 
428 	mlx5hws_err(ctx,
429 		    "    rule 0x%08llx: cqe->byte_cnt      = 0x%08x\n",
430 		    HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt));
431 	mlx5hws_err(ctx,
432 		    "    rule 0x%08llx:  |-- UPDATE STATUS = %s\n",
433 		    HWS_PTR_TO_ID(rule),
434 		    (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ?
435 		    "FAILURE" : "SUCCESS");
436 	mlx5hws_err(ctx,
437 		    "    rule 0x%08llx:  |------- SYNDROME = %s\n",
438 		    HWS_PTR_TO_ID(rule),
439 		    ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ?
440 		    "SET_FLOW_FAIL" :
441 		    ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ?
442 		    "DISABLE_FLOW_FAIL" : "UNKNOWN");
443 	mlx5hws_err(ctx,
444 		    "    rule 0x%08llx: cqe->sop_drop_qpn  = 0x%08x\n",
445 		    HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn));
446 	mlx5hws_err(ctx,
447 		    "    rule 0x%08llx:  |-send wqe opcode = 0x%02x %s\n",
448 		    HWS_PTR_TO_ID(rule), wqe_opcode,
449 		    wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ?
450 		    "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)");
451 	mlx5hws_err(ctx,
452 		    "    rule 0x%08llx:  |------------ qpn = 0x%06x\n",
453 		    HWS_PTR_TO_ID(rule),
454 		    be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff);
455 }
456 
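/* Process a completion of a single rule WQE: retry on the collision RTC
 * when possible, record failures, and once the last pending WQE of the rule
 * has completed, advance the rule status and release resources that are no
 * longer needed.
 */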
457 static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
458 					struct mlx5hws_send_ring_priv *priv,
459 					u16 wqe_cnt,
460 					enum mlx5hws_flow_op_status *status,
461 					struct mlx5_cqe64 *cqe)
462 {
463 	priv->rule->pending_wqes--;
464 
465 	if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) {
466 		if (priv->retry_id) {
467 			/* If there is a retry_id, then it's not an error yet,
468 			 * retry to insert this rule in the collision RTC.
469 			 */
470 			hws_send_engine_retry_post_send(queue, priv, wqe_cnt);
471 			return;
472 		}
473 		hws_send_engine_dump_error_cqe(queue, priv, cqe);
474 		/* Some part of the rule failed */
475 		priv->rule->status = MLX5HWS_RULE_STATUS_FAILING;
476 		*priv->used_id = 0;
477 	} else {
478 		*priv->used_id = priv->id;
479 	}
480 
481 	/* Update rule status for the last completion */
482 	if (!priv->rule->pending_wqes) {
483 		if (unlikely(mlx5hws_rule_move_in_progress(priv->rule))) {
484 			hws_send_engine_update_rule_resize(queue, priv, status);
485 			return;
486 		}
487 
488 		if (unlikely(priv->rule->status == MLX5HWS_RULE_STATUS_FAILING)) {
489 			/* Rule completely failed and doesn't require cleanup */
490 			if (!priv->rule->rtc_0 && !priv->rule->rtc_1)
491 				priv->rule->status = MLX5HWS_RULE_STATUS_FAILED;
492 
493 			*status = MLX5HWS_FLOW_OP_ERROR;
494 		} else {
495 			/* Increase the status, this only works on good flow as
496 			 * the enum is arranged this way:
497 			 *  - creating -> created
498 			 *  - updating -> updated
499 			 *  - deleting -> deleted
500 			 */
501 			priv->rule->status++;
502 			*status = MLX5HWS_FLOW_OP_SUCCESS;
503 			if (priv->rule->status == MLX5HWS_RULE_STATUS_DELETED) {
504 				/* Rule was deleted, now we can safely release
505 				 * action STEs and clear resize info
506 				 */
507 				mlx5hws_rule_free_action_ste(&priv->rule->action_ste);
508 				mlx5hws_rule_clear_resize_info(priv->rule);
509 			} else if (priv->rule->status == MLX5HWS_RULE_STATUS_UPDATED) {
510 				/* Rule was updated, free the old action STEs */
511 				mlx5hws_rule_free_action_ste(&priv->rule->old_action_ste);
512 				/* Update completed - move the rule back to "created" */
513 				priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
514 			}
515 		}
516 	}
517 }
518 
519 static void hws_send_engine_update(struct mlx5hws_send_engine *queue,
520 				   struct mlx5_cqe64 *cqe,
521 				   struct mlx5hws_send_ring_priv *priv,
522 				   struct mlx5hws_flow_op_result res[],
523 				   s64 *i,
524 				   u32 res_nb,
525 				   u16 wqe_cnt)
526 {
527 	enum mlx5hws_flow_op_status status;
528 
529 	if (!cqe || (likely(be32_to_cpu(cqe->byte_cnt) >> 31 == 0) &&
530 		     likely(get_cqe_opcode(cqe) == MLX5_CQE_REQ))) {
531 		status = MLX5HWS_FLOW_OP_SUCCESS;
532 	} else {
533 		status = MLX5HWS_FLOW_OP_ERROR;
534 	}
535 
536 	if (priv->user_data) {
537 		if (priv->rule) {
538 			hws_send_engine_update_rule(queue, priv, wqe_cnt,
539 						    &status, cqe);
540 			/* Completion is provided on the last rule WQE */
541 			if (priv->rule->pending_wqes)
542 				return;
543 		}
544 
545 		if (*i < res_nb) {
546 			res[*i].user_data = priv->user_data;
547 			res[*i].status = status;
548 			(*i)++;
549 			mlx5hws_send_engine_dec_rule(queue);
550 		} else {
551 			mlx5hws_send_engine_gen_comp(queue, priv->user_data, status);
552 		}
553 	}
554 }
555 
556 static int mlx5hws_parse_cqe(struct mlx5hws_send_ring_cq *cq,
557 			     struct mlx5_cqe64 *cqe64)
558 {
559 	if (unlikely(get_cqe_opcode(cqe64) != MLX5_CQE_REQ)) {
560 		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe64;
561 
562 		mlx5_core_err(cq->mdev, "Bad OP in HWS SQ CQE: 0x%x\n", get_cqe_opcode(cqe64));
563 		mlx5_core_err(cq->mdev, "vendor_err_synd=%x\n", err_cqe->vendor_err_synd);
564 		mlx5_core_err(cq->mdev, "syndrome=%x\n", err_cqe->syndrome);
565 		print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET,
566 			       16, 1, err_cqe,
567 			       sizeof(*err_cqe), false);
568 		return CQ_POLL_ERR;
569 	}
570 
571 	return CQ_OK;
572 }
573 
574 static int mlx5hws_cq_poll_one(struct mlx5hws_send_ring_cq *cq)
575 {
576 	struct mlx5_cqe64 *cqe64;
577 	int err;
578 
579 	cqe64 = mlx5_cqwq_get_cqe(&cq->wq);
580 	if (!cqe64) {
581 		if (unlikely(cq->mdev->state ==
582 			     MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
583 			mlx5_core_dbg_once(cq->mdev,
584 					   "Polling CQ while device is shutting down\n");
585 			return CQ_POLL_ERR;
586 		}
587 		return CQ_EMPTY;
588 	}
589 
590 	mlx5_cqwq_pop(&cq->wq);
591 	err = mlx5hws_parse_cqe(cq, cqe64);
592 	mlx5_cqwq_update_db_record(&cq->wq);
593 
594 	return err;
595 }
596 
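/* Handle one CQE from the send CQ. WQEs that did not request a completion
 * are implicitly completed by a later CQE, so all entries up to the
 * reported wqe_counter are processed without a CQE first, and then the
 * entry the CQE refers to is processed with the CQE itself.
 */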
597 static void hws_send_engine_poll_cq(struct mlx5hws_send_engine *queue,
598 				    struct mlx5hws_flow_op_result res[],
599 				    s64 *polled,
600 				    u32 res_nb)
601 {
602 	struct mlx5hws_send_ring *send_ring = &queue->send_ring;
603 	struct mlx5hws_send_ring_cq *cq = &send_ring->send_cq;
604 	struct mlx5hws_send_ring_sq *sq = &send_ring->send_sq;
605 	struct mlx5hws_send_ring_priv *priv;
606 	struct mlx5_cqe64 *cqe;
607 	u8 cqe_opcode;
608 	u16 wqe_cnt;
609 
610 	cqe = mlx5_cqwq_get_cqe(&cq->wq);
611 	if (!cqe)
612 		return;
613 
614 	cqe_opcode = get_cqe_opcode(cqe);
615 	if (cqe_opcode == MLX5_CQE_INVALID)
616 		return;
617 
618 	if (unlikely(cqe_opcode != MLX5_CQE_REQ))
619 		queue->err = true;
620 
621 	wqe_cnt = be16_to_cpu(cqe->wqe_counter) & sq->buf_mask;
622 
623 	while (cq->poll_wqe != wqe_cnt) {
624 		priv = &sq->wr_priv[cq->poll_wqe];
625 		hws_send_engine_update(queue, NULL, priv, res, polled, res_nb, 0);
626 		cq->poll_wqe = (cq->poll_wqe + priv->num_wqebbs) & sq->buf_mask;
627 	}
628 
629 	priv = &sq->wr_priv[wqe_cnt];
630 	cq->poll_wqe = (wqe_cnt + priv->num_wqebbs) & sq->buf_mask;
631 	hws_send_engine_update(queue, cqe, priv, res, polled, res_nb, wqe_cnt);
632 	mlx5hws_cq_poll_one(cq);
633 }
634 
635 static void hws_send_engine_poll_list(struct mlx5hws_send_engine *queue,
636 				      struct mlx5hws_flow_op_result res[],
637 				      s64 *polled,
638 				      u32 res_nb)
639 {
640 	struct mlx5hws_completed_poll *comp = &queue->completed;
641 
642 	while (comp->ci != comp->pi) {
643 		if (*polled < res_nb) {
644 			res[*polled].status =
645 				comp->entries[comp->ci].status;
646 			res[*polled].user_data =
647 				comp->entries[comp->ci].user_data;
648 			(*polled)++;
649 			comp->ci = (comp->ci + 1) & comp->mask;
650 			mlx5hws_send_engine_dec_rule(queue);
651 		} else {
652 			return;
653 		}
654 	}
655 }
656 
657 static int hws_send_engine_poll(struct mlx5hws_send_engine *queue,
658 				struct mlx5hws_flow_op_result res[],
659 				u32 res_nb)
660 {
661 	s64 polled = 0;
662 
663 	hws_send_engine_poll_list(queue, res, &polled, res_nb);
664 
665 	if (polled >= res_nb)
666 		return polled;
667 
668 	hws_send_engine_poll_cq(queue, res, &polled, res_nb);
669 
670 	return polled;
671 }
672 
673 int mlx5hws_send_queue_poll(struct mlx5hws_context *ctx,
674 			    u16 queue_id,
675 			    struct mlx5hws_flow_op_result res[],
676 			    u32 res_nb)
677 {
678 	return hws_send_engine_poll(&ctx->send_queue[queue_id], res, res_nb);
679 }
680 
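/* Allocate the SQ resources: the cyclic work queue, the dependent WQE ring
 * and the per-WQE private data array. The SQ buffer is sized to hold up to
 * MAX_WQES_PER_RULE WQEs per queue entry.
 */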
681 static int hws_send_ring_alloc_sq(struct mlx5_core_dev *mdev,
682 				  int numa_node,
683 				  struct mlx5hws_send_engine *queue,
684 				  struct mlx5hws_send_ring_sq *sq,
685 				  void *sqc_data)
686 {
687 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
688 	struct mlx5_wq_cyc *wq = &sq->wq;
689 	struct mlx5_wq_param param;
690 	size_t buf_sz;
691 	int err;
692 
693 	sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
694 	sq->mdev = mdev;
695 
696 	param.db_numa_node = numa_node;
697 	param.buf_numa_node = numa_node;
698 	err = mlx5_wq_cyc_create(mdev, &param, sqc_wq, wq, &sq->wq_ctrl);
699 	if (err)
700 		return err;
701 	wq->db = &wq->db[MLX5_SND_DBR];
702 
703 	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
704 	sq->dep_wqe = kcalloc(queue->num_entries, sizeof(*sq->dep_wqe), GFP_KERNEL);
705 	if (!sq->dep_wqe) {
706 		err = -ENOMEM;
707 		goto destroy_wq_cyc;
708 	}
709 
710 	sq->wr_priv = kzalloc(sizeof(*sq->wr_priv) * buf_sz, GFP_KERNEL);
711 	if (!sq->wr_priv) {
712 		err = -ENOMEM;
713 		goto free_dep_wqe;
714 	}
715 
716 	sq->buf_mask = (queue->num_entries * MAX_WQES_PER_RULE) - 1;
717 
718 	return 0;
719 
720 free_dep_wqe:
721 	kfree(sq->dep_wqe);
722 destroy_wq_cyc:
723 	mlx5_wq_destroy(&sq->wq_ctrl);
724 	return err;
725 }
726 
727 static void hws_send_ring_free_sq(struct mlx5hws_send_ring_sq *sq)
728 {
729 	if (!sq)
730 		return;
731 	kfree(sq->wr_priv);
732 	kfree(sq->dep_wqe);
733 	mlx5_wq_destroy(&sq->wq_ctrl);
734 }
735 
736 static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn,
737 				   void *sqc_data,
738 				   struct mlx5hws_send_engine *queue,
739 				   struct mlx5hws_send_ring_sq *sq,
740 				   struct mlx5hws_send_ring_cq *cq)
741 {
742 	void *in, *sqc, *wq;
743 	int inlen, err;
744 	u8 ts_format;
745 
746 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
747 		sizeof(u64) * sq->wq_ctrl.buf.npages;
748 	in = kvzalloc(inlen, GFP_KERNEL);
749 	if (!in)
750 		return -ENOMEM;
751 
752 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
753 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
754 
755 	memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
756 	MLX5_SET(sqc, sqc, cqn, cq->mcq.cqn);
757 
758 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
759 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
760 	MLX5_SET(sqc, sqc, non_wire, 1);
761 
762 	ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME :
763 						 MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
764 	MLX5_SET(sqc, sqc, ts_format, ts_format);
765 
766 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
767 	MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index);
768 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
769 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
770 
771 	mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
772 				  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
773 
774 	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
775 
776 	kvfree(in);
777 
778 	return err;
779 }
780 
781 static void hws_send_ring_destroy_sq(struct mlx5_core_dev *mdev,
782 				     struct mlx5hws_send_ring_sq *sq)
783 {
784 	mlx5_core_destroy_sq(mdev, sq->sqn);
785 }
786 
787 static int hws_send_ring_set_sq_rdy(struct mlx5_core_dev *mdev, u32 sqn)
788 {
789 	void *in, *sqc;
790 	int inlen, err;
791 
792 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
793 	in = kvzalloc(inlen, GFP_KERNEL);
794 	if (!in)
795 		return -ENOMEM;
796 
797 	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
798 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
799 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
800 
801 	err = mlx5_core_modify_sq(mdev, sqn, in);
802 
803 	kvfree(in);
804 
805 	return err;
806 }
807 
808 static void hws_send_ring_close_sq(struct mlx5hws_send_ring_sq *sq)
809 {
810 	mlx5_core_destroy_sq(sq->mdev, sq->sqn);
811 	mlx5_wq_destroy(&sq->wq_ctrl);
812 	kfree(sq->wr_priv);
813 	kfree(sq->dep_wqe);
814 }
815 
816 static int hws_send_ring_create_sq_rdy(struct mlx5_core_dev *mdev, u32 pdn,
817 				       void *sqc_data,
818 				       struct mlx5hws_send_engine *queue,
819 				       struct mlx5hws_send_ring_sq *sq,
820 				       struct mlx5hws_send_ring_cq *cq)
821 {
822 	int err;
823 
824 	err = hws_send_ring_create_sq(mdev, pdn, sqc_data, queue, sq, cq);
825 	if (err)
826 		return err;
827 
828 	err = hws_send_ring_set_sq_rdy(mdev, sq->sqn);
829 	if (err)
830 		hws_send_ring_destroy_sq(mdev, sq);
831 
832 	return err;
833 }
834 
835 static int hws_send_ring_open_sq(struct mlx5hws_context *ctx,
836 				 int numa_node,
837 				 struct mlx5hws_send_engine *queue,
838 				 struct mlx5hws_send_ring_sq *sq,
839 				 struct mlx5hws_send_ring_cq *cq)
840 {
841 	size_t buf_sz, sq_log_buf_sz;
842 	void *sqc_data, *wq;
843 	int err;
844 
845 	sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
846 	if (!sqc_data)
847 		return -ENOMEM;
848 
849 	buf_sz = queue->num_entries * MAX_WQES_PER_RULE;
850 	sq_log_buf_sz = ilog2(roundup_pow_of_two(buf_sz));
851 
852 	wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
853 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
854 	MLX5_SET(wq, wq, pd, ctx->pd_num);
855 	MLX5_SET(wq, wq, log_wq_sz, sq_log_buf_sz);
856 
857 	err = hws_send_ring_alloc_sq(ctx->mdev, numa_node, queue, sq, sqc_data);
858 	if (err)
859 		goto err_free_sqc;
860 
861 	err = hws_send_ring_create_sq_rdy(ctx->mdev, ctx->pd_num, sqc_data,
862 					  queue, sq, cq);
863 	if (err)
864 		goto err_free_sq;
865 
866 	kvfree(sqc_data);
867 
868 	return 0;
869 err_free_sq:
870 	hws_send_ring_free_sq(sq);
871 err_free_sqc:
872 	kvfree(sqc_data);
873 	return err;
874 }
875 
876 static void hws_cq_complete(struct mlx5_core_cq *mcq,
877 			    struct mlx5_eqe *eqe)
878 {
879 	pr_err("CQ completion CQ: #%u\n", mcq->cqn);
880 }
881 
882 static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
883 				  int numa_node,
884 				  struct mlx5hws_send_engine *queue,
885 				  void *cqc_data,
886 				  struct mlx5hws_send_ring_cq *cq)
887 {
888 	struct mlx5_core_cq *mcq = &cq->mcq;
889 	struct mlx5_wq_param param;
890 	struct mlx5_cqe64 *cqe;
891 	int err;
892 	u32 i;
893 
894 	param.buf_numa_node = numa_node;
895 	param.db_numa_node = numa_node;
896 
897 	err = mlx5_cqwq_create(mdev, &param, cqc_data, &cq->wq, &cq->wq_ctrl);
898 	if (err)
899 		return err;
900 
901 	mcq->cqe_sz = 64;
902 	mcq->set_ci_db = cq->wq_ctrl.db.db;
903 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
904 	mcq->comp = hws_cq_complete;
905 
906 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
907 		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
908 		cqe->op_own = 0xf1;
909 	}
910 
911 	cq->mdev = mdev;
912 
913 	return 0;
914 }
915 
916 static int hws_send_ring_create_cq(struct mlx5_core_dev *mdev,
917 				   struct mlx5hws_send_engine *queue,
918 				   void *cqc_data,
919 				   struct mlx5hws_send_ring_cq *cq)
920 {
921 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
922 	struct mlx5_core_cq *mcq = &cq->mcq;
923 	void *in, *cqc;
924 	int inlen, eqn;
925 	int err;
926 
927 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
928 	if (err)
929 		return err;
930 
931 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
932 		sizeof(u64) * cq->wq_ctrl.buf.npages;
933 	in = kvzalloc(inlen, GFP_KERNEL);
934 	if (!in)
935 		return -ENOMEM;
936 
937 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
938 	memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
939 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
940 				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
941 
942 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
943 	MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
944 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
945 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
946 
947 	err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
948 
949 	kvfree(in);
950 
951 	return err;
952 }
953 
954 static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev,
955 				 struct mlx5hws_send_engine *queue,
956 				 int numa_node,
957 				 struct mlx5hws_send_ring_cq *cq)
958 {
959 	void *cqc_data;
960 	int err;
961 
962 	cqc_data = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
963 	if (!cqc_data)
964 		return -ENOMEM;
965 
966 	MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index);
967 	MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries);
968 	MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries));
969 
970 	err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq);
971 	if (err)
972 		goto err_out;
973 
974 	err = hws_send_ring_create_cq(mdev, queue, cqc_data, cq);
975 	if (err)
976 		goto err_free_cq;
977 
978 	kvfree(cqc_data);
979 
980 	return 0;
981 
982 err_free_cq:
983 	mlx5_wq_destroy(&cq->wq_ctrl);
984 err_out:
985 	kvfree(cqc_data);
986 	return err;
987 }
988 
989 static void hws_send_ring_close_cq(struct mlx5hws_send_ring_cq *cq)
990 {
991 	mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
992 	mlx5_wq_destroy(&cq->wq_ctrl);
993 }
994 
995 static void hws_send_ring_close(struct mlx5hws_send_engine *queue)
996 {
997 	hws_send_ring_close_sq(&queue->send_ring.send_sq);
998 	hws_send_ring_close_cq(&queue->send_ring.send_cq);
999 }
1000 
1001 static int mlx5hws_send_ring_open(struct mlx5hws_context *ctx,
1002 				  struct mlx5hws_send_engine *queue)
1003 {
1004 	int numa_node = dev_to_node(mlx5_core_dma_dev(ctx->mdev));
1005 	struct mlx5hws_send_ring *ring = &queue->send_ring;
1006 	int err;
1007 
1008 	err = hws_send_ring_open_cq(ctx->mdev, queue, numa_node, &ring->send_cq);
1009 	if (err)
1010 		return err;
1011 
1012 	err = hws_send_ring_open_sq(ctx, numa_node, queue, &ring->send_sq,
1013 				    &ring->send_cq);
1014 	if (err)
1015 		goto close_cq;
1016 
1017 	return err;
1018 
1019 close_cq:
1020 	hws_send_ring_close_cq(&ring->send_cq);
1021 	return err;
1022 }
1023 
1024 static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue)
1025 {
1026 	if (!queue->num_entries)
1027 		return; /* this queue wasn't initialized */
1028 
1029 	hws_send_ring_close(queue);
1030 	kfree(queue->completed.entries);
1031 }
1032 
1033 static int mlx5hws_send_queue_open(struct mlx5hws_context *ctx,
1034 				   struct mlx5hws_send_engine *queue,
1035 				   u16 queue_size)
1036 {
1037 	int err;
1038 
1039 	mutex_init(&queue->lock);
1040 
1041 	queue->num_entries = roundup_pow_of_two(queue_size);
1042 	queue->used_entries = 0;
1043 
1044 	queue->completed.entries = kcalloc(queue->num_entries,
1045 					   sizeof(queue->completed.entries[0]),
1046 					   GFP_KERNEL);
1047 	if (!queue->completed.entries)
1048 		return -ENOMEM;
1049 
1050 	queue->completed.pi = 0;
1051 	queue->completed.ci = 0;
1052 	queue->completed.mask = queue->num_entries - 1;
1053 	err = mlx5hws_send_ring_open(ctx, queue);
1054 	if (err)
1055 		goto free_completed_entries;
1056 
1057 	return 0;
1058 
1059 free_completed_entries:
1060 	kfree(queue->completed.entries);
1061 	return err;
1062 }
1063 
1064 static void __hws_send_queues_close(struct mlx5hws_context *ctx, u16 queues)
1065 {
1066 	while (queues--)
1067 		mlx5hws_send_queue_close(&ctx->send_queue[queues]);
1068 }
1069 
1070 static void hws_send_queues_bwc_locks_destroy(struct mlx5hws_context *ctx)
1071 {
1072 	int bwc_queues = mlx5hws_bwc_queues(ctx);
1073 	int i;
1074 
1075 	if (!mlx5hws_context_bwc_supported(ctx))
1076 		return;
1077 
1078 	for (i = 0; i < bwc_queues; i++) {
1079 		mutex_destroy(&ctx->bwc_send_queue_locks[i]);
1080 		lockdep_unregister_key(ctx->bwc_lock_class_keys + i);
1081 	}
1082 
1083 	kfree(ctx->bwc_lock_class_keys);
1084 	kfree(ctx->bwc_send_queue_locks);
1085 }
1086 
1087 void mlx5hws_send_queues_close(struct mlx5hws_context *ctx)
1088 {
1089 	hws_send_queues_bwc_locks_destroy(ctx);
1090 	__hws_send_queues_close(ctx, ctx->queues);
1091 	kfree(ctx->send_queue);
1092 }
1093 
1094 static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx)
1095 {
1096 	/* Number of BWC queues is equal to number of the usual HWS queues */
1097 	int bwc_queues = ctx->queues - 1;
1098 	int i;
1099 
1100 	if (!mlx5hws_context_bwc_supported(ctx))
1101 		return 0;
1102 
1103 	ctx->queues += bwc_queues;
1104 
1105 	ctx->bwc_send_queue_locks = kcalloc(bwc_queues,
1106 					    sizeof(*ctx->bwc_send_queue_locks),
1107 					    GFP_KERNEL);
1108 
1109 	if (!ctx->bwc_send_queue_locks)
1110 		return -ENOMEM;
1111 
1112 	ctx->bwc_lock_class_keys = kcalloc(bwc_queues,
1113 					   sizeof(*ctx->bwc_lock_class_keys),
1114 					   GFP_KERNEL);
1115 	if (!ctx->bwc_lock_class_keys)
1116 		goto err_lock_class_keys;
1117 
1118 	for (i = 0; i < bwc_queues; i++) {
1119 		mutex_init(&ctx->bwc_send_queue_locks[i]);
1120 		lockdep_register_key(ctx->bwc_lock_class_keys + i);
1121 		lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i);
1122 	}
1123 
1124 	return 0;
1125 
1126 err_lock_class_keys:
1127 	kfree(ctx->bwc_send_queue_locks);
1128 	return -ENOMEM;
1129 }
1130 
1131 int mlx5hws_send_queues_open(struct mlx5hws_context *ctx,
1132 			     u16 queues,
1133 			     u16 queue_size)
1134 {
1135 	int err = 0;
1136 	int i = 0;
1137 
1138 	/* Open one extra queue for control path */
1139 	ctx->queues = queues + 1;
1140 
1141 	/* open a separate set of queues and locks for bwc API */
1142 	err = hws_bwc_send_queues_init(ctx);
1143 	if (err)
1144 		return err;
1145 
1146 	ctx->send_queue = kcalloc(ctx->queues, sizeof(*ctx->send_queue), GFP_KERNEL);
1147 	if (!ctx->send_queue) {
1148 		err = -ENOMEM;
1149 		goto free_bwc_locks;
1150 	}
1151 
1152 	/* If native API isn't supported, skip the unused native queues:
1153 	 * initialize BWC queues and control queue only.
1154 	 */
1155 	if (!mlx5hws_context_native_supported(ctx))
1156 		i = mlx5hws_bwc_get_queue_id(ctx, 0);
1157 
1158 	for (; i < ctx->queues; i++) {
1159 		err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size);
1160 		if (err)
1161 			goto close_send_queues;
1162 	}
1163 
1164 	return 0;
1165 
1166 close_send_queues:
1167 	 __hws_send_queues_close(ctx, i);
1168 
1169 	kfree(ctx->send_queue);
1170 
1171 free_bwc_locks:
1172 	hws_send_queues_bwc_locks_destroy(ctx);
1173 
1174 	return err;
1175 }
1176 
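/* Drain a send queue: post any pending dependent WQEs (or request a
 * completion on the last posted WQE), and for the synchronous variant poll
 * the CQ until the queue is empty.
 */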
1177 int mlx5hws_send_queue_action(struct mlx5hws_context *ctx,
1178 			      u16 queue_id,
1179 			      u32 actions)
1180 {
1181 	struct mlx5hws_send_ring_sq *send_sq;
1182 	struct mlx5hws_send_engine *queue;
1183 	bool wait_comp = false;
1184 	s64 polled = 0;
1185 
1186 	queue = &ctx->send_queue[queue_id];
1187 	send_sq = &queue->send_ring.send_sq;
1188 
1189 	switch (actions) {
1190 	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC:
1191 		wait_comp = true;
1192 		fallthrough;
1193 	case MLX5HWS_SEND_QUEUE_ACTION_DRAIN_ASYNC:
1194 		if (send_sq->head_dep_idx != send_sq->tail_dep_idx)
1195 			/* Send dependent WQEs to drain the queue */
1196 			mlx5hws_send_all_dep_wqe(queue);
1197 		else
1198 			/* Signal on the last posted WQE */
1199 			mlx5hws_send_engine_flush_queue(queue);
1200 
1201 		/* Poll queue until empty */
1202 		while (wait_comp && !mlx5hws_send_engine_empty(queue))
1203 			hws_send_engine_poll_cq(queue, NULL, &polled, 0);
1204 
1205 		break;
1206 	default:
1207 		return -EINVAL;
1208 	}
1209 
1210 	return 0;
1211 }
1212 
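/* Write an STE WQE synchronously through the FW generate_wqe command
 * instead of posting it to the send queue. The CQE returned by FW is
 * checked inline; on failure a single retry with the retry id is attempted.
 */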
1213 static int
1214 hws_send_wqe_fw(struct mlx5_core_dev *mdev,
1215 		u32 pd_num,
1216 		struct mlx5hws_send_engine_post_attr *send_attr,
1217 		struct mlx5hws_wqe_gta_ctrl_seg *send_wqe_ctrl,
1218 		void *send_wqe_match_data,
1219 		void *send_wqe_match_tag,
1220 		void *send_wqe_range_data,
1221 		void *send_wqe_range_tag,
1222 		bool is_jumbo,
1223 		u8 gta_opcode)
1224 {
1225 	bool has_range = send_wqe_range_data || send_wqe_range_tag;
1226 	bool has_match = send_wqe_match_data || send_wqe_match_tag;
1227 	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data0 = {0};
1228 	struct mlx5hws_wqe_gta_data_seg_ste gta_wqe_data1 = {0};
1229 	struct mlx5hws_wqe_gta_ctrl_seg gta_wqe_ctrl = {0};
1230 	struct mlx5hws_cmd_generate_wqe_attr attr = {0};
1231 	struct mlx5hws_wqe_ctrl_seg wqe_ctrl = {0};
1232 	struct mlx5_cqe64 cqe;
1233 	u32 flags = 0;
1234 	int ret;
1235 
1236 	/* Set WQE control */
1237 	wqe_ctrl.opmod_idx_opcode = cpu_to_be32((send_attr->opmod << 24) | send_attr->opcode);
1238 	wqe_ctrl.qpn_ds = cpu_to_be32((send_attr->len + sizeof(struct mlx5hws_wqe_ctrl_seg)) / 16);
1239 	flags |= send_attr->notify_hw ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
1240 	wqe_ctrl.flags = cpu_to_be32(flags);
1241 	wqe_ctrl.imm = cpu_to_be32(send_attr->id);
1242 
1243 	/* Set GTA WQE CTRL */
1244 	memcpy(gta_wqe_ctrl.stc_ix, send_wqe_ctrl->stc_ix, sizeof(send_wqe_ctrl->stc_ix));
1245 	gta_wqe_ctrl.op_dirix = cpu_to_be32(gta_opcode << 28);
1246 
1247 	/* Set GTA match WQE DATA */
1248 	if (has_match) {
1249 		if (send_wqe_match_data)
1250 			memcpy(&gta_wqe_data0, send_wqe_match_data, sizeof(gta_wqe_data0));
1251 		else
1252 			hws_send_wqe_set_tag(&gta_wqe_data0, send_wqe_match_tag, is_jumbo);
1253 
1254 		gta_wqe_data0.rsvd1_definer = cpu_to_be32(send_attr->match_definer_id << 8);
1255 		attr.gta_data_0 = (u8 *)&gta_wqe_data0;
1256 	}
1257 
1258 	/* Set GTA range WQE DATA */
1259 	if (has_range) {
1260 		if (send_wqe_range_data)
1261 			memcpy(&gta_wqe_data1, send_wqe_range_data, sizeof(gta_wqe_data1));
1262 		else
1263 			hws_send_wqe_set_tag(&gta_wqe_data1, send_wqe_range_tag, false);
1264 
1265 		gta_wqe_data1.rsvd1_definer = cpu_to_be32(send_attr->range_definer_id << 8);
1266 		attr.gta_data_1 = (u8 *)&gta_wqe_data1;
1267 	}
1268 
1269 	attr.pdn = pd_num;
1270 	attr.wqe_ctrl = (u8 *)&wqe_ctrl;
1271 	attr.gta_ctrl = (u8 *)&gta_wqe_ctrl;
1272 
1273 send_wqe:
1274 	ret = mlx5hws_cmd_generate_wqe(mdev, &attr, &cqe);
1275 	if (ret) {
1276 		mlx5_core_err(mdev, "Failed to write WQE using command");
1277 		return ret;
1278 	}
1279 
1280 	if ((get_cqe_opcode(&cqe) == MLX5_CQE_REQ) &&
1281 	    (be32_to_cpu(cqe.byte_cnt) >> 31 == 0)) {
1282 		*send_attr->used_id = send_attr->id;
1283 		return 0;
1284 	}
1285 
1286 	/* Retry if rule failed */
1287 	if (send_attr->retry_id) {
1288 		wqe_ctrl.imm = cpu_to_be32(send_attr->retry_id);
1289 		send_attr->id = send_attr->retry_id;
1290 		send_attr->retry_id = 0;
1291 		goto send_wqe;
1292 	}
1293 
1294 	return -1;
1295 }
1296 
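/* FW counterpart of mlx5hws_send_ste(): the WQEs are written through FW
 * commands rather than the send queue. Since FW writes cannot be HW fenced,
 * a fence request is emulated by synchronously draining the queue first,
 * and the completion is generated in SW.
 */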
1297 void mlx5hws_send_stes_fw(struct mlx5hws_context *ctx,
1298 			  struct mlx5hws_send_engine *queue,
1299 			  struct mlx5hws_send_ste_attr *ste_attr)
1300 {
1301 	struct mlx5hws_send_engine_post_attr *send_attr = &ste_attr->send_attr;
1302 	struct mlx5hws_rule *rule = send_attr->rule;
1303 	struct mlx5_core_dev *mdev;
1304 	u16 queue_id;
1305 	u32 pdn;
1306 	int ret;
1307 
1308 	queue_id = queue - ctx->send_queue;
1309 	mdev = ctx->mdev;
1310 	pdn = ctx->pd_num;
1311 
1312 	/* Writing through FW can't be HW fenced, therefore we drain the queue */
1313 	if (send_attr->fence)
1314 		mlx5hws_send_queue_action(ctx,
1315 					  queue_id,
1316 					  MLX5HWS_SEND_QUEUE_ACTION_DRAIN_SYNC);
1317 
1318 	if (ste_attr->rtc_1) {
1319 		send_attr->id = ste_attr->rtc_1;
1320 		send_attr->used_id = ste_attr->used_id_rtc_1;
1321 		send_attr->retry_id = ste_attr->retry_rtc_1;
1322 		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
1323 				      ste_attr->wqe_ctrl,
1324 				      ste_attr->wqe_data,
1325 				      ste_attr->wqe_tag,
1326 				      ste_attr->range_wqe_data,
1327 				      ste_attr->range_wqe_tag,
1328 				      ste_attr->wqe_tag_is_jumbo,
1329 				      ste_attr->gta_opcode);
1330 		if (ret)
1331 			goto fail_rule;
1332 	}
1333 
1334 	if (ste_attr->rtc_0) {
1335 		send_attr->id = ste_attr->rtc_0;
1336 		send_attr->used_id = ste_attr->used_id_rtc_0;
1337 		send_attr->retry_id = ste_attr->retry_rtc_0;
1338 		ret = hws_send_wqe_fw(mdev, pdn, send_attr,
1339 				      ste_attr->wqe_ctrl,
1340 				      ste_attr->wqe_data,
1341 				      ste_attr->wqe_tag,
1342 				      ste_attr->range_wqe_data,
1343 				      ste_attr->range_wqe_tag,
1344 				      ste_attr->wqe_tag_is_jumbo,
1345 				      ste_attr->gta_opcode);
1346 		if (ret)
1347 			goto fail_rule;
1348 	}
1349 
1350 	/* Increase the status, this only works on good flow as the enum
1351 	 * is arranged this way: creating -> created -> deleting -> deleted
1352 	 */
1353 	if (likely(rule))
1354 		rule->status++;
1355 
1356 	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_SUCCESS);
1357 
1358 	return;
1359 
1360 fail_rule:
1361 	if (likely(rule))
1362 		rule->status = !rule->rtc_0 && !rule->rtc_1 ?
1363 			MLX5HWS_RULE_STATUS_FAILED : MLX5HWS_RULE_STATUS_FAILING;
1364 
1365 	mlx5hws_send_engine_gen_comp(queue, send_attr->user_data, MLX5HWS_FLOW_OP_ERROR);
1366 }
1367