/* SPDX-License-Identifier: GPL-2.0-only
 * Copyright (C) 2020 Marvell.
 */

#ifndef __OTX2_CPT_REQMGR_H
#define __OTX2_CPT_REQMGR_H

#include "otx2_cpt_common.h"

/* Completion code size and initial value */
#define OTX2_CPT_COMPLETION_CODE_SIZE 8
#define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE
/*
 * The maximum total number of SG buffers is 100; it is divided equally
 * between input and output.
 */
#define OTX2_CPT_MAX_SG_IN_CNT  50
#define OTX2_CPT_MAX_SG_OUT_CNT 50

/* DMA mode direct or SG */
#define OTX2_CPT_DMA_MODE_DIRECT 0
#define OTX2_CPT_DMA_MODE_SG     1

/* Context source CPTR or DPTR */
#define OTX2_CPT_FROM_CPTR 0
#define OTX2_CPT_FROM_DPTR 1

#define OTX2_CPT_MAX_REQ_SIZE 65535

#define SG_COMPS_MAX    4
#define SGV2_COMPS_MAX  3

#define SG_COMP_3    3
#define SG_COMP_2    2
#define SG_COMP_1    1

#define OTX2_CPT_DPTR_RPTR_ALIGN	8
#define OTX2_CPT_RES_ADDR_ALIGN		32

union otx2_cpt_opcode {
	u16 flags;
	struct {
		u8 major;
		u8 minor;
	} s;
};

struct otx2_cptvf_request {
	u32 param1;
	u32 param2;
	u16 dlen;
	union otx2_cpt_opcode opcode;
	dma_addr_t cptr_dma;
	void *cptr;
};

/*
 * CPT_INST_S software command definitions
 * Words EI (0-3)
 */
union otx2_cpt_iq_cmd_word0 {
	u64 u;
	struct {
		__be16 opcode;
		__be16 param1;
		__be16 param2;
		__be16 dlen;
	} s;
};

union otx2_cpt_iq_cmd_word3 {
	u64 u;
	struct {
		u64 cptr:61;
		u64 grp:3;
	} s;
};

struct otx2_cpt_iq_command {
	union otx2_cpt_iq_cmd_word0 cmd;
	u64 dptr;
	u64 rptr;
	union otx2_cpt_iq_cmd_word3 cptr;
};

struct otx2_cpt_pending_entry {
	void *completion_addr;	/* Completion address */
	void *info;
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	u8 resume_sender;	/* Notify sender to resume sending requests */
	u8 busy;		/* Entry status (free/busy) */
};

struct otx2_cpt_pending_queue {
	struct otx2_cpt_pending_entry *head; /* Head of the queue */
	u32 front;		/* Process work from here */
	u32 rear;		/* Append new work here */
	u32 pending_count;	/* Pending requests count */
	u32 qlen;		/* Queue length */
	spinlock_t lock;	/* Queue lock */
};

struct otx2_cpt_buf_ptr {
	u8 *vptr;
	dma_addr_t dma_addr;
	u16 size;
};

union otx2_cpt_ctrl_info {
	u32 flags;
	struct {
#if defined(__BIG_ENDIAN_BITFIELD)
		u32 reserved_6_31:26;
		u32 grp:3;	/* Group bits */
		u32 dma_mode:2;	/* DMA mode */
		u32 se_req:1;	/* To SE core */
#else
		u32 se_req:1;	/* To SE core */
		u32 dma_mode:2;	/* DMA mode */
		u32 grp:3;	/* Group bits */
		u32 reserved_6_31:26;
#endif
	} s;
};

struct otx2_cpt_req_info {
	/* Kernel async request callback */
	void (*callback)(int status, void *arg1, void *arg2);
	struct crypto_async_request *areq; /* Async request callback arg */
	struct otx2_cptvf_request req;/* Request information (core specific) */
	union otx2_cpt_ctrl_info ctrl;/* User control information */
	struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT];
	struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT];
	u8 *iv_out;     /* IV to send back */
	u16 rlen;	/* Output length */
	u8 in_cnt;	/* Number of input buffers */
	u8 out_cnt;	/* Number of output buffers */
	u8 req_type;	/* Type of request */
	u8 is_enc;	/* Whether this is an encryption request */
	u8 is_trunc_hmac;/* Whether truncated HMAC is used */
};
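
/*
 * A minimal usage sketch (illustrative only; the opcode, lengths and
 * callback below are hypothetical values, not mandated by this header):
 *
 *	struct otx2_cpt_req_info *req = kzalloc(sizeof(*req), GFP_KERNEL);
 *
 *	req->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG;
 *	req->ctrl.s.se_req = 1;			   (route to an SE engine)
 *	req->req.opcode.s.major = 0x02;		   (hypothetical major opcode)
 *	req->req.dlen = input_len;
 *	req->in[0].vptr = in_buf;		   (gather/input buffer)
 *	req->in[0].size = input_len;
 *	req->in_cnt = 1;
 *	req->out[0].vptr = out_buf;		   (scatter/output buffer)
 *	req->out[0].size = output_len;
 *	req->out_cnt = 1;
 *	req->callback = my_completion_cb;	   (hypothetical callback)
 */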

struct otx2_cpt_inst_info {
	struct otx2_cpt_pending_entry *pentry;
	struct otx2_cpt_req_info *req;
	struct pci_dev *pdev;
	void *completion_addr;
	u8 *out_buffer;
	u8 *in_buffer;
	dma_addr_t dptr_baddr;
	dma_addr_t rptr_baddr;
	dma_addr_t comp_baddr;
	unsigned long time_in;
	u32 dlen;
	u32 dma_len;
	u64 gthr_sz;
	u64 sctr_sz;
	u8 extra_time;
};

struct otx2_cpt_sglist_component {
	__be16 len0;
	__be16 len1;
	__be16 len2;
	__be16 len3;
	__be64 ptr0;
	__be64 ptr1;
	__be64 ptr2;
	__be64 ptr3;
};

struct cn10kb_cpt_sglist_component {
	u16 len0;
	u16 len1;
	u16 len2;
	u16 valid_segs;
	u64 ptr0;
	u64 ptr1;
	u64 ptr2;
};
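
/*
 * Note on the two formats above: the legacy OcteonTX2 SG component carries
 * four length/pointer pairs stored in big-endian byte order, while the
 * CN10KB SGv2 component carries three pairs plus a valid_segs count and is
 * written in CPU byte order (see the setup helpers below).
 */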

static inline void otx2_cpt_info_destroy(struct pci_dev *pdev,
					 struct otx2_cpt_inst_info *info)
{
	struct otx2_cpt_req_info *req;
	int i;

	if (info->dptr_baddr)
		dma_unmap_single(&pdev->dev, info->dptr_baddr,
				 info->dma_len, DMA_BIDIRECTIONAL);

	if (info->req) {
		req = info->req;
		for (i = 0; i < req->out_cnt; i++) {
			if (req->out[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->out[i].dma_addr,
						 req->out[i].size,
						 DMA_BIDIRECTIONAL);
		}

		for (i = 0; i < req->in_cnt; i++) {
			if (req->in[i].dma_addr)
				dma_unmap_single(&pdev->dev,
						 req->in[i].dma_addr,
						 req->in[i].size,
						 DMA_BIDIRECTIONAL);
		}
	}
	kfree(info);
}

static inline int setup_sgio_components(struct pci_dev *pdev,
					struct otx2_cpt_buf_ptr *list,
					int buf_count, u8 *buffer)
{
	struct otx2_cpt_sglist_component *sg_ptr;
	int components;
	int i, j;

	if (unlikely(!list)) {
		dev_err(&pdev->dev, "Input list pointer is NULL\n");
		return -EINVAL;
	}

	for (i = 0; i < buf_count; i++) {
		if (unlikely(!list[i].vptr))
			continue;
		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
						  list[i].size,
						  DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
			dev_err(&pdev->dev, "Dma mapping failed\n");
			goto sg_cleanup;
		}
	}
	components = buf_count / SG_COMPS_MAX;
	sg_ptr = (struct otx2_cpt_sglist_component *)buffer;
	for (i = 0; i < components; i++) {
		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
		sg_ptr->len3 = cpu_to_be16(list[i * SG_COMPS_MAX + 3].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
		sg_ptr->ptr3 = cpu_to_be64(list[i * SG_COMPS_MAX + 3].dma_addr);
		sg_ptr++;
	}
	components = buf_count % SG_COMPS_MAX;

	switch (components) {
	case SG_COMP_3:
		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
		fallthrough;
	case SG_COMP_2:
		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
		fallthrough;
	case SG_COMP_1:
		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
		break;
	default:
		break;
	}
	return 0;

sg_cleanup:
	for (j = 0; j < i; j++) {
		if (list[j].dma_addr) {
			dma_unmap_single(&pdev->dev, list[j].dma_addr,
					 list[j].size, DMA_BIDIRECTIONAL);
		}

		list[j].dma_addr = 0;
	}
	return -EIO;
}
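
/*
 * Worked example for the split above: with buf_count = 6 and
 * SG_COMPS_MAX = 4, the first loop fills 6 / 4 = 1 fully populated
 * component (buffers 0-3), and the trailing switch handles the
 * 6 % 4 = 2 remaining buffers through the SG_COMP_2/SG_COMP_1
 * fallthrough, so the list occupies two 40-byte components in total.
 */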

static inline int sgv2io_components_setup(struct pci_dev *pdev,
					  struct otx2_cpt_buf_ptr *list,
					  int buf_count, u8 *buffer)
{
	struct cn10kb_cpt_sglist_component *sg_ptr;
	int components;
	int i, j;

	if (unlikely(!list)) {
		dev_err(&pdev->dev, "Input list pointer is NULL\n");
		return -EFAULT;
	}

	for (i = 0; i < buf_count; i++) {
		if (unlikely(!list[i].vptr))
			continue;
		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
						  list[i].size,
						  DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
			dev_err(&pdev->dev, "Dma mapping failed\n");
			goto sg_cleanup;
		}
	}
	components = buf_count / SGV2_COMPS_MAX;
	sg_ptr = (struct cn10kb_cpt_sglist_component *)buffer;
	for (i = 0; i < components; i++) {
		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
		sg_ptr->len2 = list[i * SGV2_COMPS_MAX + 2].size;
		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
		sg_ptr->ptr2 = list[i * SGV2_COMPS_MAX + 2].dma_addr;
		sg_ptr->valid_segs = SGV2_COMPS_MAX;
		sg_ptr++;
	}
	components = buf_count % SGV2_COMPS_MAX;

	sg_ptr->valid_segs = components;
	switch (components) {
	case SG_COMP_2:
		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
		fallthrough;
	case SG_COMP_1:
		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
		break;
	default:
		break;
	}
	return 0;

sg_cleanup:
	for (j = 0; j < i; j++) {
		if (list[j].dma_addr) {
			dma_unmap_single(&pdev->dev, list[j].dma_addr,
					 list[j].size, DMA_BIDIRECTIONAL);
		}

		list[j].dma_addr = 0;
	}
	return -EIO;
}

static inline struct otx2_cpt_inst_info *
cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
		       gfp_t gfp)
{
	u32 dlen = 0, g_len, s_len, sg_len, info_len;
	struct otx2_cpt_inst_info *info;
	u32 total_mem_len;
	int i;

	/* Allocate memory to meet below alignment requirement:
	 *  ------------------------------------
	 * |    struct otx2_cpt_inst_info       |
	 * |    (No alignment required)         |
	 * |    --------------------------------|
	 * |   | padding for ARCH_DMA_MINALIGN  |
	 * |   | alignment                      |
	 * |------------------------------------|
	 * |    SG List Gather/Input memory     |
	 * |    Length = multiple of 32Bytes    |
	 * |    Alignment = 8Byte               |
	 * |------------------------------------|
	 * |    SG List Scatter/Output memory   |
	 * |    Length = multiple of 32Bytes    |
	 * |    Alignment = 8Byte               |
	 * |     -------------------------------|
	 * |    | padding for 32B alignment     |
	 * |------------------------------------|
	 * |    Result response memory          |
	 * |    Alignment = 32Byte              |
	 *  ------------------------------------
	 */

	info_len = sizeof(*info);

	g_len = ((req->in_cnt + 2) / 3) *
		 sizeof(struct cn10kb_cpt_sglist_component);
	s_len = ((req->out_cnt + 2) / 3) *
		 sizeof(struct cn10kb_cpt_sglist_component);
	sg_len = g_len + s_len;

	/* Allocate extra memory for SG and response address alignment */
	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
			  ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
	total_mem_len += ALIGN(sg_len, OTX2_CPT_RES_ADDR_ALIGN);
	total_mem_len += sizeof(union otx2_cpt_res_s);

	info = kzalloc(total_mem_len, gfp);
	if (unlikely(!info))
		return NULL;

	for (i = 0; i < req->in_cnt; i++)
		dlen += req->in[i].size;

	info->dlen = dlen;
	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
	info->out_buffer = info->in_buffer + g_len;
	info->gthr_sz = req->in_cnt;
	info->sctr_sz = req->out_cnt;

	/* Setup gather (input) components */
	if (sgv2io_components_setup(pdev, req->in, req->in_cnt,
				    info->in_buffer)) {
		dev_err(&pdev->dev, "Failed to setup gather list\n");
		goto destroy_info;
	}

	if (sgv2io_components_setup(pdev, req->out, req->out_cnt,
				    info->out_buffer)) {
		dev_err(&pdev->dev, "Failed to setup scatter list\n");
		goto destroy_info;
	}

	info->dma_len = total_mem_len - info_len;
	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
					  info->dma_len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
		dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
		goto destroy_info;
	}
	info->rptr_baddr = info->dptr_baddr + g_len;
	/*
	 * Get buffer for union otx2_cpt_res_s response
	 * structure and its physical address
	 */
	info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len),
					  OTX2_CPT_RES_ADDR_ALIGN);
	info->comp_baddr = ALIGN((info->dptr_baddr + sg_len),
				 OTX2_CPT_RES_ADDR_ALIGN);

	return info;

destroy_info:
	otx2_cpt_info_destroy(pdev, info);
	return NULL;
}
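
/*
 * A worked sizing example for the layout above (the buffer counts are
 * hypothetical; ARCH_DMA_MINALIGN and sizeof(union otx2_cpt_res_s) are
 * platform dependent): with in_cnt = 4 and out_cnt = 4, each SGv2
 * component is 32 bytes, so g_len = s_len = ((4 + 2) / 3) * 32 = 64 and
 * sg_len = 128. total_mem_len then becomes ALIGN(sizeof(*info), 8), plus
 * the ARCH_DMA_MINALIGN padding reserve, plus ALIGN(128, 32) = 128, plus
 * the size of the result structure.
 */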

/* SG list header size in bytes */
#define SG_LIST_HDR_SIZE	8
static inline struct otx2_cpt_inst_info *
otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
		    gfp_t gfp)
{
	struct otx2_cpt_inst_info *info;
	u32 dlen, info_len;
	u16 g_len, s_len;
	u32 total_mem_len;

	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
		     req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) {
		dev_err(&pdev->dev, "Error too many sg components\n");
		return NULL;
	}

	/* Allocate memory to meet below alignment requirement:
	 *  ------------------------------------
	 * |    struct otx2_cpt_inst_info       |
	 * |    (No alignment required)         |
	 * |    --------------------------------|
	 * |   | padding for ARCH_DMA_MINALIGN  |
	 * |   | alignment                      |
	 * |------------------------------------|
	 * |    SG List Header of 8 Byte        |
	 * |------------------------------------|
	 * |    SG List Gather/Input memory     |
	 * |    Length = multiple of 32Bytes    |
	 * |    Alignment = 8Byte               |
	 * |------------------------------------|
	 * |    SG List Scatter/Output memory   |
	 * |    Length = multiple of 32Bytes    |
	 * |    Alignment = 8Byte               |
	 * |     -------------------------------|
	 * |    | padding for 32B alignment     |
	 * |------------------------------------|
	 * |    Result response memory          |
	 * |    Alignment = 32Byte              |
	 *  ------------------------------------
	 */

	info_len = sizeof(*info);

	g_len = ((req->in_cnt + 3) / 4) *
		 sizeof(struct otx2_cpt_sglist_component);
	s_len = ((req->out_cnt + 3) / 4) *
		 sizeof(struct otx2_cpt_sglist_component);

	dlen = g_len + s_len + SG_LIST_HDR_SIZE;

	/* Allocate extra memory for SG and response address alignment */
	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
			  ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
	total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN);
	total_mem_len += sizeof(union otx2_cpt_res_s);

	info = kzalloc(total_mem_len, gfp);
	if (unlikely(!info))
		return NULL;

	info->dlen = dlen;
	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
	info->out_buffer = info->in_buffer + SG_LIST_HDR_SIZE + g_len;

	((u16 *)info->in_buffer)[0] = req->out_cnt;
	((u16 *)info->in_buffer)[1] = req->in_cnt;
	((u16 *)info->in_buffer)[2] = 0;
	((u16 *)info->in_buffer)[3] = 0;
	cpu_to_be64s((u64 *)info->in_buffer);

	/* Setup gather (input) components */
	if (setup_sgio_components(pdev, req->in, req->in_cnt,
				  &info->in_buffer[8])) {
		dev_err(&pdev->dev, "Failed to setup gather list\n");
		goto destroy_info;
	}

	if (setup_sgio_components(pdev, req->out, req->out_cnt,
				  info->out_buffer)) {
		dev_err(&pdev->dev, "Failed to setup scatter list\n");
		goto destroy_info;
	}

	info->dma_len = total_mem_len - info_len;
	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
					  info->dma_len, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
		dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
		goto destroy_info;
	}
	/*
	 * Get buffer for union otx2_cpt_res_s response
	 * structure and its physical address
	 */
	info->completion_addr = PTR_ALIGN((info->in_buffer + dlen),
					  OTX2_CPT_RES_ADDR_ALIGN);
	info->comp_baddr = ALIGN((info->dptr_baddr + dlen),
				 OTX2_CPT_RES_ADDR_ALIGN);

	return info;

destroy_info:
	otx2_cpt_info_destroy(pdev, info);
	return NULL;
}

struct otx2_cptlf_wqe;
int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
			int cpu_num);
void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe);
int otx2_cpt_get_eng_grp_num(struct pci_dev *pdev,
			     enum otx2_cpt_eng_type);
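
/*
 * Submission flow sketch (illustrative; the engine group choice and CPU
 * selection below are assumptions, not requirements of this header):
 *
 *	req->ctrl.s.grp = otx2_cpt_get_eng_grp_num(pdev, OTX2_CPT_SE_TYPES);
 *	ret = otx2_cpt_do_request(pdev, req, smp_processor_id());
 *
 * Completion is reported asynchronously: once the hardware updates the
 * result memory, the callback stored in struct otx2_cpt_req_info is
 * invoked, typically from the LF work queue path via
 * otx2_cpt_post_process().
 */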

#endif /* __OTX2_CPT_REQMGR_H */