1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Driver for IBM PowerNV compression accelerator
4  *
5  * Copyright (C) 2015 Dan Streetman, IBM Corp
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include "nx-842.h"
11 
12 #include <crypto/internal/scompress.h>
13 #include <linux/timer.h>
14 
15 #include <asm/prom.h>
16 #include <asm/icswx.h>
17 #include <asm/vas.h>
18 #include <asm/reg.h>
19 #include <asm/opal-api.h>
20 #include <asm/opal.h>
21 
22 MODULE_LICENSE("GPL");
23 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
24 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
25 MODULE_ALIAS_CRYPTO("842");
26 MODULE_ALIAS_CRYPTO("842-nx");
27 
28 #define WORKMEM_ALIGN	(CRB_ALIGN)
29 #define CSB_WAIT_MAX	(5000) /* ms */
30 #define VAS_RETRIES	(10)
31 
32 struct nx842_workmem {
33 	/* Below fields must be properly aligned */
34 	struct coprocessor_request_block crb; /* CRB_ALIGN align */
35 	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
36 	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
37 	/* Above fields must be properly aligned */
38 
39 	ktime_t start;
40 
41 	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
42 } __packed __aligned(WORKMEM_ALIGN);
43 
44 struct nx_coproc {
45 	unsigned int chip_id;
46 	unsigned int ct;	/* Can be 842 or GZIP high/normal*/
47 	unsigned int ci;	/* Coprocessor instance, used with icswx */
48 	struct {
49 		struct vas_window *rxwin;
50 		int id;
51 	} vas;
52 	struct list_head list;
53 };
54 
55 /*
56  * Send the request to NX engine on the chip for the corresponding CPU
57  * where the process is executing. Use with VAS function.
58  */
59 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
60 
61 /* no cpu hotplug on powernv, so this list never changes after init */
62 static LIST_HEAD(nx_coprocs);
63 static unsigned int nx842_ct;	/* used in icswx function */
64 
65 /*
66  * Using same values as in skiboot or coprocessor type representing
67  * in NX workbook.
68  */
69 #define NX_CT_GZIP	(2)	/* on P9 and later */
70 #define NX_CT_842	(3)
71 
72 static int (*nx842_powernv_exec)(const unsigned char *in,
73 				unsigned int inlen, unsigned char *out,
74 				unsigned int *outlenp, void *workmem, int fc);
75 
76 /*
77  * setup_indirect_dde - Setup an indirect DDE
78  *
79  * The DDE is setup with the DDE count, byte count, and address of
80  * first direct DDE in the list.
81  */
setup_indirect_dde(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned int dde_count,unsigned int byte_count)82 static void setup_indirect_dde(struct data_descriptor_entry *dde,
83 			       struct data_descriptor_entry *ddl,
84 			       unsigned int dde_count, unsigned int byte_count)
85 {
86 	dde->flags = 0;
87 	dde->count = dde_count;
88 	dde->index = 0;
89 	dde->length = cpu_to_be32(byte_count);
90 	dde->address = cpu_to_be64(nx842_get_pa(ddl));
91 }
92 
93 /*
94  * setup_direct_dde - Setup single DDE from buffer
95  *
96  * The DDE is setup with the buffer and length.  The buffer must be properly
97  * aligned.  The used length is returned.
98  * Returns:
99  *   N    Successfully set up DDE with N bytes
100  */
setup_direct_dde(struct data_descriptor_entry * dde,unsigned long pa,unsigned int len)101 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
102 				     unsigned long pa, unsigned int len)
103 {
104 	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
105 
106 	dde->flags = 0;
107 	dde->count = 0;
108 	dde->index = 0;
109 	dde->length = cpu_to_be32(l);
110 	dde->address = cpu_to_be64(pa);
111 
112 	return l;
113 }
114 
115 /*
116  * setup_ddl - Setup DDL from buffer
117  *
118  * Returns:
119  *   0		Successfully set up DDL
120  */
setup_ddl(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned char * buf,unsigned int len,bool in)121 static int setup_ddl(struct data_descriptor_entry *dde,
122 		     struct data_descriptor_entry *ddl,
123 		     unsigned char *buf, unsigned int len,
124 		     bool in)
125 {
126 	unsigned long pa = nx842_get_pa(buf);
127 	int i, ret, total_len = len;
128 
129 	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
130 		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
131 			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
132 		return -EINVAL;
133 	}
134 
135 	/* only need to check last mult; since buffer must be
136 	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
137 	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
138 	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
139 	 */
140 	if (len % DDE_BUFFER_LAST_MULT) {
141 		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
142 			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
143 		if (in)
144 			return -EINVAL;
145 		len = round_down(len, DDE_BUFFER_LAST_MULT);
146 	}
147 
148 	/* use a single direct DDE */
149 	if (len <= LEN_ON_PAGE(pa)) {
150 		ret = setup_direct_dde(dde, pa, len);
151 		WARN_ON(ret < len);
152 		return 0;
153 	}
154 
155 	/* use the DDL */
156 	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
157 		ret = setup_direct_dde(&ddl[i], pa, len);
158 		buf += ret;
159 		len -= ret;
160 		pa = nx842_get_pa(buf);
161 	}
162 
163 	if (len > 0) {
164 		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
165 			 total_len, in ? "input" : "output", len);
166 		if (in)
167 			return -EMSGSIZE;
168 		total_len -= len;
169 	}
170 	setup_indirect_dde(dde, ddl, i, total_len);
171 
172 	return 0;
173 }
174 
/*
 * CSB_ERR - log an error message followed by a dump of the CSB
 *
 * @msg has to be a string literal because it is pasted into the format.
 * After the caller's own arguments the CSB flags, cs, cc, ce and the
 * count (converted to CPU endianness) are appended.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__,					\
	       (csb)->flags, (csb)->cs, (csb)->cc, (csb)->ce,	\
	       be32_to_cpu((csb)->count))

/* Like CSB_ERR, but additionally reports the address stored in the CSB. */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
184 
wait_for_csb(struct nx842_workmem * wmem,struct coprocessor_status_block * csb)185 static int wait_for_csb(struct nx842_workmem *wmem,
186 			struct coprocessor_status_block *csb)
187 {
188 	ktime_t start = wmem->start, now = ktime_get();
189 	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
190 
191 	while (!(READ_ONCE(csb->flags) & CSB_V)) {
192 		cpu_relax();
193 		now = ktime_get();
194 		if (ktime_after(now, timeout))
195 			break;
196 	}
197 
198 	/* hw has updated csb and output buffer */
199 	barrier();
200 
201 	/* check CSB flags */
202 	if (!(csb->flags & CSB_V)) {
203 		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
204 			(long)ktime_us_delta(now, start));
205 		return -ETIMEDOUT;
206 	}
207 	if (csb->flags & CSB_F) {
208 		CSB_ERR(csb, "Invalid CSB format");
209 		return -EPROTO;
210 	}
211 	if (csb->flags & CSB_CH) {
212 		CSB_ERR(csb, "Invalid CSB chaining state");
213 		return -EPROTO;
214 	}
215 
216 	/* verify CSB completion sequence is 0 */
217 	if (csb->cs) {
218 		CSB_ERR(csb, "Invalid CSB completion sequence");
219 		return -EPROTO;
220 	}
221 
222 	/* check CSB Completion Code */
223 	switch (csb->cc) {
224 	/* no error */
225 	case CSB_CC_SUCCESS:
226 		break;
227 	case CSB_CC_TPBC_GT_SPBC:
228 		/* not an error, but the compressed data is
229 		 * larger than the uncompressed data :(
230 		 */
231 		break;
232 
233 	/* input data errors */
234 	case CSB_CC_OPERAND_OVERLAP:
235 		/* input and output buffers overlap */
236 		CSB_ERR(csb, "Operand Overlap error");
237 		return -EINVAL;
238 	case CSB_CC_INVALID_OPERAND:
239 		CSB_ERR(csb, "Invalid operand");
240 		return -EINVAL;
241 	case CSB_CC_NOSPC:
242 		/* output buffer too small */
243 		return -ENOSPC;
244 	case CSB_CC_ABORT:
245 		CSB_ERR(csb, "Function aborted");
246 		return -EINTR;
247 	case CSB_CC_CRC_MISMATCH:
248 		CSB_ERR(csb, "CRC mismatch");
249 		return -EINVAL;
250 	case CSB_CC_TEMPL_INVALID:
251 		CSB_ERR(csb, "Compressed data template invalid");
252 		return -EINVAL;
253 	case CSB_CC_TEMPL_OVERFLOW:
254 		CSB_ERR(csb, "Compressed data template shows data past end");
255 		return -EINVAL;
256 	case CSB_CC_EXCEED_BYTE_COUNT:	/* P9 or later */
257 		/*
258 		 * DDE byte count exceeds the limit specified in Maximum
259 		 * byte count register.
260 		 */
261 		CSB_ERR(csb, "DDE byte count exceeds the limit");
262 		return -EINVAL;
263 
264 	/* these should not happen */
265 	case CSB_CC_INVALID_ALIGN:
266 		/* setup_ddl should have detected this */
267 		CSB_ERR_ADDR(csb, "Invalid alignment");
268 		return -EINVAL;
269 	case CSB_CC_DATA_LENGTH:
270 		/* setup_ddl should have detected this */
271 		CSB_ERR(csb, "Invalid data length");
272 		return -EINVAL;
273 	case CSB_CC_WR_TRANSLATION:
274 	case CSB_CC_TRANSLATION:
275 	case CSB_CC_TRANSLATION_DUP1:
276 	case CSB_CC_TRANSLATION_DUP2:
277 	case CSB_CC_TRANSLATION_DUP3:
278 	case CSB_CC_TRANSLATION_DUP4:
279 	case CSB_CC_TRANSLATION_DUP5:
280 	case CSB_CC_TRANSLATION_DUP6:
281 		/* should not happen, we use physical addrs */
282 		CSB_ERR_ADDR(csb, "Translation error");
283 		return -EPROTO;
284 	case CSB_CC_WR_PROTECTION:
285 	case CSB_CC_PROTECTION:
286 	case CSB_CC_PROTECTION_DUP1:
287 	case CSB_CC_PROTECTION_DUP2:
288 	case CSB_CC_PROTECTION_DUP3:
289 	case CSB_CC_PROTECTION_DUP4:
290 	case CSB_CC_PROTECTION_DUP5:
291 	case CSB_CC_PROTECTION_DUP6:
292 		/* should not happen, we use physical addrs */
293 		CSB_ERR_ADDR(csb, "Protection error");
294 		return -EPROTO;
295 	case CSB_CC_PRIVILEGE:
296 		/* shouldn't happen, we're in HYP mode */
297 		CSB_ERR(csb, "Insufficient Privilege error");
298 		return -EPROTO;
299 	case CSB_CC_EXCESSIVE_DDE:
300 		/* shouldn't happen, setup_ddl doesn't use many dde's */
301 		CSB_ERR(csb, "Too many DDEs in DDL");
302 		return -EINVAL;
303 	case CSB_CC_TRANSPORT:
304 	case CSB_CC_INVALID_CRB:	/* P9 or later */
305 		/* shouldn't happen, we setup CRB correctly */
306 		CSB_ERR(csb, "Invalid CRB");
307 		return -EINVAL;
308 	case CSB_CC_INVALID_DDE:	/* P9 or later */
309 		/*
310 		 * shouldn't happen, setup_direct/indirect_dde creates
311 		 * DDE right
312 		 */
313 		CSB_ERR(csb, "Invalid DDE");
314 		return -EINVAL;
315 	case CSB_CC_SEGMENTED_DDL:
316 		/* shouldn't happen, setup_ddl creates DDL right */
317 		CSB_ERR(csb, "Segmented DDL error");
318 		return -EINVAL;
319 	case CSB_CC_DDE_OVERFLOW:
320 		/* shouldn't happen, setup_ddl creates DDL right */
321 		CSB_ERR(csb, "DDE overflow error");
322 		return -EINVAL;
323 	case CSB_CC_SESSION:
324 		/* should not happen with ICSWX */
325 		CSB_ERR(csb, "Session violation error");
326 		return -EPROTO;
327 	case CSB_CC_CHAIN:
328 		/* should not happen, we don't use chained CRBs */
329 		CSB_ERR(csb, "Chained CRB error");
330 		return -EPROTO;
331 	case CSB_CC_SEQUENCE:
332 		/* should not happen, we don't use chained CRBs */
333 		CSB_ERR(csb, "CRB sequence number error");
334 		return -EPROTO;
335 	case CSB_CC_UNKNOWN_CODE:
336 		CSB_ERR(csb, "Unknown subfunction code");
337 		return -EPROTO;
338 
339 	/* hardware errors */
340 	case CSB_CC_RD_EXTERNAL:
341 	case CSB_CC_RD_EXTERNAL_DUP1:
342 	case CSB_CC_RD_EXTERNAL_DUP2:
343 	case CSB_CC_RD_EXTERNAL_DUP3:
344 		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
345 		return -EPROTO;
346 	case CSB_CC_WR_EXTERNAL:
347 		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
348 		return -EPROTO;
349 	case CSB_CC_INTERNAL:
350 		CSB_ERR(csb, "Internal error in coprocessor");
351 		return -EPROTO;
352 	case CSB_CC_PROVISION:
353 		CSB_ERR(csb, "Storage provision error");
354 		return -EPROTO;
355 	case CSB_CC_HW:
356 		CSB_ERR(csb, "Correctable hardware error");
357 		return -EPROTO;
358 	case CSB_CC_HW_EXPIRED_TIMER:	/* P9 or later */
359 		CSB_ERR(csb, "Job did not finish within allowed time");
360 		return -EPROTO;
361 
362 	default:
363 		CSB_ERR(csb, "Invalid CC %d", csb->cc);
364 		return -EPROTO;
365 	}
366 
367 	/* check Completion Extension state */
368 	if (csb->ce & CSB_CE_TERMINATION) {
369 		CSB_ERR(csb, "CSB request was terminated");
370 		return -EPROTO;
371 	}
372 	if (csb->ce & CSB_CE_INCOMPLETE) {
373 		CSB_ERR(csb, "CSB request not complete");
374 		return -EPROTO;
375 	}
376 	if (!(csb->ce & CSB_CE_TPBC)) {
377 		CSB_ERR(csb, "TPBC not provided, unknown target length");
378 		return -EPROTO;
379 	}
380 
381 	/* successful completion */
382 	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
383 			     be32_to_cpu(csb->count),
384 			     (unsigned long)ktime_us_delta(now, start));
385 
386 	return 0;
387 }
388 
nx842_config_crb(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int outlen,struct nx842_workmem * wmem)389 static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
390 			unsigned char *out, unsigned int outlen,
391 			struct nx842_workmem *wmem)
392 {
393 	struct coprocessor_request_block *crb;
394 	struct coprocessor_status_block *csb;
395 	u64 csb_addr;
396 	int ret;
397 
398 	crb = &wmem->crb;
399 	csb = &crb->csb;
400 
401 	/* Clear any previous values */
402 	memset(crb, 0, sizeof(*crb));
403 
404 	/* set up DDLs */
405 	ret = setup_ddl(&crb->source, wmem->ddl_in,
406 			(unsigned char *)in, inlen, true);
407 	if (ret)
408 		return ret;
409 
410 	ret = setup_ddl(&crb->target, wmem->ddl_out,
411 			out, outlen, false);
412 	if (ret)
413 		return ret;
414 
415 	/* set up CRB's CSB addr */
416 	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
417 	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
418 	crb->csb_addr = cpu_to_be64(csb_addr);
419 
420 	return 0;
421 }
422 
423 /**
424  * nx842_exec_icswx - compress/decompress data using the 842 algorithm
425  *
426  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
427  * This compresses or decompresses the provided input buffer into the provided
428  * output buffer.
429  *
430  * Upon return from this function @outlen contains the length of the
431  * output data.  If there is an error then @outlen will be 0 and an
432  * error will be specified by the return code from this function.
433  *
434  * The @workmem buffer should only be used by one function call at a time.
435  *
436  * @in: input buffer pointer
437  * @inlen: input buffer size
438  * @out: output buffer pointer
439  * @outlenp: output buffer size pointer
440  * @workmem: working memory buffer pointer, size determined by
441  *           nx842_powernv_driver.workmem_size
442  * @fc: function code, see CCW Function Codes in nx-842.h
443  *
444  * Returns:
445  *   0		Success, output of length @outlenp stored in the buffer at @out
446  *   -ENODEV	Hardware unavailable
447  *   -ENOSPC	Output buffer is to small
448  *   -EMSGSIZE	Input buffer too large
449  *   -EINVAL	buffer constraints do not fix nx842_constraints
450  *   -EPROTO	hardware error during operation
451  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
452  *   -EINTR	operation was aborted
453  */
nx842_exec_icswx(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)454 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
455 				  unsigned char *out, unsigned int *outlenp,
456 				  void *workmem, int fc)
457 {
458 	struct coprocessor_request_block *crb;
459 	struct coprocessor_status_block *csb;
460 	struct nx842_workmem *wmem;
461 	int ret;
462 	u32 ccw;
463 	unsigned int outlen = *outlenp;
464 
465 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
466 
467 	*outlenp = 0;
468 
469 	/* shoudn't happen, we don't load without a coproc */
470 	if (!nx842_ct) {
471 		pr_err_ratelimited("coprocessor CT is 0");
472 		return -ENODEV;
473 	}
474 
475 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
476 	if (ret)
477 		return ret;
478 
479 	crb = &wmem->crb;
480 	csb = &crb->csb;
481 
482 	/* set up CCW */
483 	ccw = 0;
484 	ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
485 	ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
486 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
487 
488 	wmem->start = ktime_get();
489 
490 	/* do ICSWX */
491 	ret = icswx(cpu_to_be32(ccw), crb);
492 
493 	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
494 			     (unsigned int)ccw,
495 			     (unsigned int)be32_to_cpu(crb->ccw));
496 
497 	/*
498 	 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
499 	 * XER[S0] is the integer summary overflow bit which is nothing
500 	 * to do NX. Since this bit can be set with other return values,
501 	 * mask this bit.
502 	 */
503 	ret &= ~ICSWX_XERS0;
504 
505 	switch (ret) {
506 	case ICSWX_INITIATED:
507 		ret = wait_for_csb(wmem, csb);
508 		break;
509 	case ICSWX_BUSY:
510 		pr_debug_ratelimited("842 Coprocessor busy\n");
511 		ret = -EBUSY;
512 		break;
513 	case ICSWX_REJECTED:
514 		pr_err_ratelimited("ICSWX rejected\n");
515 		ret = -EPROTO;
516 		break;
517 	}
518 
519 	if (!ret)
520 		*outlenp = be32_to_cpu(csb->count);
521 
522 	return ret;
523 }
524 
525 /**
526  * nx842_exec_vas - compress/decompress data using the 842 algorithm
527  *
528  * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
529  * This compresses or decompresses the provided input buffer into the provided
530  * output buffer.
531  *
532  * Upon return from this function @outlen contains the length of the
533  * output data.  If there is an error then @outlen will be 0 and an
534  * error will be specified by the return code from this function.
535  *
536  * The @workmem buffer should only be used by one function call at a time.
537  *
538  * @in: input buffer pointer
539  * @inlen: input buffer size
540  * @out: output buffer pointer
541  * @outlenp: output buffer size pointer
542  * @workmem: working memory buffer pointer, size determined by
543  *           nx842_powernv_driver.workmem_size
544  * @fc: function code, see CCW Function Codes in nx-842.h
545  *
546  * Returns:
547  *   0		Success, output of length @outlenp stored in the buffer
548  *		at @out
549  *   -ENODEV	Hardware unavailable
550  *   -ENOSPC	Output buffer is to small
551  *   -EMSGSIZE	Input buffer too large
552  *   -EINVAL	buffer constraints do not fix nx842_constraints
553  *   -EPROTO	hardware error during operation
554  *   -ETIMEDOUT	hardware did not complete operation in reasonable time
555  *   -EINTR	operation was aborted
556  */
nx842_exec_vas(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)557 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
558 				  unsigned char *out, unsigned int *outlenp,
559 				  void *workmem, int fc)
560 {
561 	struct coprocessor_request_block *crb;
562 	struct coprocessor_status_block *csb;
563 	struct nx842_workmem *wmem;
564 	struct vas_window *txwin;
565 	int ret, i = 0;
566 	u32 ccw;
567 	unsigned int outlen = *outlenp;
568 
569 	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
570 
571 	*outlenp = 0;
572 
573 	crb = &wmem->crb;
574 	csb = &crb->csb;
575 
576 	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
577 	if (ret)
578 		return ret;
579 
580 	ccw = 0;
581 	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
582 	crb->ccw = cpu_to_be32(ccw);
583 
584 	do {
585 		wmem->start = ktime_get();
586 		preempt_disable();
587 		txwin = this_cpu_read(cpu_txwin);
588 
589 		/*
590 		 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
591 		 * @crb and @offset.
592 		 */
593 		vas_copy_crb(crb, 0);
594 
595 		/*
596 		 * VAS paste previously copied CRB to NX.
597 		 * @txwin, @offset and @last (must be true).
598 		 */
599 		ret = vas_paste_crb(txwin, 0, 1);
600 		preempt_enable();
601 		/*
602 		 * Retry copy/paste function for VAS failures.
603 		 */
604 	} while (ret && (i++ < VAS_RETRIES));
605 
606 	if (ret) {
607 		pr_err_ratelimited("VAS copy/paste failed\n");
608 		return ret;
609 	}
610 
611 	ret = wait_for_csb(wmem, csb);
612 	if (!ret)
613 		*outlenp = be32_to_cpu(csb->count);
614 
615 	return ret;
616 }
617 
618 /**
619  * nx842_powernv_compress - Compress data using the 842 algorithm
620  *
621  * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
622  * The input buffer is compressed and the result is stored in the
623  * provided output buffer.
624  *
625  * Upon return from this function @outlen contains the length of the
626  * compressed data.  If there is an error then @outlen will be 0 and an
627  * error will be specified by the return code from this function.
628  *
629  * @in: input buffer pointer
630  * @inlen: input buffer size
631  * @out: output buffer pointer
632  * @outlenp: output buffer size pointer
633  * @wmem: working memory buffer pointer, size determined by
634  *        nx842_powernv_driver.workmem_size
635  *
636  * Returns: see @nx842_powernv_exec()
637  */
nx842_powernv_compress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)638 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
639 				  unsigned char *out, unsigned int *outlenp,
640 				  void *wmem)
641 {
642 	return nx842_powernv_exec(in, inlen, out, outlenp,
643 				      wmem, CCW_FC_842_COMP_CRC);
644 }
645 
646 /**
647  * nx842_powernv_decompress - Decompress data using the 842 algorithm
648  *
649  * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
650  * The input buffer is decompressed and the result is stored in the
651  * provided output buffer.
652  *
653  * Upon return from this function @outlen contains the length of the
654  * decompressed data.  If there is an error then @outlen will be 0 and an
655  * error will be specified by the return code from this function.
656  *
657  * @in: input buffer pointer
658  * @inlen: input buffer size
659  * @out: output buffer pointer
660  * @outlenp: output buffer size pointer
661  * @wmem: working memory buffer pointer, size determined by
662  *        nx842_powernv_driver.workmem_size
663  *
664  * Returns: see @nx842_powernv_exec()
665  */
nx842_powernv_decompress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)666 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
667 				    unsigned char *out, unsigned int *outlenp,
668 				    void *wmem)
669 {
670 	return nx842_powernv_exec(in, inlen, out, outlenp,
671 				      wmem, CCW_FC_842_DECOMP_CRC);
672 }
673 
nx_add_coprocs_list(struct nx_coproc * coproc,int chipid)674 static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
675 					int chipid)
676 {
677 	coproc->chip_id = chipid;
678 	INIT_LIST_HEAD(&coproc->list);
679 	list_add(&coproc->list, &nx_coprocs);
680 }
681 
nx_alloc_txwin(struct nx_coproc * coproc)682 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
683 {
684 	struct vas_window *txwin = NULL;
685 	struct vas_tx_win_attr txattr;
686 
687 	/*
688 	 * Kernel requests will be high priority. So open send
689 	 * windows only for high priority RxFIFO entries.
690 	 */
691 	vas_init_tx_win_attr(&txattr, coproc->ct);
692 	txattr.lpid = 0;	/* lpid is 0 for kernel requests */
693 
694 	/*
695 	 * Open a VAS send window which is used to send request to NX.
696 	 */
697 	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
698 	if (IS_ERR(txwin))
699 		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
700 				PTR_ERR(txwin));
701 
702 	return txwin;
703 }
704 
705 /*
706  * Identify chip ID for each CPU, open send wndow for the corresponding NX
707  * engine and save txwin in percpu cpu_txwin.
708  * cpu_txwin is used in copy/paste operation for each compression /
709  * decompression request.
710  */
nx_open_percpu_txwins(void)711 static int nx_open_percpu_txwins(void)
712 {
713 	struct nx_coproc *coproc, *n;
714 	unsigned int i, chip_id;
715 
716 	for_each_possible_cpu(i) {
717 		struct vas_window *txwin = NULL;
718 
719 		chip_id = cpu_to_chip_id(i);
720 
721 		list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
722 			/*
723 			 * Kernel requests use only high priority FIFOs. So
724 			 * open send windows for these FIFOs.
725 			 * GZIP is not supported in kernel right now.
726 			 */
727 
728 			if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
729 				continue;
730 
731 			if (coproc->chip_id == chip_id) {
732 				txwin = nx_alloc_txwin(coproc);
733 				if (IS_ERR(txwin))
734 					return PTR_ERR(txwin);
735 
736 				per_cpu(cpu_txwin, i) = txwin;
737 				break;
738 			}
739 		}
740 
741 		if (!per_cpu(cpu_txwin, i)) {
742 			/* shouldn't happen, Each chip will have NX engine */
743 			pr_err("NX engine is not available for CPU %d\n", i);
744 			return -EINVAL;
745 		}
746 	}
747 
748 	return 0;
749 }
750 
nx_set_ct(struct nx_coproc * coproc,const char * priority,int high,int normal)751 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
752 				int high, int normal)
753 {
754 	if (!strcmp(priority, "High"))
755 		coproc->ct = high;
756 	else if (!strcmp(priority, "Normal"))
757 		coproc->ct = normal;
758 	else {
759 		pr_err("Invalid RxFIFO priority value\n");
760 		return -EINVAL;
761 	}
762 
763 	return 0;
764 }
765 
vas_cfg_coproc_info(struct device_node * dn,int chip_id,int vasid,int type,int * ct)766 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
767 					int vasid, int type, int *ct)
768 {
769 	struct vas_window *rxwin = NULL;
770 	struct vas_rx_win_attr rxattr;
771 	u32 lpid, pid, tid, fifo_size;
772 	struct nx_coproc *coproc;
773 	u64 rx_fifo;
774 	const char *priority;
775 	int ret;
776 
777 	ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
778 	if (ret) {
779 		pr_err("Missing rx-fifo-address property\n");
780 		return ret;
781 	}
782 
783 	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
784 	if (ret) {
785 		pr_err("Missing rx-fifo-size property\n");
786 		return ret;
787 	}
788 
789 	ret = of_property_read_u32(dn, "lpid", &lpid);
790 	if (ret) {
791 		pr_err("Missing lpid property\n");
792 		return ret;
793 	}
794 
795 	ret = of_property_read_u32(dn, "pid", &pid);
796 	if (ret) {
797 		pr_err("Missing pid property\n");
798 		return ret;
799 	}
800 
801 	ret = of_property_read_u32(dn, "tid", &tid);
802 	if (ret) {
803 		pr_err("Missing tid property\n");
804 		return ret;
805 	}
806 
807 	ret = of_property_read_string(dn, "priority", &priority);
808 	if (ret) {
809 		pr_err("Missing priority property\n");
810 		return ret;
811 	}
812 
813 	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
814 	if (!coproc)
815 		return -ENOMEM;
816 
817 	if (type == NX_CT_842)
818 		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
819 			VAS_COP_TYPE_842);
820 	else if (type == NX_CT_GZIP)
821 		ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
822 				VAS_COP_TYPE_GZIP);
823 
824 	if (ret)
825 		goto err_out;
826 
827 	vas_init_rx_win_attr(&rxattr, coproc->ct);
828 	rxattr.rx_fifo = rx_fifo;
829 	rxattr.rx_fifo_size = fifo_size;
830 	rxattr.lnotify_lpid = lpid;
831 	rxattr.lnotify_pid = pid;
832 	rxattr.lnotify_tid = tid;
833 	/*
834 	 * Maximum RX window credits can not be more than #CRBs in
835 	 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
836 	 */
837 	rxattr.wcreds_max = fifo_size / CRB_SIZE;
838 
839 	/*
840 	 * Open a VAS receice window which is used to configure RxFIFO
841 	 * for NX.
842 	 */
843 	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
844 	if (IS_ERR(rxwin)) {
845 		ret = PTR_ERR(rxwin);
846 		pr_err("setting RxFIFO with VAS failed: %d\n",
847 			ret);
848 		goto err_out;
849 	}
850 
851 	coproc->vas.rxwin = rxwin;
852 	coproc->vas.id = vasid;
853 	nx_add_coprocs_list(coproc, chip_id);
854 
855 	/*
856 	 * (lpid, pid, tid) combination has to be unique for each
857 	 * coprocessor instance in the system. So to make it
858 	 * unique, skiboot uses coprocessor type such as 842 or
859 	 * GZIP for pid and provides this value to kernel in pid
860 	 * device-tree property.
861 	 */
862 	*ct = pid;
863 
864 	return 0;
865 
866 err_out:
867 	kfree(coproc);
868 	return ret;
869 }
870 
nx_coproc_init(int chip_id,int ct_842,int ct_gzip)871 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
872 {
873 	int ret = 0;
874 
875 	if (opal_check_token(OPAL_NX_COPROC_INIT)) {
876 		ret = opal_nx_coproc_init(chip_id, ct_842);
877 
878 		if (!ret)
879 			ret = opal_nx_coproc_init(chip_id, ct_gzip);
880 
881 		if (ret) {
882 			ret = opal_error_code(ret);
883 			pr_err("Failed to initialize NX for chip(%d): %d\n",
884 				chip_id, ret);
885 		}
886 	} else
887 		pr_warn("Firmware doesn't support NX initialization\n");
888 
889 	return ret;
890 }
891 
find_nx_device_tree(struct device_node * dn,int chip_id,int vasid,int type,char * devname,int * ct)892 static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
893 					int vasid, int type, char *devname,
894 					int *ct)
895 {
896 	int ret = 0;
897 
898 	if (of_device_is_compatible(dn, devname)) {
899 		ret  = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
900 		if (ret)
901 			of_node_put(dn);
902 	}
903 
904 	return ret;
905 }
906 
nx_powernv_probe_vas(struct device_node * pn)907 static int __init nx_powernv_probe_vas(struct device_node *pn)
908 {
909 	int chip_id, vasid, ret = 0;
910 	int ct_842 = 0, ct_gzip = 0;
911 	struct device_node *dn;
912 
913 	chip_id = of_get_ibm_chip_id(pn);
914 	if (chip_id < 0) {
915 		pr_err("ibm,chip-id missing\n");
916 		return -EINVAL;
917 	}
918 
919 	vasid = chip_to_vas_id(chip_id);
920 	if (vasid < 0) {
921 		pr_err("Unable to map chip_id %d to vasid\n", chip_id);
922 		return -EINVAL;
923 	}
924 
925 	for_each_child_of_node(pn, dn) {
926 		ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
927 					"ibm,p9-nx-842", &ct_842);
928 
929 		if (!ret)
930 			ret = find_nx_device_tree(dn, chip_id, vasid,
931 				NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
932 
933 		if (ret) {
934 			of_node_put(dn);
935 			return ret;
936 		}
937 	}
938 
939 	if (!ct_842 || !ct_gzip) {
940 		pr_err("NX FIFO nodes are missing\n");
941 		return -EINVAL;
942 	}
943 
944 	/*
945 	 * Initialize NX instance for both high and normal priority FIFOs.
946 	 */
947 	ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
948 
949 	return ret;
950 }
951 
nx842_powernv_probe(struct device_node * dn)952 static int __init nx842_powernv_probe(struct device_node *dn)
953 {
954 	struct nx_coproc *coproc;
955 	unsigned int ct, ci;
956 	int chip_id;
957 
958 	chip_id = of_get_ibm_chip_id(dn);
959 	if (chip_id < 0) {
960 		pr_err("ibm,chip-id missing\n");
961 		return -EINVAL;
962 	}
963 
964 	if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
965 		pr_err("ibm,842-coprocessor-type missing\n");
966 		return -EINVAL;
967 	}
968 
969 	if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
970 		pr_err("ibm,842-coprocessor-instance missing\n");
971 		return -EINVAL;
972 	}
973 
974 	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
975 	if (!coproc)
976 		return -ENOMEM;
977 
978 	coproc->ct = ct;
979 	coproc->ci = ci;
980 	nx_add_coprocs_list(coproc, chip_id);
981 
982 	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
983 
984 	if (!nx842_ct)
985 		nx842_ct = ct;
986 	else if (nx842_ct != ct)
987 		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
988 		       chip_id, ct, nx842_ct);
989 
990 	return 0;
991 }
992 
nx_delete_coprocs(void)993 static void nx_delete_coprocs(void)
994 {
995 	struct nx_coproc *coproc, *n;
996 	struct vas_window *txwin;
997 	int i;
998 
999 	/*
1000 	 * close percpu txwins that are opened for the corresponding coproc.
1001 	 */
1002 	for_each_possible_cpu(i) {
1003 		txwin = per_cpu(cpu_txwin, i);
1004 		if (txwin)
1005 			vas_win_close(txwin);
1006 
1007 		per_cpu(cpu_txwin, i) = NULL;
1008 	}
1009 
1010 	list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
1011 		if (coproc->vas.rxwin)
1012 			vas_win_close(coproc->vas.rxwin);
1013 
1014 		list_del(&coproc->list);
1015 		kfree(coproc);
1016 	}
1017 }
1018 
1019 static struct nx842_constraints nx842_powernv_constraints = {
1020 	.alignment =	DDE_BUFFER_ALIGN,
1021 	.multiple =	DDE_BUFFER_LAST_MULT,
1022 	.minimum =	DDE_BUFFER_LAST_MULT,
1023 	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
1024 };
1025 
1026 static struct nx842_driver nx842_powernv_driver = {
1027 	.name =		KBUILD_MODNAME,
1028 	.owner =	THIS_MODULE,
1029 	.workmem_size =	sizeof(struct nx842_workmem),
1030 	.constraints =	&nx842_powernv_constraints,
1031 	.compress =	nx842_powernv_compress,
1032 	.decompress =	nx842_powernv_decompress,
1033 };
1034 
nx842_powernv_crypto_alloc_ctx(void)1035 static void *nx842_powernv_crypto_alloc_ctx(void)
1036 {
1037 	return nx842_crypto_alloc_ctx(&nx842_powernv_driver);
1038 }
1039 
1040 static struct scomp_alg nx842_powernv_alg = {
1041 	.base.cra_name		= "842",
1042 	.base.cra_driver_name	= "842-nx",
1043 	.base.cra_priority	= 300,
1044 	.base.cra_module	= THIS_MODULE,
1045 
1046 	.alloc_ctx		= nx842_powernv_crypto_alloc_ctx,
1047 	.free_ctx		= nx842_crypto_free_ctx,
1048 	.compress		= nx842_crypto_compress,
1049 	.decompress		= nx842_crypto_decompress,
1050 };
1051 
nx_compress_powernv_init(void)1052 static __init int nx_compress_powernv_init(void)
1053 {
1054 	struct device_node *dn;
1055 	int ret;
1056 
1057 	/* verify workmem size/align restrictions */
1058 	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1059 	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1060 	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1061 	/* verify buffer size/align restrictions */
1062 	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1063 	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1064 	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1065 
1066 	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1067 		ret = nx_powernv_probe_vas(dn);
1068 		if (ret) {
1069 			nx_delete_coprocs();
1070 			of_node_put(dn);
1071 			return ret;
1072 		}
1073 	}
1074 
1075 	if (list_empty(&nx_coprocs)) {
1076 		for_each_compatible_node(dn, NULL, "ibm,power-nx")
1077 			nx842_powernv_probe(dn);
1078 
1079 		if (!nx842_ct)
1080 			return -ENODEV;
1081 
1082 		nx842_powernv_exec = nx842_exec_icswx;
1083 	} else {
1084 		/*
1085 		 * Register VAS user space API for NX GZIP so
1086 		 * that user space can use GZIP engine.
1087 		 * Using high FIFO priority for kernel requests and
1088 		 * normal FIFO priority is assigned for userspace.
1089 		 * 842 compression is supported only in kernel.
1090 		 */
1091 		ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
1092 					       "nx-gzip");
1093 
1094 		/*
1095 		 * GZIP is not supported in kernel right now.
1096 		 * So open tx windows only for 842.
1097 		 */
1098 		if (!ret)
1099 			ret = nx_open_percpu_txwins();
1100 
1101 		if (ret) {
1102 			nx_delete_coprocs();
1103 			return ret;
1104 		}
1105 
1106 		nx842_powernv_exec = nx842_exec_vas;
1107 	}
1108 
1109 	ret = crypto_register_scomp(&nx842_powernv_alg);
1110 	if (ret) {
1111 		nx_delete_coprocs();
1112 		return ret;
1113 	}
1114 
1115 	return 0;
1116 }
1117 module_init(nx_compress_powernv_init);
1118 
nx_compress_powernv_exit(void)1119 static void __exit nx_compress_powernv_exit(void)
1120 {
1121 	/*
1122 	 * GZIP engine is supported only in power9 or later and nx842_ct
1123 	 * is used on power8 (icswx).
1124 	 * VAS API for NX GZIP is registered during init for user space
1125 	 * use. So delete this API use for GZIP engine.
1126 	 */
1127 	if (!nx842_ct)
1128 		vas_unregister_api_powernv();
1129 
1130 	crypto_unregister_scomp(&nx842_powernv_alg);
1131 
1132 	nx_delete_coprocs();
1133 }
1134 module_exit(nx_compress_powernv_exit);
1135