1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Driver for IBM PowerNV compression accelerator
4 *
5 * Copyright (C) 2015 Dan Streetman, IBM Corp
6 */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include "nx-842.h"
11
12 #include <crypto/internal/scompress.h>
13 #include <linux/timer.h>
14
15 #include <asm/prom.h>
16 #include <asm/icswx.h>
17 #include <asm/vas.h>
18 #include <asm/reg.h>
19 #include <asm/opal-api.h>
20 #include <asm/opal.h>
21
22 MODULE_LICENSE("GPL");
23 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
24 MODULE_DESCRIPTION("H/W Compression driver for IBM PowerNV processors");
25 MODULE_ALIAS_CRYPTO("842");
26 MODULE_ALIAS_CRYPTO("842-nx");
27
28 #define WORKMEM_ALIGN (CRB_ALIGN)
29 #define CSB_WAIT_MAX (5000) /* ms */
30 #define VAS_RETRIES (10)
31
32 struct nx842_workmem {
33 /* Below fields must be properly aligned */
34 struct coprocessor_request_block crb; /* CRB_ALIGN align */
35 struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
36 struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
37 /* Above fields must be properly aligned */
38
39 ktime_t start;
40
41 char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
42 } __packed __aligned(WORKMEM_ALIGN);
43
44 struct nx_coproc {
45 unsigned int chip_id;
46 unsigned int ct; /* Can be 842 or GZIP high/normal*/
47 unsigned int ci; /* Coprocessor instance, used with icswx */
48 struct {
49 struct vas_window *rxwin;
50 int id;
51 } vas;
52 struct list_head list;
53 };
54
55 /*
56 * Send the request to NX engine on the chip for the corresponding CPU
57 * where the process is executing. Use with VAS function.
58 */
59 static DEFINE_PER_CPU(struct vas_window *, cpu_txwin);
60
61 /* no cpu hotplug on powernv, so this list never changes after init */
62 static LIST_HEAD(nx_coprocs);
63 static unsigned int nx842_ct; /* used in icswx function */
64
65 /*
66 * Using same values as in skiboot or coprocessor type representing
67 * in NX workbook.
68 */
69 #define NX_CT_GZIP (2) /* on P9 and later */
70 #define NX_CT_842 (3)
71
72 static int (*nx842_powernv_exec)(const unsigned char *in,
73 unsigned int inlen, unsigned char *out,
74 unsigned int *outlenp, void *workmem, int fc);
75
76 /*
77 * setup_indirect_dde - Setup an indirect DDE
78 *
79 * The DDE is setup with the DDE count, byte count, and address of
80 * first direct DDE in the list.
81 */
setup_indirect_dde(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned int dde_count,unsigned int byte_count)82 static void setup_indirect_dde(struct data_descriptor_entry *dde,
83 struct data_descriptor_entry *ddl,
84 unsigned int dde_count, unsigned int byte_count)
85 {
86 dde->flags = 0;
87 dde->count = dde_count;
88 dde->index = 0;
89 dde->length = cpu_to_be32(byte_count);
90 dde->address = cpu_to_be64(nx842_get_pa(ddl));
91 }
92
93 /*
94 * setup_direct_dde - Setup single DDE from buffer
95 *
96 * The DDE is setup with the buffer and length. The buffer must be properly
97 * aligned. The used length is returned.
98 * Returns:
99 * N Successfully set up DDE with N bytes
100 */
setup_direct_dde(struct data_descriptor_entry * dde,unsigned long pa,unsigned int len)101 static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
102 unsigned long pa, unsigned int len)
103 {
104 unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
105
106 dde->flags = 0;
107 dde->count = 0;
108 dde->index = 0;
109 dde->length = cpu_to_be32(l);
110 dde->address = cpu_to_be64(pa);
111
112 return l;
113 }
114
115 /*
116 * setup_ddl - Setup DDL from buffer
117 *
118 * Returns:
119 * 0 Successfully set up DDL
120 */
setup_ddl(struct data_descriptor_entry * dde,struct data_descriptor_entry * ddl,unsigned char * buf,unsigned int len,bool in)121 static int setup_ddl(struct data_descriptor_entry *dde,
122 struct data_descriptor_entry *ddl,
123 unsigned char *buf, unsigned int len,
124 bool in)
125 {
126 unsigned long pa = nx842_get_pa(buf);
127 int i, ret, total_len = len;
128
129 if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
130 pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
131 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
132 return -EINVAL;
133 }
134
135 /* only need to check last mult; since buffer must be
136 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
137 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
138 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
139 */
140 if (len % DDE_BUFFER_LAST_MULT) {
141 pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
142 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
143 if (in)
144 return -EINVAL;
145 len = round_down(len, DDE_BUFFER_LAST_MULT);
146 }
147
148 /* use a single direct DDE */
149 if (len <= LEN_ON_PAGE(pa)) {
150 ret = setup_direct_dde(dde, pa, len);
151 WARN_ON(ret < len);
152 return 0;
153 }
154
155 /* use the DDL */
156 for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
157 ret = setup_direct_dde(&ddl[i], pa, len);
158 buf += ret;
159 len -= ret;
160 pa = nx842_get_pa(buf);
161 }
162
163 if (len > 0) {
164 pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
165 total_len, in ? "input" : "output", len);
166 if (in)
167 return -EMSGSIZE;
168 total_len -= len;
169 }
170 setup_indirect_dde(dde, ddl, i, total_len);
171
172 return 0;
173 }
174
/*
 * CSB_ERR - log an error followed by the CSB's flags, cs, cc, ce and count.
 * @msg must be a string literal; extra arguments fill its conversions.
 */
#define CSB_ERR(csb, msg, ...)					\
	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
	       ##__VA_ARGS__, (csb)->flags,			\
	       (csb)->cs, (csb)->cc, (csb)->ce,			\
	       be32_to_cpu((csb)->count))

/* CSB_ERR_ADDR - like CSB_ERR, additionally printing the CSB address field. */
#define CSB_ERR_ADDR(csb, msg, ...)				\
	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
		(unsigned long)be64_to_cpu((csb)->address))
184
wait_for_csb(struct nx842_workmem * wmem,struct coprocessor_status_block * csb)185 static int wait_for_csb(struct nx842_workmem *wmem,
186 struct coprocessor_status_block *csb)
187 {
188 ktime_t start = wmem->start, now = ktime_get();
189 ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
190
191 while (!(READ_ONCE(csb->flags) & CSB_V)) {
192 cpu_relax();
193 now = ktime_get();
194 if (ktime_after(now, timeout))
195 break;
196 }
197
198 /* hw has updated csb and output buffer */
199 barrier();
200
201 /* check CSB flags */
202 if (!(csb->flags & CSB_V)) {
203 CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
204 (long)ktime_us_delta(now, start));
205 return -ETIMEDOUT;
206 }
207 if (csb->flags & CSB_F) {
208 CSB_ERR(csb, "Invalid CSB format");
209 return -EPROTO;
210 }
211 if (csb->flags & CSB_CH) {
212 CSB_ERR(csb, "Invalid CSB chaining state");
213 return -EPROTO;
214 }
215
216 /* verify CSB completion sequence is 0 */
217 if (csb->cs) {
218 CSB_ERR(csb, "Invalid CSB completion sequence");
219 return -EPROTO;
220 }
221
222 /* check CSB Completion Code */
223 switch (csb->cc) {
224 /* no error */
225 case CSB_CC_SUCCESS:
226 break;
227 case CSB_CC_TPBC_GT_SPBC:
228 /* not an error, but the compressed data is
229 * larger than the uncompressed data :(
230 */
231 break;
232
233 /* input data errors */
234 case CSB_CC_OPERAND_OVERLAP:
235 /* input and output buffers overlap */
236 CSB_ERR(csb, "Operand Overlap error");
237 return -EINVAL;
238 case CSB_CC_INVALID_OPERAND:
239 CSB_ERR(csb, "Invalid operand");
240 return -EINVAL;
241 case CSB_CC_NOSPC:
242 /* output buffer too small */
243 return -ENOSPC;
244 case CSB_CC_ABORT:
245 CSB_ERR(csb, "Function aborted");
246 return -EINTR;
247 case CSB_CC_CRC_MISMATCH:
248 CSB_ERR(csb, "CRC mismatch");
249 return -EINVAL;
250 case CSB_CC_TEMPL_INVALID:
251 CSB_ERR(csb, "Compressed data template invalid");
252 return -EINVAL;
253 case CSB_CC_TEMPL_OVERFLOW:
254 CSB_ERR(csb, "Compressed data template shows data past end");
255 return -EINVAL;
256 case CSB_CC_EXCEED_BYTE_COUNT: /* P9 or later */
257 /*
258 * DDE byte count exceeds the limit specified in Maximum
259 * byte count register.
260 */
261 CSB_ERR(csb, "DDE byte count exceeds the limit");
262 return -EINVAL;
263
264 /* these should not happen */
265 case CSB_CC_INVALID_ALIGN:
266 /* setup_ddl should have detected this */
267 CSB_ERR_ADDR(csb, "Invalid alignment");
268 return -EINVAL;
269 case CSB_CC_DATA_LENGTH:
270 /* setup_ddl should have detected this */
271 CSB_ERR(csb, "Invalid data length");
272 return -EINVAL;
273 case CSB_CC_WR_TRANSLATION:
274 case CSB_CC_TRANSLATION:
275 case CSB_CC_TRANSLATION_DUP1:
276 case CSB_CC_TRANSLATION_DUP2:
277 case CSB_CC_TRANSLATION_DUP3:
278 case CSB_CC_TRANSLATION_DUP4:
279 case CSB_CC_TRANSLATION_DUP5:
280 case CSB_CC_TRANSLATION_DUP6:
281 /* should not happen, we use physical addrs */
282 CSB_ERR_ADDR(csb, "Translation error");
283 return -EPROTO;
284 case CSB_CC_WR_PROTECTION:
285 case CSB_CC_PROTECTION:
286 case CSB_CC_PROTECTION_DUP1:
287 case CSB_CC_PROTECTION_DUP2:
288 case CSB_CC_PROTECTION_DUP3:
289 case CSB_CC_PROTECTION_DUP4:
290 case CSB_CC_PROTECTION_DUP5:
291 case CSB_CC_PROTECTION_DUP6:
292 /* should not happen, we use physical addrs */
293 CSB_ERR_ADDR(csb, "Protection error");
294 return -EPROTO;
295 case CSB_CC_PRIVILEGE:
296 /* shouldn't happen, we're in HYP mode */
297 CSB_ERR(csb, "Insufficient Privilege error");
298 return -EPROTO;
299 case CSB_CC_EXCESSIVE_DDE:
300 /* shouldn't happen, setup_ddl doesn't use many dde's */
301 CSB_ERR(csb, "Too many DDEs in DDL");
302 return -EINVAL;
303 case CSB_CC_TRANSPORT:
304 case CSB_CC_INVALID_CRB: /* P9 or later */
305 /* shouldn't happen, we setup CRB correctly */
306 CSB_ERR(csb, "Invalid CRB");
307 return -EINVAL;
308 case CSB_CC_INVALID_DDE: /* P9 or later */
309 /*
310 * shouldn't happen, setup_direct/indirect_dde creates
311 * DDE right
312 */
313 CSB_ERR(csb, "Invalid DDE");
314 return -EINVAL;
315 case CSB_CC_SEGMENTED_DDL:
316 /* shouldn't happen, setup_ddl creates DDL right */
317 CSB_ERR(csb, "Segmented DDL error");
318 return -EINVAL;
319 case CSB_CC_DDE_OVERFLOW:
320 /* shouldn't happen, setup_ddl creates DDL right */
321 CSB_ERR(csb, "DDE overflow error");
322 return -EINVAL;
323 case CSB_CC_SESSION:
324 /* should not happen with ICSWX */
325 CSB_ERR(csb, "Session violation error");
326 return -EPROTO;
327 case CSB_CC_CHAIN:
328 /* should not happen, we don't use chained CRBs */
329 CSB_ERR(csb, "Chained CRB error");
330 return -EPROTO;
331 case CSB_CC_SEQUENCE:
332 /* should not happen, we don't use chained CRBs */
333 CSB_ERR(csb, "CRB sequence number error");
334 return -EPROTO;
335 case CSB_CC_UNKNOWN_CODE:
336 CSB_ERR(csb, "Unknown subfunction code");
337 return -EPROTO;
338
339 /* hardware errors */
340 case CSB_CC_RD_EXTERNAL:
341 case CSB_CC_RD_EXTERNAL_DUP1:
342 case CSB_CC_RD_EXTERNAL_DUP2:
343 case CSB_CC_RD_EXTERNAL_DUP3:
344 CSB_ERR_ADDR(csb, "Read error outside coprocessor");
345 return -EPROTO;
346 case CSB_CC_WR_EXTERNAL:
347 CSB_ERR_ADDR(csb, "Write error outside coprocessor");
348 return -EPROTO;
349 case CSB_CC_INTERNAL:
350 CSB_ERR(csb, "Internal error in coprocessor");
351 return -EPROTO;
352 case CSB_CC_PROVISION:
353 CSB_ERR(csb, "Storage provision error");
354 return -EPROTO;
355 case CSB_CC_HW:
356 CSB_ERR(csb, "Correctable hardware error");
357 return -EPROTO;
358 case CSB_CC_HW_EXPIRED_TIMER: /* P9 or later */
359 CSB_ERR(csb, "Job did not finish within allowed time");
360 return -EPROTO;
361
362 default:
363 CSB_ERR(csb, "Invalid CC %d", csb->cc);
364 return -EPROTO;
365 }
366
367 /* check Completion Extension state */
368 if (csb->ce & CSB_CE_TERMINATION) {
369 CSB_ERR(csb, "CSB request was terminated");
370 return -EPROTO;
371 }
372 if (csb->ce & CSB_CE_INCOMPLETE) {
373 CSB_ERR(csb, "CSB request not complete");
374 return -EPROTO;
375 }
376 if (!(csb->ce & CSB_CE_TPBC)) {
377 CSB_ERR(csb, "TPBC not provided, unknown target length");
378 return -EPROTO;
379 }
380
381 /* successful completion */
382 pr_debug_ratelimited("Processed %u bytes in %lu us\n",
383 be32_to_cpu(csb->count),
384 (unsigned long)ktime_us_delta(now, start));
385
386 return 0;
387 }
388
nx842_config_crb(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int outlen,struct nx842_workmem * wmem)389 static int nx842_config_crb(const unsigned char *in, unsigned int inlen,
390 unsigned char *out, unsigned int outlen,
391 struct nx842_workmem *wmem)
392 {
393 struct coprocessor_request_block *crb;
394 struct coprocessor_status_block *csb;
395 u64 csb_addr;
396 int ret;
397
398 crb = &wmem->crb;
399 csb = &crb->csb;
400
401 /* Clear any previous values */
402 memset(crb, 0, sizeof(*crb));
403
404 /* set up DDLs */
405 ret = setup_ddl(&crb->source, wmem->ddl_in,
406 (unsigned char *)in, inlen, true);
407 if (ret)
408 return ret;
409
410 ret = setup_ddl(&crb->target, wmem->ddl_out,
411 out, outlen, false);
412 if (ret)
413 return ret;
414
415 /* set up CRB's CSB addr */
416 csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
417 csb_addr |= CRB_CSB_AT; /* Addrs are phys */
418 crb->csb_addr = cpu_to_be64(csb_addr);
419
420 return 0;
421 }
422
423 /**
424 * nx842_exec_icswx - compress/decompress data using the 842 algorithm
425 *
426 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
427 * This compresses or decompresses the provided input buffer into the provided
428 * output buffer.
429 *
430 * Upon return from this function @outlen contains the length of the
431 * output data. If there is an error then @outlen will be 0 and an
432 * error will be specified by the return code from this function.
433 *
434 * The @workmem buffer should only be used by one function call at a time.
435 *
436 * @in: input buffer pointer
437 * @inlen: input buffer size
438 * @out: output buffer pointer
439 * @outlenp: output buffer size pointer
440 * @workmem: working memory buffer pointer, size determined by
441 * nx842_powernv_driver.workmem_size
442 * @fc: function code, see CCW Function Codes in nx-842.h
443 *
444 * Returns:
445 * 0 Success, output of length @outlenp stored in the buffer at @out
446 * -ENODEV Hardware unavailable
447 * -ENOSPC Output buffer is to small
448 * -EMSGSIZE Input buffer too large
449 * -EINVAL buffer constraints do not fix nx842_constraints
450 * -EPROTO hardware error during operation
451 * -ETIMEDOUT hardware did not complete operation in reasonable time
452 * -EINTR operation was aborted
453 */
nx842_exec_icswx(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)454 static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
455 unsigned char *out, unsigned int *outlenp,
456 void *workmem, int fc)
457 {
458 struct coprocessor_request_block *crb;
459 struct coprocessor_status_block *csb;
460 struct nx842_workmem *wmem;
461 int ret;
462 u32 ccw;
463 unsigned int outlen = *outlenp;
464
465 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
466
467 *outlenp = 0;
468
469 /* shoudn't happen, we don't load without a coproc */
470 if (!nx842_ct) {
471 pr_err_ratelimited("coprocessor CT is 0");
472 return -ENODEV;
473 }
474
475 ret = nx842_config_crb(in, inlen, out, outlen, wmem);
476 if (ret)
477 return ret;
478
479 crb = &wmem->crb;
480 csb = &crb->csb;
481
482 /* set up CCW */
483 ccw = 0;
484 ccw = SET_FIELD(CCW_CT, ccw, nx842_ct);
485 ccw = SET_FIELD(CCW_CI_842, ccw, 0); /* use 0 for hw auto-selection */
486 ccw = SET_FIELD(CCW_FC_842, ccw, fc);
487
488 wmem->start = ktime_get();
489
490 /* do ICSWX */
491 ret = icswx(cpu_to_be32(ccw), crb);
492
493 pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
494 (unsigned int)ccw,
495 (unsigned int)be32_to_cpu(crb->ccw));
496
497 /*
498 * NX842 coprocessor sets 3rd bit in CR register with XER[S0].
499 * XER[S0] is the integer summary overflow bit which is nothing
500 * to do NX. Since this bit can be set with other return values,
501 * mask this bit.
502 */
503 ret &= ~ICSWX_XERS0;
504
505 switch (ret) {
506 case ICSWX_INITIATED:
507 ret = wait_for_csb(wmem, csb);
508 break;
509 case ICSWX_BUSY:
510 pr_debug_ratelimited("842 Coprocessor busy\n");
511 ret = -EBUSY;
512 break;
513 case ICSWX_REJECTED:
514 pr_err_ratelimited("ICSWX rejected\n");
515 ret = -EPROTO;
516 break;
517 }
518
519 if (!ret)
520 *outlenp = be32_to_cpu(csb->count);
521
522 return ret;
523 }
524
525 /**
526 * nx842_exec_vas - compress/decompress data using the 842 algorithm
527 *
528 * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
529 * This compresses or decompresses the provided input buffer into the provided
530 * output buffer.
531 *
532 * Upon return from this function @outlen contains the length of the
533 * output data. If there is an error then @outlen will be 0 and an
534 * error will be specified by the return code from this function.
535 *
536 * The @workmem buffer should only be used by one function call at a time.
537 *
538 * @in: input buffer pointer
539 * @inlen: input buffer size
540 * @out: output buffer pointer
541 * @outlenp: output buffer size pointer
542 * @workmem: working memory buffer pointer, size determined by
543 * nx842_powernv_driver.workmem_size
544 * @fc: function code, see CCW Function Codes in nx-842.h
545 *
546 * Returns:
547 * 0 Success, output of length @outlenp stored in the buffer
548 * at @out
549 * -ENODEV Hardware unavailable
550 * -ENOSPC Output buffer is to small
551 * -EMSGSIZE Input buffer too large
552 * -EINVAL buffer constraints do not fix nx842_constraints
553 * -EPROTO hardware error during operation
554 * -ETIMEDOUT hardware did not complete operation in reasonable time
555 * -EINTR operation was aborted
556 */
nx842_exec_vas(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * workmem,int fc)557 static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
558 unsigned char *out, unsigned int *outlenp,
559 void *workmem, int fc)
560 {
561 struct coprocessor_request_block *crb;
562 struct coprocessor_status_block *csb;
563 struct nx842_workmem *wmem;
564 struct vas_window *txwin;
565 int ret, i = 0;
566 u32 ccw;
567 unsigned int outlen = *outlenp;
568
569 wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
570
571 *outlenp = 0;
572
573 crb = &wmem->crb;
574 csb = &crb->csb;
575
576 ret = nx842_config_crb(in, inlen, out, outlen, wmem);
577 if (ret)
578 return ret;
579
580 ccw = 0;
581 ccw = SET_FIELD(CCW_FC_842, ccw, fc);
582 crb->ccw = cpu_to_be32(ccw);
583
584 do {
585 wmem->start = ktime_get();
586 preempt_disable();
587 txwin = this_cpu_read(cpu_txwin);
588
589 /*
590 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
591 * @crb and @offset.
592 */
593 vas_copy_crb(crb, 0);
594
595 /*
596 * VAS paste previously copied CRB to NX.
597 * @txwin, @offset and @last (must be true).
598 */
599 ret = vas_paste_crb(txwin, 0, 1);
600 preempt_enable();
601 /*
602 * Retry copy/paste function for VAS failures.
603 */
604 } while (ret && (i++ < VAS_RETRIES));
605
606 if (ret) {
607 pr_err_ratelimited("VAS copy/paste failed\n");
608 return ret;
609 }
610
611 ret = wait_for_csb(wmem, csb);
612 if (!ret)
613 *outlenp = be32_to_cpu(csb->count);
614
615 return ret;
616 }
617
618 /**
619 * nx842_powernv_compress - Compress data using the 842 algorithm
620 *
621 * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
622 * The input buffer is compressed and the result is stored in the
623 * provided output buffer.
624 *
625 * Upon return from this function @outlen contains the length of the
626 * compressed data. If there is an error then @outlen will be 0 and an
627 * error will be specified by the return code from this function.
628 *
629 * @in: input buffer pointer
630 * @inlen: input buffer size
631 * @out: output buffer pointer
632 * @outlenp: output buffer size pointer
633 * @wmem: working memory buffer pointer, size determined by
634 * nx842_powernv_driver.workmem_size
635 *
636 * Returns: see @nx842_powernv_exec()
637 */
nx842_powernv_compress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)638 static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
639 unsigned char *out, unsigned int *outlenp,
640 void *wmem)
641 {
642 return nx842_powernv_exec(in, inlen, out, outlenp,
643 wmem, CCW_FC_842_COMP_CRC);
644 }
645
646 /**
647 * nx842_powernv_decompress - Decompress data using the 842 algorithm
648 *
649 * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
650 * The input buffer is decompressed and the result is stored in the
651 * provided output buffer.
652 *
653 * Upon return from this function @outlen contains the length of the
654 * decompressed data. If there is an error then @outlen will be 0 and an
655 * error will be specified by the return code from this function.
656 *
657 * @in: input buffer pointer
658 * @inlen: input buffer size
659 * @out: output buffer pointer
660 * @outlenp: output buffer size pointer
661 * @wmem: working memory buffer pointer, size determined by
662 * nx842_powernv_driver.workmem_size
663 *
664 * Returns: see @nx842_powernv_exec()
665 */
nx842_powernv_decompress(const unsigned char * in,unsigned int inlen,unsigned char * out,unsigned int * outlenp,void * wmem)666 static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
667 unsigned char *out, unsigned int *outlenp,
668 void *wmem)
669 {
670 return nx842_powernv_exec(in, inlen, out, outlenp,
671 wmem, CCW_FC_842_DECOMP_CRC);
672 }
673
nx_add_coprocs_list(struct nx_coproc * coproc,int chipid)674 static inline void nx_add_coprocs_list(struct nx_coproc *coproc,
675 int chipid)
676 {
677 coproc->chip_id = chipid;
678 INIT_LIST_HEAD(&coproc->list);
679 list_add(&coproc->list, &nx_coprocs);
680 }
681
nx_alloc_txwin(struct nx_coproc * coproc)682 static struct vas_window *nx_alloc_txwin(struct nx_coproc *coproc)
683 {
684 struct vas_window *txwin = NULL;
685 struct vas_tx_win_attr txattr;
686
687 /*
688 * Kernel requests will be high priority. So open send
689 * windows only for high priority RxFIFO entries.
690 */
691 vas_init_tx_win_attr(&txattr, coproc->ct);
692 txattr.lpid = 0; /* lpid is 0 for kernel requests */
693
694 /*
695 * Open a VAS send window which is used to send request to NX.
696 */
697 txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
698 if (IS_ERR(txwin))
699 pr_err("ibm,nx-842: Can not open TX window: %ld\n",
700 PTR_ERR(txwin));
701
702 return txwin;
703 }
704
705 /*
706 * Identify chip ID for each CPU, open send wndow for the corresponding NX
707 * engine and save txwin in percpu cpu_txwin.
708 * cpu_txwin is used in copy/paste operation for each compression /
709 * decompression request.
710 */
nx_open_percpu_txwins(void)711 static int nx_open_percpu_txwins(void)
712 {
713 struct nx_coproc *coproc, *n;
714 unsigned int i, chip_id;
715
716 for_each_possible_cpu(i) {
717 struct vas_window *txwin = NULL;
718
719 chip_id = cpu_to_chip_id(i);
720
721 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
722 /*
723 * Kernel requests use only high priority FIFOs. So
724 * open send windows for these FIFOs.
725 * GZIP is not supported in kernel right now.
726 */
727
728 if (coproc->ct != VAS_COP_TYPE_842_HIPRI)
729 continue;
730
731 if (coproc->chip_id == chip_id) {
732 txwin = nx_alloc_txwin(coproc);
733 if (IS_ERR(txwin))
734 return PTR_ERR(txwin);
735
736 per_cpu(cpu_txwin, i) = txwin;
737 break;
738 }
739 }
740
741 if (!per_cpu(cpu_txwin, i)) {
742 /* shouldn't happen, Each chip will have NX engine */
743 pr_err("NX engine is not available for CPU %d\n", i);
744 return -EINVAL;
745 }
746 }
747
748 return 0;
749 }
750
nx_set_ct(struct nx_coproc * coproc,const char * priority,int high,int normal)751 static int __init nx_set_ct(struct nx_coproc *coproc, const char *priority,
752 int high, int normal)
753 {
754 if (!strcmp(priority, "High"))
755 coproc->ct = high;
756 else if (!strcmp(priority, "Normal"))
757 coproc->ct = normal;
758 else {
759 pr_err("Invalid RxFIFO priority value\n");
760 return -EINVAL;
761 }
762
763 return 0;
764 }
765
vas_cfg_coproc_info(struct device_node * dn,int chip_id,int vasid,int type,int * ct)766 static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
767 int vasid, int type, int *ct)
768 {
769 struct vas_window *rxwin = NULL;
770 struct vas_rx_win_attr rxattr;
771 u32 lpid, pid, tid, fifo_size;
772 struct nx_coproc *coproc;
773 u64 rx_fifo;
774 const char *priority;
775 int ret;
776
777 ret = of_property_read_u64(dn, "rx-fifo-address", &rx_fifo);
778 if (ret) {
779 pr_err("Missing rx-fifo-address property\n");
780 return ret;
781 }
782
783 ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
784 if (ret) {
785 pr_err("Missing rx-fifo-size property\n");
786 return ret;
787 }
788
789 ret = of_property_read_u32(dn, "lpid", &lpid);
790 if (ret) {
791 pr_err("Missing lpid property\n");
792 return ret;
793 }
794
795 ret = of_property_read_u32(dn, "pid", &pid);
796 if (ret) {
797 pr_err("Missing pid property\n");
798 return ret;
799 }
800
801 ret = of_property_read_u32(dn, "tid", &tid);
802 if (ret) {
803 pr_err("Missing tid property\n");
804 return ret;
805 }
806
807 ret = of_property_read_string(dn, "priority", &priority);
808 if (ret) {
809 pr_err("Missing priority property\n");
810 return ret;
811 }
812
813 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
814 if (!coproc)
815 return -ENOMEM;
816
817 if (type == NX_CT_842)
818 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_842_HIPRI,
819 VAS_COP_TYPE_842);
820 else if (type == NX_CT_GZIP)
821 ret = nx_set_ct(coproc, priority, VAS_COP_TYPE_GZIP_HIPRI,
822 VAS_COP_TYPE_GZIP);
823
824 if (ret)
825 goto err_out;
826
827 vas_init_rx_win_attr(&rxattr, coproc->ct);
828 rxattr.rx_fifo = rx_fifo;
829 rxattr.rx_fifo_size = fifo_size;
830 rxattr.lnotify_lpid = lpid;
831 rxattr.lnotify_pid = pid;
832 rxattr.lnotify_tid = tid;
833 /*
834 * Maximum RX window credits can not be more than #CRBs in
835 * RxFIFO. Otherwise, can get checkstop if RxFIFO overruns.
836 */
837 rxattr.wcreds_max = fifo_size / CRB_SIZE;
838
839 /*
840 * Open a VAS receice window which is used to configure RxFIFO
841 * for NX.
842 */
843 rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
844 if (IS_ERR(rxwin)) {
845 ret = PTR_ERR(rxwin);
846 pr_err("setting RxFIFO with VAS failed: %d\n",
847 ret);
848 goto err_out;
849 }
850
851 coproc->vas.rxwin = rxwin;
852 coproc->vas.id = vasid;
853 nx_add_coprocs_list(coproc, chip_id);
854
855 /*
856 * (lpid, pid, tid) combination has to be unique for each
857 * coprocessor instance in the system. So to make it
858 * unique, skiboot uses coprocessor type such as 842 or
859 * GZIP for pid and provides this value to kernel in pid
860 * device-tree property.
861 */
862 *ct = pid;
863
864 return 0;
865
866 err_out:
867 kfree(coproc);
868 return ret;
869 }
870
nx_coproc_init(int chip_id,int ct_842,int ct_gzip)871 static int __init nx_coproc_init(int chip_id, int ct_842, int ct_gzip)
872 {
873 int ret = 0;
874
875 if (opal_check_token(OPAL_NX_COPROC_INIT)) {
876 ret = opal_nx_coproc_init(chip_id, ct_842);
877
878 if (!ret)
879 ret = opal_nx_coproc_init(chip_id, ct_gzip);
880
881 if (ret) {
882 ret = opal_error_code(ret);
883 pr_err("Failed to initialize NX for chip(%d): %d\n",
884 chip_id, ret);
885 }
886 } else
887 pr_warn("Firmware doesn't support NX initialization\n");
888
889 return ret;
890 }
891
find_nx_device_tree(struct device_node * dn,int chip_id,int vasid,int type,char * devname,int * ct)892 static int __init find_nx_device_tree(struct device_node *dn, int chip_id,
893 int vasid, int type, char *devname,
894 int *ct)
895 {
896 int ret = 0;
897
898 if (of_device_is_compatible(dn, devname)) {
899 ret = vas_cfg_coproc_info(dn, chip_id, vasid, type, ct);
900 if (ret)
901 of_node_put(dn);
902 }
903
904 return ret;
905 }
906
nx_powernv_probe_vas(struct device_node * pn)907 static int __init nx_powernv_probe_vas(struct device_node *pn)
908 {
909 int chip_id, vasid, ret = 0;
910 int ct_842 = 0, ct_gzip = 0;
911 struct device_node *dn;
912
913 chip_id = of_get_ibm_chip_id(pn);
914 if (chip_id < 0) {
915 pr_err("ibm,chip-id missing\n");
916 return -EINVAL;
917 }
918
919 vasid = chip_to_vas_id(chip_id);
920 if (vasid < 0) {
921 pr_err("Unable to map chip_id %d to vasid\n", chip_id);
922 return -EINVAL;
923 }
924
925 for_each_child_of_node(pn, dn) {
926 ret = find_nx_device_tree(dn, chip_id, vasid, NX_CT_842,
927 "ibm,p9-nx-842", &ct_842);
928
929 if (!ret)
930 ret = find_nx_device_tree(dn, chip_id, vasid,
931 NX_CT_GZIP, "ibm,p9-nx-gzip", &ct_gzip);
932
933 if (ret) {
934 of_node_put(dn);
935 return ret;
936 }
937 }
938
939 if (!ct_842 || !ct_gzip) {
940 pr_err("NX FIFO nodes are missing\n");
941 return -EINVAL;
942 }
943
944 /*
945 * Initialize NX instance for both high and normal priority FIFOs.
946 */
947 ret = nx_coproc_init(chip_id, ct_842, ct_gzip);
948
949 return ret;
950 }
951
nx842_powernv_probe(struct device_node * dn)952 static int __init nx842_powernv_probe(struct device_node *dn)
953 {
954 struct nx_coproc *coproc;
955 unsigned int ct, ci;
956 int chip_id;
957
958 chip_id = of_get_ibm_chip_id(dn);
959 if (chip_id < 0) {
960 pr_err("ibm,chip-id missing\n");
961 return -EINVAL;
962 }
963
964 if (of_property_read_u32(dn, "ibm,842-coprocessor-type", &ct)) {
965 pr_err("ibm,842-coprocessor-type missing\n");
966 return -EINVAL;
967 }
968
969 if (of_property_read_u32(dn, "ibm,842-coprocessor-instance", &ci)) {
970 pr_err("ibm,842-coprocessor-instance missing\n");
971 return -EINVAL;
972 }
973
974 coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
975 if (!coproc)
976 return -ENOMEM;
977
978 coproc->ct = ct;
979 coproc->ci = ci;
980 nx_add_coprocs_list(coproc, chip_id);
981
982 pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
983
984 if (!nx842_ct)
985 nx842_ct = ct;
986 else if (nx842_ct != ct)
987 pr_err("NX842 chip %d, CT %d != first found CT %d\n",
988 chip_id, ct, nx842_ct);
989
990 return 0;
991 }
992
nx_delete_coprocs(void)993 static void nx_delete_coprocs(void)
994 {
995 struct nx_coproc *coproc, *n;
996 struct vas_window *txwin;
997 int i;
998
999 /*
1000 * close percpu txwins that are opened for the corresponding coproc.
1001 */
1002 for_each_possible_cpu(i) {
1003 txwin = per_cpu(cpu_txwin, i);
1004 if (txwin)
1005 vas_win_close(txwin);
1006
1007 per_cpu(cpu_txwin, i) = NULL;
1008 }
1009
1010 list_for_each_entry_safe(coproc, n, &nx_coprocs, list) {
1011 if (coproc->vas.rxwin)
1012 vas_win_close(coproc->vas.rxwin);
1013
1014 list_del(&coproc->list);
1015 kfree(coproc);
1016 }
1017 }
1018
1019 static struct nx842_constraints nx842_powernv_constraints = {
1020 .alignment = DDE_BUFFER_ALIGN,
1021 .multiple = DDE_BUFFER_LAST_MULT,
1022 .minimum = DDE_BUFFER_LAST_MULT,
1023 .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE,
1024 };
1025
1026 static struct nx842_driver nx842_powernv_driver = {
1027 .name = KBUILD_MODNAME,
1028 .owner = THIS_MODULE,
1029 .workmem_size = sizeof(struct nx842_workmem),
1030 .constraints = &nx842_powernv_constraints,
1031 .compress = nx842_powernv_compress,
1032 .decompress = nx842_powernv_decompress,
1033 };
1034
nx842_powernv_crypto_alloc_ctx(void)1035 static void *nx842_powernv_crypto_alloc_ctx(void)
1036 {
1037 return nx842_crypto_alloc_ctx(&nx842_powernv_driver);
1038 }
1039
1040 static struct scomp_alg nx842_powernv_alg = {
1041 .base.cra_name = "842",
1042 .base.cra_driver_name = "842-nx",
1043 .base.cra_priority = 300,
1044 .base.cra_module = THIS_MODULE,
1045
1046 .alloc_ctx = nx842_powernv_crypto_alloc_ctx,
1047 .free_ctx = nx842_crypto_free_ctx,
1048 .compress = nx842_crypto_compress,
1049 .decompress = nx842_crypto_decompress,
1050 };
1051
nx_compress_powernv_init(void)1052 static __init int nx_compress_powernv_init(void)
1053 {
1054 struct device_node *dn;
1055 int ret;
1056
1057 /* verify workmem size/align restrictions */
1058 BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
1059 BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
1060 BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
1061 /* verify buffer size/align restrictions */
1062 BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
1063 BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
1064 BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
1065
1066 for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
1067 ret = nx_powernv_probe_vas(dn);
1068 if (ret) {
1069 nx_delete_coprocs();
1070 of_node_put(dn);
1071 return ret;
1072 }
1073 }
1074
1075 if (list_empty(&nx_coprocs)) {
1076 for_each_compatible_node(dn, NULL, "ibm,power-nx")
1077 nx842_powernv_probe(dn);
1078
1079 if (!nx842_ct)
1080 return -ENODEV;
1081
1082 nx842_powernv_exec = nx842_exec_icswx;
1083 } else {
1084 /*
1085 * Register VAS user space API for NX GZIP so
1086 * that user space can use GZIP engine.
1087 * Using high FIFO priority for kernel requests and
1088 * normal FIFO priority is assigned for userspace.
1089 * 842 compression is supported only in kernel.
1090 */
1091 ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP,
1092 "nx-gzip");
1093
1094 /*
1095 * GZIP is not supported in kernel right now.
1096 * So open tx windows only for 842.
1097 */
1098 if (!ret)
1099 ret = nx_open_percpu_txwins();
1100
1101 if (ret) {
1102 nx_delete_coprocs();
1103 return ret;
1104 }
1105
1106 nx842_powernv_exec = nx842_exec_vas;
1107 }
1108
1109 ret = crypto_register_scomp(&nx842_powernv_alg);
1110 if (ret) {
1111 nx_delete_coprocs();
1112 return ret;
1113 }
1114
1115 return 0;
1116 }
1117 module_init(nx_compress_powernv_init);
1118
nx_compress_powernv_exit(void)1119 static void __exit nx_compress_powernv_exit(void)
1120 {
1121 /*
1122 * GZIP engine is supported only in power9 or later and nx842_ct
1123 * is used on power8 (icswx).
1124 * VAS API for NX GZIP is registered during init for user space
1125 * use. So delete this API use for GZIP engine.
1126 */
1127 if (!nx842_ct)
1128 vas_unregister_api_powernv();
1129
1130 crypto_unregister_scomp(&nx842_powernv_alg);
1131
1132 nx_delete_coprocs();
1133 }
1134 module_exit(nx_compress_powernv_exit);
1135