// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API for the NX-842 hardware compression.
 *
 * Copyright (C) IBM Corporation, 2011-2015
 *
 * Designer of the Power data compression engine:
 *   Bulent Abali <abali@us.ibm.com>
 *
 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
 *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * Rewrite: Dan Streetman <ddstreet@ieee.org>
 *
 * This is an interface to the NX-842 compression hardware in PowerPC
 * processors. Most of the complexity of this driver is due to the fact that
 * the NX-842 compression hardware requires the input and output data buffers
 * to be specifically aligned, to be a specific multiple in length, and within
 * specific minimum and maximum lengths. Those restrictions, provided by the
 * nx-842 driver via nx842_constraints, mean this driver must use bounce
 * buffers and headers to correct misaligned in or out buffers, and to split
 * input buffers that are too large.
 *
 * This driver will fall back to software decompression if the hardware
 * decompression fails, so this driver's decompression should never fail as
 * long as the provided compressed buffer is valid. Any compressed buffer
 * created by this driver will have a header (except ones where the input
 * perfectly matches the constraints); so users of this driver cannot simply
 * pass a compressed buffer created by this driver over to the 842 software
 * decompression library. Instead, users must use this driver to decompress;
 * if the hardware fails or is unavailable, the compressed buffer will be
 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
 * software decompression library.
 *
 * This does not fall back to software compression, however, since the caller
 * of this driver is specifically requesting hardware compression; if the
 * hardware compression fails, the caller can fall back to software
 * compression, and the raw 842 compressed buffer that the software compressor
 * creates can be passed to this driver for hardware decompression; any
 * buffer without our specific header magic is assumed to be a raw 842 buffer
 * and passed directly to the hardware. Note that the software compression
 * library will produce a compressed buffer that is incompatible with the
 * hardware decompressor if the original input buffer length is not a multiple
 * of 8; if such a compressed buffer is passed to this driver for
 * decompression, the hardware will reject it and this driver will then pass
 * it over to the software library for decompression.
 */
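/*
 * Illustrative sketch (not part of this driver): callers normally reach this
 * code through the crypto API rather than by calling the nx842_crypto_*
 * functions directly. Assuming the usual "842" algorithm name registered by
 * the nx-842 platform drivers, and with hypothetical scatterlists src_sgl and
 * dst_sgl, a user would do roughly:
 *
 *	struct crypto_acomp *tfm = crypto_alloc_acomp("842", 0, 0);
 *	struct acomp_req *req = acomp_request_alloc(tfm);
 *
 *	acomp_request_set_params(req, src_sgl, dst_sgl, slen, dlen);
 *	err = crypto_acomp_compress(req);   (ends up in nx842_crypto_compress)
 *	...
 *	acomp_request_free(req);
 *	crypto_free_acomp(tfm);
 *
 * Error handling and completion waiting are omitted; the exact registration
 * (algorithm/driver names, scomp wrapping) lives in the platform drivers that
 * call nx842_crypto_compress() and nx842_crypto_decompress().
 */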

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/vmalloc.h>
#include <linux/sw842.h>
#include <linux/spinlock.h>

#include "nx-842.h"

/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
 * template (see lib/842/842.h), so this magic number will never appear at
 * the start of a raw 842 compressed buffer. That is important, as any buffer
 * passed to us without this magic is assumed to be a raw 842 compressed
 * buffer, and passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)
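/* Spelled out: 0xf842 is 1111 1000 0100 0010 in binary, so the leading 5-bit
 * template field of a buffer starting with this magic is 0b11111 = 0x1f,
 * which is never a valid 842 template.
 */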
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)

/* bounce buffer size */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
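/* For reference: order 2 means each bounce buffer is 4 pages, i.e. 16 KiB
 * with a 4 KiB PAGE_SIZE or 256 KiB with the 64 KiB pages commonly used on
 * ppc64; the exact size depends on the kernel's page size configuration.
 */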

/* try longer on comp because we can fallback to sw decomp if hw is busy */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */

struct nx842_crypto_param {
	u8 *in;
	unsigned int iremain;
	u8 *out;
	unsigned int oremain;
	unsigned int ototal;
};

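/*
 * Advance the in/out cursors in @p after one group has been processed:
 * @slen input bytes were consumed and @dlen output bytes were produced.
 * Fails if the group claims more input or output than remains.
 */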
static int update_param(struct nx842_crypto_param *p,
			unsigned int slen, unsigned int dlen)
{
	if (p->iremain < slen)
		return -EOVERFLOW;
	if (p->oremain < dlen)
		return -ENOSPC;

	p->in += slen;
	p->iremain -= slen;
	p->out += dlen;
	p->oremain -= dlen;
	p->ototal += dlen;

	return 0;
}

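/*
 * Allocate the context that the platform drivers pass back into
 * nx842_crypto_compress()/nx842_crypto_decompress(): the driver's working
 * memory plus one source and one destination bounce buffer of
 * BOUNCE_BUFFER_SIZE each.
 */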
void *nx842_crypto_alloc_ctx(struct nx842_driver *driver)
{
	struct nx842_crypto_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ctx->lock);
	ctx->driver = driver;
	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
		kfree(ctx->wmem);
		free_page((unsigned long)ctx->sbounce);
		free_page((unsigned long)ctx->dbounce);
		kfree(ctx);
		return ERR_PTR(-ENOMEM);
	}

	return ctx;
}
EXPORT_SYMBOL_GPL(nx842_crypto_alloc_ctx);

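/* Release the working memory and bounce buffers allocated by
 * nx842_crypto_alloc_ctx().
 */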
void nx842_crypto_free_ctx(void *p)
{
	struct nx842_crypto_ctx *ctx = p;

	kfree(ctx->wmem);
	free_page((unsigned long)ctx->sbounce);
	free_page((unsigned long)ctx->dbounce);
}
EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx);

static void check_constraints(struct nx842_constraints *c)
{
	/* limit maximum, to always have enough bounce buffer to decompress */
	if (c->maximum > BOUNCE_BUFFER_SIZE)
		c->maximum = BOUNCE_BUFFER_SIZE;
}

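/*
 * Copy the populated header to the front of the output buffer. The space for
 * it was reserved by compress() and recorded as the first group's padding,
 * so refuse to write it if that reservation is too small.
 */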
static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
{
	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);

	/* compress should have added space for header */
	if (s > be16_to_cpu(hdr->group[0].padding)) {
		pr_err("Internal error: no space for header\n");
		return -EINVAL;
	}

	memcpy(buf, hdr, s);

	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);

	return 0;
}

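/*
 * Compress one group: adjust the source to meet the hardware constraints
 * (padding a copy in the sbounce buffer if it is short, unaligned or not a
 * multiple of c->multiple), reserve hdrsize bytes at the start of the output
 * for the header, align the output (or compress into the dbounce buffer and
 * copy back), then record the group's padding and lengths in @g. *ignore is
 * set to the number of padding bytes the hardware compressed beyond the real
 * input, so decompression can discard them later.
 */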
static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	if (adj_slen > slen || (u64)src % c->alignment) {
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	dst += hdrsize;
	dlen -= hdrsize;

	if ((u64)dst % c->alignment) {
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
nospc:
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	if (p->iremain < slen) {
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}

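/*
 * Compress @src into @dst via the hardware driver, splitting the input into
 * up to NX842_CRYPTO_GROUP_MAX groups as needed to satisfy the hardware
 * constraints. Unless the buffers already meet every constraint, the output
 * is prefixed with a struct nx842_crypto_header describing the groups; only
 * this driver's decompress path understands that header.
 */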
int nx842_crypto_compress(struct crypto_scomp *tfm,
			  const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr =
			container_of(&ctx->header,
				     struct nx842_crypto_header, hdr);
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	unsigned int groups, hdrsize, h;
	int ret, n;
	bool add_header;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
		       DIV_ROUND_UP(p.iremain, c.maximum));
	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);

	spin_lock_bh(&ctx->lock);

	/* skip adding header if the buffers meet all constraints */
	add_header = (p.iremain % c.multiple ||
		      p.iremain < c.minimum ||
		      p.iremain > c.maximum ||
		      (u64)p.in % c.alignment ||
		      p.oremain % c.multiple ||
		      p.oremain < c.minimum ||
		      p.oremain > c.maximum ||
		      (u64)p.out % c.alignment);

	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
	hdr->groups = 0;
	hdr->ignore = 0;

	while (p.iremain > 0) {
		n = hdr->groups++;
		ret = -ENOSPC;
		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
			goto unlock;

		/* header goes before first group */
		h = !n && add_header ? hdrsize : 0;

		if (ignore)
			pr_warn("internal error, ignore is set %x\n", ignore);

		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
		if (ret)
			goto unlock;
	}

	if (!add_header && hdr->groups > 1) {
		pr_err("Internal error: No header but multiple groups\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* ignore indicates the input stream needed to be padded */
	hdr->ignore = cpu_to_be16(ignore);
	if (ignore)
		pr_debug("marked %d bytes as ignore\n", ignore);

	if (add_header)
		ret = nx842_crypto_add_header(hdr, dst);
	if (ret)
		goto unlock;

	*dlen = p.ototal;

	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_compress);

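/*
 * Decompress one group described by @g: skip the group's padding in the
 * input, bounce the source and/or destination through the sbounce/dbounce
 * buffers when the constraints require it, and try the hardware; if the
 * hardware refuses or fails, retry with the sw842 library, which has no
 * alignment or length constraints. @ignore bytes at the end of the output
 * (compression padding) are discarded.
 */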
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	src += padding;

	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < required_len || (u64)dst % c->alignment) {
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	slen -= spadding;

	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}

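/*
 * Decompress @src into @dst. If the input starts with our header magic,
 * decompress each group the header describes; otherwise treat the whole
 * input as a single raw 842 buffer (e.g. one produced by the software
 * compressor) and hand it straight to the hardware, with the software
 * library as fallback.
 */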
int nx842_crypto_decompress(struct crypto_scomp *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	hdr = (struct nx842_crypto_header *)src;

	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		struct nx842_crypto_header_group g = {
			.padding = 0,
			.compressed_length = cpu_to_be32(p.iremain),
			.uncompressed_length = cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	memcpy(&ctx->header, src, hdr_len);
	hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr);

	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");