xref: /linux/drivers/crypto/nx/nx-842.c (revision aec2f682d47c54ef434b2d440992626d80b1ebdc)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cryptographic API for the NX-842 hardware compression.
4  *
5  * Copyright (C) IBM Corporation, 2011-2015
6  *
7  * Designer of the Power data compression engine:
8  *   Bulent Abali <abali@us.ibm.com>
9  *
10  * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
11  *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
12  *
13  * Rewrite: Dan Streetman <ddstreet@ieee.org>
14  *
15  * This is an interface to the NX-842 compression hardware in PowerPC
16  * processors.  Most of the complexity of this driver is due to the fact that
17  * the NX-842 compression hardware requires the input and output data buffers
18  * to be specifically aligned, to be a specific multiple in length, and within
19  * specific minimum and maximum lengths.  Those restrictions, provided by the
20  * nx-842 driver via nx842_constraints, mean this driver must use bounce
21  * buffers and headers to correct misaligned in or out buffers, and to split
22  * input buffers that are too large.
23  *
24  * This driver will fall back to software decompression if the hardware
25  * decompression fails, so this driver's decompression should never fail as
26  * long as the provided compressed buffer is valid.  Any compressed buffer
27  * created by this driver will have a header (except ones where the input
28  * perfectly matches the constraints); so users of this driver cannot simply
29  * pass a compressed buffer created by this driver over to the 842 software
30  * decompression library.  Instead, users must use this driver to decompress;
31  * if the hardware fails or is unavailable, the compressed buffer will be
32  * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
33  * software decompression library.
34  *
35  * This does not fall back to software compression, however, since the caller
36  * of this function is specifically requesting hardware compression; if the
37  * hardware compression fails, the caller can fall back to software
38  * compression, and the raw 842 compressed buffer that the software compressor
39  * creates can be passed to this driver for hardware decompression; any
40  * buffer without our specific header magic is assumed to be a raw 842 buffer
41  * and passed directly to the hardware.  Note that the software compression
42  * library will produce a compressed buffer that is incompatible with the
43  * hardware decompressor if the original input buffer length is not a multiple
44  * of 8; if such a compressed buffer is passed to this driver for
45  * decompression, the hardware will reject it and this driver will then pass
46  * it over to the software library for decompression.
47  */
48 
49 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50 
51 #include <linux/vmalloc.h>
52 #include <linux/sw842.h>
53 #include <linux/spinlock.h>
54 
55 #include "nx-842.h"
56 
/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
 * template (see lib/842/842.h), so this magic number will never appear at
 * the start of a raw 842 compressed buffer.  That is important, as any buffer
 * passed to us without this magic is assumed to be a raw 842 compressed
 * buffer, and passed directly to the hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)
/* total on-the-wire header size for a buffer split into (g) groups: the
 * fixed header followed by one group descriptor per group
 */
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 sizeof(struct nx842_crypto_header_group) * (g))
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)

/* bounce buffer size: 2^2 = 4 contiguous pages (16K with 4K pages) */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/* try longer on comp because we can fallback to sw decomp if hw is busy;
 * i.e. only decompression has a software fallback, so give up on busy
 * decompression hardware sooner
 */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
/* Cursor state for one compress/decompress call; advanced group-by-group
 * via update_param().
 */
struct nx842_crypto_param {
	u8 *in;			/* current read position in the input */
	unsigned int iremain;	/* input bytes remaining */
	u8 *out;		/* current write position in the output */
	unsigned int oremain;	/* output space remaining */
	unsigned int ototal;	/* total output bytes produced so far */
};
86 
update_param(struct nx842_crypto_param * p,unsigned int slen,unsigned int dlen)87 static int update_param(struct nx842_crypto_param *p,
88 			unsigned int slen, unsigned int dlen)
89 {
90 	if (p->iremain < slen)
91 		return -EOVERFLOW;
92 	if (p->oremain < dlen)
93 		return -ENOSPC;
94 
95 	p->in += slen;
96 	p->iremain -= slen;
97 	p->out += dlen;
98 	p->oremain -= dlen;
99 	p->ototal += dlen;
100 
101 	return 0;
102 }
103 
/*
 * Allocate and initialize a crypto context bound to @driver: the driver's
 * scratch working memory plus the source and destination bounce buffers
 * used to fix up misaligned or mis-sized caller buffers.
 *
 * Returns the new context, or ERR_PTR(-ENOMEM) on allocation failure.
 */
void *nx842_crypto_alloc_ctx(struct nx842_driver *driver)
{
	struct nx842_crypto_ctx *ctx;

	ctx = kzalloc_obj(*ctx);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ctx->lock);
	ctx->driver = driver;
	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
		/* free_ctx copes with a partially-allocated context */
		nx842_crypto_free_ctx(ctx);
		return ERR_PTR(-ENOMEM);
	}

	return ctx;
}
EXPORT_SYMBOL_GPL(nx842_crypto_alloc_ctx);
125 
nx842_crypto_free_ctx(void * p)126 void nx842_crypto_free_ctx(void *p)
127 {
128 	struct nx842_crypto_ctx *ctx = p;
129 
130 	kfree(ctx->wmem);
131 	free_pages((unsigned long)ctx->sbounce, BOUNCE_BUFFER_ORDER);
132 	free_pages((unsigned long)ctx->dbounce, BOUNCE_BUFFER_ORDER);
133 	kfree(ctx);
134 }
135 EXPORT_SYMBOL_GPL(nx842_crypto_free_ctx);
136 
check_constraints(struct nx842_constraints * c)137 static void check_constraints(struct nx842_constraints *c)
138 {
139 	/* limit maximum, to always have enough bounce buffer to decompress */
140 	if (c->maximum > BOUNCE_BUFFER_SIZE)
141 		c->maximum = BOUNCE_BUFFER_SIZE;
142 }
143 
/* Copy the header (including all group descriptors) to the front of the
 * output buffer @buf.  compress() reserves space for it via the first
 * group's padding; a too-small reservation indicates an internal bug.
 *
 * Returns 0 on success, -EINVAL if there is no room for the header.
 */
static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
{
	int hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);

	/* compress should have added space for header */
	if (be16_to_cpu(hdr->group[0].padding) < hdr_len) {
		pr_err("Internal error: no space for header\n");
		return -EINVAL;
	}

	memcpy(buf, hdr, hdr_len);

	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf,
			     hdr_len, 0);

	return 0;
}
160 
/*
 * Compress one group: as much of the remaining input as the hardware
 * constraints allow, placing the result @hdrsize bytes into the remaining
 * output.  The group's geometry (padding before the data, compressed and
 * uncompressed lengths) is recorded in @g; *@ignore is set to the number of
 * zero bytes appended to the input to satisfy the length constraints.
 *
 * Returns 0 on success or a negative errno; there is no software fallback
 * for compression.
 */
static int compress(struct nx842_crypto_ctx *ctx,
		    struct nx842_crypto_param *p,
		    struct nx842_crypto_header_group *g,
		    struct nx842_constraints *c,
		    u16 *ignore,
		    unsigned int hdrsize)
{
	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	int ret, dskip = 0;
	ktime_t timeout;

	if (p->iremain == 0)
		return -EOVERFLOW;

	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
		return -ENOSPC;

	/* round the input length up to the multiple/minimum constraints,
	 * and cap it at the hardware maximum
	 */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		adj_slen = slen = c->maximum;
	/* input needs padding or is misaligned: stage it in the source
	 * bounce buffer, zero-filling the padded tail
	 */
	if (adj_slen > slen || (u64)src % c->alignment) {
		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
		slen = min(slen, BOUNCE_BUFFER_SIZE);
		if (adj_slen > slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		slen = adj_slen;
		pr_debug("using comp sbounce buffer, len %x\n", slen);
	}

	/* reserve room at the front of the output for the header */
	dst += hdrsize;
	dlen -= hdrsize;

	if ((u64)dst % c->alignment) {
		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
		dst += dskip;
		dlen -= dskip;
	}
	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	if (dlen < c->minimum) {
nospc:
		/* output too small or misaligned: compress into the
		 * destination bounce buffer and copy back on success
		 */
		dst = ctx->dbounce;
		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
		dlen = round_down(dlen, c->multiple);
		dskip = 0;
		pr_debug("using comp dbounce buffer, len %x\n", dlen);
	}
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
		/* possibly we should reduce the slen here, instead of
		 * retrying with the dbounce buffer?
		 */
		if (ret == -ENOSPC && dst != ctx->dbounce)
			goto nospc;
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret)
		return ret;

	/* the group's data is preceded by the header plus any alignment skip */
	dskip += hdrsize;

	if (dst == ctx->dbounce)
		memcpy(p->out + dskip, dst, dlen);

	g->padding = cpu_to_be16(dskip);
	g->compressed_length = cpu_to_be32(dlen);
	g->uncompressed_length = cpu_to_be32(slen);

	/* if the input was padded, report the padding via *ignore so the
	 * decompressor can drop those trailing bytes
	 */
	if (p->iremain < slen) {
		*ignore = slen - p->iremain;
		slen = p->iremain;
	}

	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
		 slen, *ignore, dlen, dskip);

	return update_param(p, slen, dskip + dlen);
}
251 
/*
 * Compress @slen bytes at @src into @dst with the NX-842 hardware,
 * splitting the input into constraint-sized groups and writing a header in
 * front of the data — unless both buffers already satisfy every hardware
 * constraint, in which case the output is a raw 842 stream with no header.
 * On success, *@dlen is set to the total number of output bytes.
 *
 * Returns 0 on success or a negative errno.  There is no software
 * fallback; the caller may fall back to software 842 compression itself.
 */
int nx842_crypto_compress(struct crypto_scomp *tfm,
			  const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr =
				container_of(&ctx->header,
					     struct nx842_crypto_header, hdr);
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	unsigned int groups, hdrsize, h;
	int ret, n;
	bool add_header;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	/* worst-case group count determines how much header space to reserve */
	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
		       DIV_ROUND_UP(p.iremain, c.maximum));
	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);

	/* ctx->header and the bounce buffers are shared; serialize users */
	spin_lock_bh(&ctx->lock);

	/* skip adding header if the buffers meet all constraints */
	add_header = (p.iremain % c.multiple	||
		      p.iremain < c.minimum	||
		      p.iremain > c.maximum	||
		      (u64)p.in % c.alignment	||
		      p.oremain % c.multiple	||
		      p.oremain < c.minimum	||
		      p.oremain > c.maximum	||
		      (u64)p.out % c.alignment);

	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
	hdr->groups = 0;
	hdr->ignore = 0;

	/* compress group-by-group until all input is consumed */
	while (p.iremain > 0) {
		n = hdr->groups++;
		ret = -ENOSPC;
		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
			goto unlock;

		/* header goes before first group */
		h = !n && add_header ? hdrsize : 0;

		/* only the last group may set ignore (input padding) */
		if (ignore)
			pr_warn("internal error, ignore is set %x\n", ignore);

		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
		if (ret)
			goto unlock;
	}

	if (!add_header && hdr->groups > 1) {
		pr_err("Internal error: No header but multiple groups\n");
		ret = -EINVAL;
		goto unlock;
	}

	/* ignore indicates the input stream needed to be padded */
	hdr->ignore = cpu_to_be16(ignore);
	if (ignore)
		pr_debug("marked %d bytes as ignore\n", ignore);

	if (add_header)
		ret = nx842_crypto_add_header(hdr, dst);
	if (ret)
		goto unlock;

	*dlen = p.ototal;

	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);

unlock:
	spin_unlock_bh(&ctx->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_compress);
339 
/*
 * Decompress one group described by @g.  The hardware is tried first,
 * bouncing misaligned or mis-sized buffers as needed; if the hardware
 * fails or a constraint cannot be met, fall back to the 842 software
 * decompressor, which has no alignment or length restrictions.  @ignore is
 * the number of trailing padding bytes (added during compression) to drop
 * from the decompressed output.
 *
 * Returns 0 on success or a negative errno.
 */
static int decompress(struct nx842_crypto_ctx *ctx,
		      struct nx842_crypto_param *p,
		      struct nx842_crypto_header_group *g,
		      struct nx842_constraints *c,
		      u16 ignore)
{
	unsigned int slen = be32_to_cpu(g->compressed_length);
	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
	unsigned int dlen = p->oremain, tmplen;
	unsigned int adj_slen = slen;
	u8 *src = p->in, *dst = p->out;
	u16 padding = be16_to_cpu(g->padding);
	int ret, spadding = 0;
	ktime_t timeout;

	if (!slen || !required_len)
		return -EINVAL;

	if (p->iremain <= 0 || padding + slen > p->iremain)
		return -EOVERFLOW;

	if (p->oremain <= 0 || required_len - ignore > p->oremain)
		return -ENOSPC;

	/* the group's compressed data starts after its padding bytes */
	src += padding;

	/* round the input length up to the multiple/minimum constraints */
	if (slen % c->multiple)
		adj_slen = round_up(slen, c->multiple);
	if (slen < c->minimum)
		adj_slen = c->minimum;
	if (slen > c->maximum)
		goto usesw;
	if (slen < adj_slen || (u64)src % c->alignment) {
		/* we can append padding bytes because the 842 format defines
		 * an "end" template (see lib/842/842_decompress.c) and will
		 * ignore any bytes following it.
		 */
		if (slen < adj_slen)
			memset(ctx->sbounce + slen, 0, adj_slen - slen);
		memcpy(ctx->sbounce, src, slen);
		src = ctx->sbounce;
		spadding = adj_slen - slen;
		slen = adj_slen;
		pr_debug("using decomp sbounce buffer, len %x\n", slen);
	}

	if (dlen % c->multiple)
		dlen = round_down(dlen, c->multiple);
	/* output too small or misaligned: decompress into the destination
	 * bounce buffer and copy back on success
	 */
	if (dlen < required_len || (u64)dst % c->alignment) {
		dst = ctx->dbounce;
		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
	}
	if (dlen < c->minimum)
		goto usesw;
	if (dlen > c->maximum)
		dlen = c->maximum;

	tmplen = dlen;
	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
	do {
		dlen = tmplen; /* reset dlen, if we're retrying */
		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
	if (ret) {
usesw:
		/* reset everything, sw doesn't have constraints */
		src = p->in + padding;
		slen = be32_to_cpu(g->compressed_length);
		spadding = 0;
		dst = p->out;
		dlen = p->oremain;
		if (dlen < required_len) { /* have ignore bytes */
			dst = ctx->dbounce;
			dlen = BOUNCE_BUFFER_SIZE;
		}
		pr_info_ratelimited("using software 842 decompression\n");
		ret = sw842_decompress(src, slen, dst, &dlen);
	}
	if (ret)
		return ret;

	/* don't count bounce-buffer input padding as consumed input */
	slen -= spadding;

	/* drop the trailing bytes the compressor marked as padding */
	dlen -= ignore;
	if (ignore)
		pr_debug("ignoring last %x bytes\n", ignore);

	if (dst == ctx->dbounce)
		memcpy(p->out, dst, dlen);

	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
		 slen, padding, dlen, ignore);

	return update_param(p, slen + padding, dlen);
}
436 
/*
 * Decompress @slen bytes at @src into @dst.  If the buffer starts with our
 * header magic, validate the header and decompress each described group;
 * otherwise treat the whole buffer as a raw 842 stream and hand it to
 * decompress() directly with a synthesized single-group descriptor.  On
 * success, *@dlen is set to the total decompressed size.
 *
 * Returns 0 on success or a negative errno.
 */
int nx842_crypto_decompress(struct crypto_scomp *tfm,
			    const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen, void *pctx)
{
	struct nx842_crypto_ctx *ctx = pctx;
	struct nx842_crypto_header *hdr;
	struct nx842_crypto_param p;
	struct nx842_constraints c = *ctx->driver->constraints;
	int n, ret, hdr_len;
	u16 ignore = 0;

	check_constraints(&c);

	p.in = (u8 *)src;
	p.iremain = slen;
	p.out = dst;
	p.oremain = *dlen;
	p.ototal = 0;

	*dlen = 0;

	hdr = (struct nx842_crypto_header *)src;

	/* ctx->header and the bounce buffers are shared; serialize users */
	spin_lock_bh(&ctx->lock);

	/* If it doesn't start with our header magic number, assume it's a raw
	 * 842 compressed buffer and pass it directly to the hardware driver
	 */
	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
		/* single group covering the whole buffer, no padding */
		struct nx842_crypto_header_group g = {
			.padding =		0,
			.compressed_length =	cpu_to_be32(p.iremain),
			.uncompressed_length =	cpu_to_be32(p.oremain),
		};

		ret = decompress(ctx, &p, &g, &c, 0);
		if (ret)
			goto unlock;

		goto success;
	}

	if (!hdr->groups) {
		pr_err("header has no groups\n");
		ret = -EINVAL;
		goto unlock;
	}
	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
		pr_err("header has too many groups %x, max %x\n",
		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
		ret = -EINVAL;
		goto unlock;
	}

	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
	if (hdr_len > slen) {
		ret = -EOVERFLOW;
		goto unlock;
	}

	/* work from a private copy of the header held in the ctx */
	memcpy(&ctx->header, src, hdr_len);
	hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr);

	for (n = 0; n < hdr->groups; n++) {
		/* ignore applies to last group */
		if (n + 1 == hdr->groups)
			ignore = be16_to_cpu(hdr->ignore);

		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
		if (ret)
			goto unlock;
	}

success:
	*dlen = p.ototal;

	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);

	ret = 0;

unlock:
	spin_unlock_bh(&ctx->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
523 
/* module metadata */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
527