1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/zfs_context.h>
27 #include <sys/cmn_err.h>
28 #include <modes/modes.h>
29 #include <sys/crypto/common.h>
30 #include <sys/crypto/icp.h>
31 #include <sys/crypto/impl.h>
32 #include <sys/byteorder.h>
33 #include <sys/simd.h>
34 #include <modes/gcm_impl.h>
35 #ifdef CAN_USE_GCM_ASM
36 #include <aes/aes_impl.h>
37 #include <modes/gcm_asm_rename_funcs.h>
38 #endif
39
40 #define GHASH(c, d, t, o) \
41 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
42 (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
43 (uint64_t *)(void *)(t));
44
45 /* Select GCM implementation */
46 #define IMPL_FASTEST (UINT32_MAX)
47 #define IMPL_CYCLE (UINT32_MAX-1)
48 #ifdef CAN_USE_GCM_ASM
49 #define IMPL_AVX (UINT32_MAX-2)
50 #if CAN_USE_GCM_ASM >= 2
51 #define IMPL_AVX2 (UINT32_MAX-3)
52 #endif
53 #endif
54 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
55 static uint32_t icp_gcm_impl = IMPL_FASTEST;
56 static uint32_t user_sel_impl = IMPL_FASTEST;
57
58 #ifdef CAN_USE_GCM_ASM
59 /* Does the architecture we run on support the MOVBE instruction? */
60 boolean_t gcm_avx_can_use_movbe = B_FALSE;
61 /*
62 * Whether to use the optimized openssl gcm and ghash implementations.
63 */
64 static gcm_impl gcm_impl_used = GCM_IMPL_GENERIC;
65 #define GCM_IMPL_USED (*(volatile gcm_impl *)&gcm_impl_used)
66
67 extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
68
69 static inline boolean_t gcm_avx_will_work(void);
70 static inline boolean_t gcm_avx2_will_work(void);
71 static inline void gcm_use_impl(gcm_impl impl);
72 static inline gcm_impl gcm_toggle_impl(void);
73
74 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
75 crypto_data_t *, size_t);
76
77 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
78 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
79 static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *,
80 size_t, size_t);
81 #endif /* ifdef CAN_USE_GCM_ASM */
82
83 /*
84 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
85 * is done in another function.
86 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	/* Defer to the accelerated implementation if this context uses one. */
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	/*
	 * If the new input plus what is buffered from a previous call still
	 * does not fill one block, just accumulate the bytes and return.
	 */
	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    datap,
		    length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			/* Top up the buffered partial block from datap. */
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			memcpy(&((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], datap, need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		/* Encrypt the counter block, then XOR in the plaintext. */
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		/* gcm_tmp now holds the ciphertext block. */
		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		/*
		 * The output crypto_data_t may be fragmented; the block may
		 * have to be written across two output segments.
		 */
		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			memcpy(out_data_1, lastp, out_data_1_len);
			if (out_data_2 != NULL) {
				memcpy(out_data_2,
				    lastp + out_data_1_len,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block: buffer it for a later call/final. */
		if (remainder > 0 && remainder < block_size) {
			memcpy(ctx->gcm_remainder, datap, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}
205
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) copy_block;
#ifdef CAN_USE_GCM_ASM
	/* Defer to the accelerated implementation if this context uses one. */
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	/* Output must hold the final partial block plus the tag. */
	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		/* Zero-pad the buffered partial block before hashing. */
		macp = (uint8_t *)ctx->gcm_remainder;
		memset(macp + ctx->gcm_remainder_len, 0,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	/*
	 * Finish GHASH with the lengths block (bit lengths of AAD and
	 * ciphertext), then form the tag as E(K, J0) XOR GHASH.
	 */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* Emit the final partial ciphertext block (if any), then the tag. */
	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}
287
288 /*
289 * This will only deal with decrypting the last block of the input that
290 * might not be a multiple of block length.
291 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	/* Partial ciphertext was stashed in gcm_remainder by the caller. */
	datap = (uint8_t *)ctx->gcm_remainder;
	/* Plaintext is written back into gcm_pt_buf at 'index'. */
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag: hash the zero-padded partial ciphertext */
	memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
	memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}
330
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
	    (void) xor_block;
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to plaintext buffer.
	 * Ciphertext will be decrypted in the final.
	 */
	if (length > 0) {
		/* Grow gcm_pt_buf by 'length': allocate, copy, free old. */
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, KM_SLEEP);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}

		if (ctx->gcm_pt_buf != NULL) {
			memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		} else {
			ASSERT0(ctx->gcm_pt_buf_len);
		}

		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		/* Append the new ciphertext after what was buffered so far. */
		memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}
373
int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	/* Defer to the accelerated implementation if this context uses one. */
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	/* The tag occupies the last gcm_tag_len bytes of the buffer. */
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			memcpy(ctx->gcm_remainder, blockp, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * not expecting anymore ciphertext, just
			 * compute plaintext for the remaining input
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		/* Decrypt in place: E(K, CB) XORed into the buffered block. */
		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	/* Finish GHASH with the lengths block and form the expected tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/*
	 * compare the input authentication tag with what we calculated
	 * NOTE(review): memcmp is not a constant-time comparison; confirm
	 * whether a timing-safe compare is required in this threat model.
	 */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		/* Tag verified: release the plaintext to the caller. */
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}
456
457 static int
gcm_validate_args(CK_AES_GCM_PARAMS * gcm_param)458 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
459 {
460 size_t tag_len;
461
462 /*
463 * Check the length of the authentication tag (in bits).
464 */
465 tag_len = gcm_param->ulTagBits;
466 switch (tag_len) {
467 case 32:
468 case 64:
469 case 96:
470 case 104:
471 case 112:
472 case 120:
473 case 128:
474 break;
475 default:
476 return (CRYPTO_MECHANISM_PARAM_INVALID);
477 }
478
479 if (gcm_param->ulIvLen == 0)
480 return (CRYPTO_MECHANISM_PARAM_INVALID);
481
482 return (CRYPTO_SUCCESS);
483 }
484
static void
gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		/* 96-bit IV fast path: J0 = IV || 0^31 || 1. */
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV, one block at a time, zero-padding the tail */
		do {
			if (remainder < block_size) {
				memset(cb, 0, block_size);
				memcpy(cb, &(iv[processed]), remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		/* Close the hash with 0^64 || bit-length of the IV. */
		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
533
static int
gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
    const uint8_t *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	/* Derive J0 (the pre-counter block) from the IV. */
	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	/* Restart the running GHASH for the AAD. */
	memset(authp, 0, block_size);
	memset(ghash, 0, block_size);

	/* Feed the AAD into GHASH, one block at a time. */
	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */

			if (auth_data != NULL) {
				memset(authp, 0, block_size);
				memcpy(authp, &(auth_data[processed]),
				    remainder);
			} else {
				ASSERT0(remainder);
			}

			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}
591
592 /*
593 * Init the GCM context struct. Handle the cycle and avx implementations here.
594 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param,
    size_t block_size, int (*encrypt_block)(const void *, const uint8_t *,
    uint8_t *), void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	CK_AES_GCM_PARAMS *gcm_param;
	int rv = CRYPTO_SUCCESS;
	size_t tag_len, iv_len;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		/* GCM mode. */
		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}
		gcm_ctx->gcm_flags |= GCM_MODE;

		size_t tbits = gcm_param->ulTagBits;
		tag_len = CRYPTO_BITS2BYTES(tbits);
		iv_len = gcm_param->ulIvLen;

		gcm_ctx->gcm_tag_len = tag_len;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
	} else {
		/* GCM cannot operate without parameters. */
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	const uint8_t *iv = (const uint8_t *)gcm_param->pIv;
	const uint8_t *aad = (const uint8_t *)gcm_param->pAAD;
	size_t aad_len = gcm_param->ulAADLen;

#ifdef CAN_USE_GCM_ASM
	boolean_t needs_bswap =
	    ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap;

	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->impl = GCM_IMPL_USED;
	} else {
		/*
		 * Handle the "cycle" implementation by creating different
		 * contexts, one per implementation.
		 */
		gcm_ctx->impl = gcm_toggle_impl();

		/* The AVX impl. doesn't handle byte swapped key schedules. */
		if (needs_bswap == B_TRUE) {
			gcm_ctx->impl = GCM_IMPL_GENERIC;
		}
		/*
		 * If this is an AVX context, use the MOVBE and the BSWAP
		 * variants alternately.
		 */
		if (gcm_ctx->impl == GCM_IMPL_AVX &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/*
	 * We don't handle byte swapped key schedules in the avx code path,
	 * still they could be created by the aes generic implementation.
	 * Make sure not to use them since we'll corrupt data if we do.
	 */
	if (gcm_ctx->impl != GCM_IMPL_GENERIC && needs_bswap == B_TRUE) {
		gcm_ctx->impl = GCM_IMPL_GENERIC;

		cmn_err_once(CE_WARN,
		    "ICP: Can't use the aes generic or cycle implementations "
		    "in combination with the gcm avx or avx2-vaes "
		    "implementation!");
		cmn_err_once(CE_WARN,
		    "ICP: Falling back to a compatible implementation, "
		    "aes-gcm performance will likely be degraded.");
		cmn_err_once(CE_WARN,
		    "ICP: Choose at least the x86_64 aes implementation to "
		    "restore performance.");
	}

	/*
	 * AVX implementations use Htable with sizes depending on
	 * implementation.
	 */
	if (gcm_ctx->impl != GCM_IMPL_GENERIC) {
		rv = gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len,
		    block_size);
	}
	else
#endif /* ifdef CAN_USE_GCM_ASM */
	if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size,
	    encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) {
		rv = CRYPTO_MECHANISM_PARAM_INVALID;
	}

	return (rv);
}
696
697 void *
gcm_alloc_ctx(int kmflag)698 gcm_alloc_ctx(int kmflag)
699 {
700 gcm_ctx_t *gcm_ctx;
701
702 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
703 return (NULL);
704
705 gcm_ctx->gcm_flags = GCM_MODE;
706 return (gcm_ctx);
707 }
708
/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && HAVE_SIMD(PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations, populated by gcm_impl_init() */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
728
729 /*
730 * Returns the GCM operations for encrypt/decrypt/key setup. When a
731 * SIMD implementation is not allowed in the current context, then
732 * fallback to the fastest generic implementation.
733 */
const gcm_impl_ops_t *
gcm_impl_get_ops(void)
{
	/* Without FPU access, only the generic implementation is safe. */
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
#if CAN_USE_GCM_ASM >= 2
	case IMPL_AVX2:
#endif
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		/* 'impl' is an index into the supported-implementation list */
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}
781
782 /*
783 * Initialize all supported implementations.
784 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impls */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && HAVE_SIMD(PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	/* The copy above clobbered the name; restore it. */
	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
#if CAN_USE_GCM_ASM >= 2
	if (gcm_avx2_will_work()) {
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_use_impl(GCM_IMPL_AVX2);
		}
	} else
#endif
	if (gcm_avx_will_work()) {
#if HAVE_SIMD(MOVBE)
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_use_impl(GCM_IMPL_AVX);
		}
	}
#endif
	/* Finish initialization: apply any pre-init user selection. */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}
844
845 static const struct {
846 const char *name;
847 uint32_t sel;
848 } gcm_impl_opts[] = {
849 { "cycle", IMPL_CYCLE },
850 { "fastest", IMPL_FASTEST },
851 #ifdef CAN_USE_GCM_ASM
852 { "avx", IMPL_AVX },
853 { "avx2-vaes", IMPL_AVX2 },
854 #endif
855 };
856
857 /*
858 * Function sets desired gcm implementation.
859 *
860 * If we are called before init(), user preference will be saved in
861 * user_sel_impl, and applied in later init() call. This occurs when module
862 * parameter is specified on module load. Otherwise, directly update
863 * icp_gcm_impl.
864 *
865 * @val Name of gcm implementation to use
866 * @param Unused.
867 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input: reject empty or over-long names */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	/* strip trailing whitespace (e.g. newline from a sysfs write) */
	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
#if CAN_USE_GCM_ASM >= 2
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
		    !gcm_avx2_will_work()) {
			continue;
		}
#endif
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				/* here 'impl' becomes an index, not a tag */
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
#if CAN_USE_GCM_ASM >= 2
	if (gcm_avx2_will_work() == B_TRUE &&
	    (impl == IMPL_AVX2 || impl == IMPL_FASTEST)) {
		gcm_use_impl(GCM_IMPL_AVX2);
	} else
#endif
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_use_impl(GCM_IMPL_AVX);
	} else {
		gcm_use_impl(GCM_IMPL_GENERIC);
	}
#endif

	/* Apply immediately if initialized, else defer to gcm_impl_init(). */
	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}
946
947 #if defined(_KERNEL) && defined(__linux__)
948
/*
 * Linux module parameter setter; 'kp' is unused, the value string is
 * forwarded to the common gcm_impl_set().
 */
static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}
954
955 static int
icp_gcm_impl_get(char * buffer,zfs_kernel_param_t * kp)956 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
957 {
958 int i, cnt = 0;
959 char *fmt;
960 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
961
962 /* list mandatory options */
963 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
964 #ifdef CAN_USE_GCM_ASM
965 /* Ignore avx implementation if it won't work. */
966 #if CAN_USE_GCM_ASM >= 2
967 if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
968 !gcm_avx2_will_work()) {
969 continue;
970 }
971 #endif
972 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
973 continue;
974 }
975 #endif
976 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
977 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
978 gcm_impl_opts[i].name);
979 }
980
981 /* list all supported implementations */
982 for (i = 0; i < gcm_supp_impl_cnt; i++) {
983 fmt = (i == impl) ? "[%s] " : "%s ";
984 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
985 gcm_supp_impl[i]->name);
986 }
987
988 return (cnt);
989 }
990
991 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
992 NULL, 0644);
993 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */
995
#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()

/* Increment the counter block by one (the common case). */
#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
 * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

/*
 * GCM definitions: uint128_t is copied from include/crypto/modes.h
 * Avoiding u128 because it is already defined in kernel sources.
 */
typedef struct {
	uint64_t hi, lo;
} uint128_t;

/* Assembler primitives; ASMABI marks the calling convention they use. */
extern void ASMABI clear_fpu_regs_avx(void);
extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

/* Htable initialization for the GHASH routines below. */
extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI gcm_init_vpclmulqdq_avx2(uint128_t Htable[16],
    const uint64_t H[2]);
#endif
extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI gcm_ghash_vpclmulqdq_avx2(uint64_t ghash[2],
    const uint64_t *Htable, const uint8_t *in, size_t len);
#endif
/*
 * Fold len bytes of input into the GHASH state in ctx->gcm_ghash,
 * dispatching to the assembler routine matching ctx->impl.
 */
static inline void GHASH_AVX(gcm_ctx_t *ctx, const uint8_t *in, size_t len)
{
	const uint64_t *htable = (const uint64_t *)ctx->gcm_Htable;

#if CAN_USE_GCM_ASM >= 2
	if (ctx->impl == GCM_IMPL_AVX2) {
		gcm_ghash_vpclmulqdq_avx2(ctx->gcm_ghash, htable, in, len);
		return;
	}
#endif
	if (ctx->impl == GCM_IMPL_AVX) {
		gcm_ghash_avx(ctx->gcm_ghash, htable, in, len);
		return;
	}

	/* Any other implementation must never reach this path. */
	VERIFY(B_FALSE);
}
1070
/*
 * Common signature for the bulk encrypt routines so encrypt can dispatch
 * through a single function pointer; the plain-AVX wrapper ignores Htable.
 */
typedef size_t ASMABI aesni_gcm_encrypt_impl(const uint8_t *, uint8_t *,
    size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI aes_gcm_enc_update_vaes_avx2(const uint8_t *in,
    uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
    const uint128_t Htable[16], uint8_t Xi[16]);
#endif

/* Same pattern for the bulk decrypt routines. */
typedef size_t ASMABI aesni_gcm_decrypt_impl(const uint8_t *, uint8_t *,
    size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI aes_gcm_dec_update_vaes_avx2(const uint8_t *in,
    uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
    const uint128_t Htable[16], uint8_t Xi[16]);
#endif
1090
1091 static inline boolean_t
gcm_avx2_will_work(void)1092 gcm_avx2_will_work(void)
1093 {
1094 return (kfpu_allowed() &&
1095 zfs_avx2_available() && zfs_vaes_available() &&
1096 zfs_vpclmulqdq_available());
1097 }
1098
1099 static inline boolean_t
gcm_avx_will_work(void)1100 gcm_avx_will_work(void)
1101 {
1102 /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
1103 return (kfpu_allowed() &&
1104 zfs_avx_available() && zfs_aes_available() &&
1105 zfs_pclmulqdq_available());
1106 }
1107
1108 static inline void
gcm_use_impl(gcm_impl impl)1109 gcm_use_impl(gcm_impl impl)
1110 {
1111 switch (impl) {
1112 #if CAN_USE_GCM_ASM >= 2
1113 case GCM_IMPL_AVX2:
1114 if (gcm_avx2_will_work() == B_TRUE) {
1115 atomic_swap_32(&gcm_impl_used, impl);
1116 return;
1117 }
1118
1119 zfs_fallthrough;
1120 #endif
1121
1122 case GCM_IMPL_AVX:
1123 if (gcm_avx_will_work() == B_TRUE) {
1124 atomic_swap_32(&gcm_impl_used, impl);
1125 return;
1126 }
1127
1128 zfs_fallthrough;
1129
1130 default:
1131 atomic_swap_32(&gcm_impl_used, GCM_IMPL_GENERIC);
1132 }
1133 }
1134
1135 static inline boolean_t
gcm_impl_will_work(gcm_impl impl)1136 gcm_impl_will_work(gcm_impl impl)
1137 {
1138 switch (impl) {
1139 #if CAN_USE_GCM_ASM >= 2
1140 case GCM_IMPL_AVX2:
1141 return (gcm_avx2_will_work());
1142 #endif
1143
1144 case GCM_IMPL_AVX:
1145 return (gcm_avx_will_work());
1146
1147 default:
1148 return (B_TRUE);
1149 }
1150 }
1151
/*
 * Advance gcm_impl_used to the next implementation supported by this
 * machine, wrapping around modulo GCM_IMPL_MAX. Lock-free: the CAS loop
 * retries if another thread updated gcm_impl_used concurrently. Returns
 * the implementation that was installed.
 */
static inline gcm_impl
gcm_toggle_impl(void)
{
	gcm_impl current_impl, new_impl;
	do { /* handle races */
		current_impl = atomic_load_32(&gcm_impl_used);
		new_impl = current_impl;
		while (B_TRUE) { /* skip incompatible implementations */
			new_impl = (new_impl + 1) % GCM_IMPL_MAX;
			if (gcm_impl_will_work(new_impl)) {
				break;
			}
		}

	} while (atomic_cas_32(&gcm_impl_used, current_impl, new_impl) !=
	    current_impl);

	return (new_impl);
}
1171
1172
/*
 * Increment the GCM counter block by n. The counter occupies the final
 * four bytes of the 16 byte counter block, stored big-endian. Arithmetic
 * is done in host byte order; the mask discards any carry out of 32 bits,
 * so the counter wraps modulo 2^32 without touching the fixed IV part.
 */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	/* ntohll() positions the mask over the counter bytes in memory. */
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
1184
/*
 * Adapter to the openssl aesni_gcm_encrypt() asm routine. The Htable
 * parameter exists only to match the common aesni_gcm_encrypt_impl
 * dispatch signature and is ignored here.
 */
static size_t aesni_gcm_encrypt_avx(const uint8_t *in, uint8_t *out,
    size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
    uint64_t *Xip)
{
	(void) Htable;
	return (aesni_gcm_encrypt(in, out, len, key, iv, Xip));
}
1192
1193 #if CAN_USE_GCM_ASM >= 2
// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
// This is from boringssl/crypto/fipsmodule/aes/gcm.cc.inc
static const size_t kSizeTWithoutLower4Bits = (size_t)-16;

/* The following CRYPTO methods are from boringssl/crypto/internal.h */

/* Reverse the byte order of a 32-bit value. */
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
	return (((x & 0x000000ffU) << 24) |
	    ((x & 0x0000ff00U) << 8) |
	    ((x & 0x00ff0000U) >> 8) |
	    ((x & 0xff000000U) >> 24));
}

/* Load a big-endian 32-bit value from a possibly unaligned address. */
static inline uint32_t CRYPTO_load_u32_be(const void *in) {
	uint32_t word;
	memcpy(&word, in, sizeof (word));
	return (CRYPTO_bswap4(word));
}

/* Store a 32-bit value big-endian to a possibly unaligned address. */
static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
	uint32_t word = CRYPTO_bswap4(v);
	memcpy(out, &word, sizeof (word));
}
1214
/*
 * Bulk encrypt via BoringSSL's VAES/AVX2 routine. Only whole 16 byte
 * blocks are processed: len is rounded down to a multiple of 16 and the
 * number of bytes actually consumed is returned. The asm routine does
 * not advance the counter block, so the big-endian 32-bit counter in
 * ivec[12..15] is bumped here by the number of blocks processed.
 */
static size_t aesni_gcm_encrypt_avx2(const uint8_t *in, uint8_t *out,
    size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
    uint64_t *Xip)
{
	uint8_t *ivec = (uint8_t *)iv;
	len &= kSizeTWithoutLower4Bits;
	aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec,
	    (const uint128_t *)Htable, (uint8_t *)Xip);
	CRYPTO_store_u32_be(&ivec[12],
	    CRYPTO_load_u32_be(&ivec[12]) + len / 16);
	return (len);
}
#endif /* if CAN_USE_GCM_ASM >= 2 */
1228
1229 /*
1230 * Encrypt multiple blocks of data in GCM mode.
1231 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
1232 * if possible. While processing a chunk the FPU is "locked".
1233 */
1234 static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t * ctx,char * data,size_t length,crypto_data_t * out,size_t block_size)1235 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
1236 size_t length, crypto_data_t *out, size_t block_size)
1237 {
1238 size_t bleft = length;
1239 size_t need = 0;
1240 size_t done = 0;
1241 uint8_t *datap = (uint8_t *)data;
1242 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1243 aesni_gcm_encrypt_impl *encrypt_blocks =
1244 #if CAN_USE_GCM_ASM >= 2
1245 ctx->impl == GCM_IMPL_AVX2 ?
1246 aesni_gcm_encrypt_avx2 :
1247 #endif
1248 aesni_gcm_encrypt_avx;
1249 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1250 uint64_t *ghash = ctx->gcm_ghash;
1251 uint64_t *htable = ctx->gcm_Htable;
1252 uint64_t *cb = ctx->gcm_cb;
1253 uint8_t *ct_buf = NULL;
1254 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1255 int rv = CRYPTO_SUCCESS;
1256
1257 ASSERT(block_size == GCM_BLOCK_LEN);
1258 ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
1259 B_FALSE);
1260 /*
1261 * If the last call left an incomplete block, try to fill
1262 * it first.
1263 */
1264 if (ctx->gcm_remainder_len > 0) {
1265 need = block_size - ctx->gcm_remainder_len;
1266 if (length < need) {
1267 /* Accumulate bytes here and return. */
1268 memcpy((uint8_t *)ctx->gcm_remainder +
1269 ctx->gcm_remainder_len, datap, length);
1270
1271 ctx->gcm_remainder_len += length;
1272 if (ctx->gcm_copy_to == NULL) {
1273 ctx->gcm_copy_to = datap;
1274 }
1275 return (CRYPTO_SUCCESS);
1276 } else {
1277 /* Complete incomplete block. */
1278 memcpy((uint8_t *)ctx->gcm_remainder +
1279 ctx->gcm_remainder_len, datap, need);
1280
1281 ctx->gcm_copy_to = NULL;
1282 }
1283 }
1284
1285 /* Allocate a buffer to encrypt to if there is enough input. */
1286 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1287 ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
1288 if (ct_buf == NULL) {
1289 return (CRYPTO_HOST_MEMORY);
1290 }
1291 }
1292
1293 /* If we completed an incomplete block, encrypt and write it out. */
1294 if (ctx->gcm_remainder_len > 0) {
1295 kfpu_begin();
1296 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1297 (const uint32_t *)cb, (uint32_t *)tmp);
1298
1299 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
1300 GHASH_AVX(ctx, tmp, block_size);
1301 clear_fpu_regs();
1302 kfpu_end();
1303 rv = crypto_put_output_data(tmp, out, block_size);
1304 out->cd_offset += block_size;
1305 gcm_incr_counter_block(ctx);
1306 ctx->gcm_processed_data_len += block_size;
1307 bleft -= need;
1308 datap += need;
1309 ctx->gcm_remainder_len = 0;
1310 }
1311
1312 /* Do the bulk encryption in chunk_size blocks. */
1313 for (; bleft >= chunk_size; bleft -= chunk_size) {
1314 kfpu_begin();
1315 done = encrypt_blocks(
1316 datap, ct_buf, chunk_size, key, cb, htable, ghash);
1317
1318 clear_fpu_regs();
1319 kfpu_end();
1320 if (done != chunk_size) {
1321 rv = CRYPTO_FAILED;
1322 goto out_nofpu;
1323 }
1324 rv = crypto_put_output_data(ct_buf, out, chunk_size);
1325 if (rv != CRYPTO_SUCCESS) {
1326 goto out_nofpu;
1327 }
1328 out->cd_offset += chunk_size;
1329 datap += chunk_size;
1330 ctx->gcm_processed_data_len += chunk_size;
1331 }
1332 /* Check if we are already done. */
1333 if (bleft == 0) {
1334 goto out_nofpu;
1335 }
1336 /* Bulk encrypt the remaining data. */
1337 kfpu_begin();
1338 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1339 done = encrypt_blocks(datap, ct_buf, bleft, key, cb, htable,
1340 ghash);
1341 if (done == 0) {
1342 rv = CRYPTO_FAILED;
1343 goto out;
1344 }
1345 rv = crypto_put_output_data(ct_buf, out, done);
1346 if (rv != CRYPTO_SUCCESS) {
1347 goto out;
1348 }
1349 out->cd_offset += done;
1350 ctx->gcm_processed_data_len += done;
1351 datap += done;
1352 bleft -= done;
1353
1354 }
1355 /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
1356 while (bleft > 0) {
1357 if (bleft < block_size) {
1358 memcpy(ctx->gcm_remainder, datap, bleft);
1359 ctx->gcm_remainder_len = bleft;
1360 ctx->gcm_copy_to = datap;
1361 goto out;
1362 }
1363 /* Encrypt, hash and write out. */
1364 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1365 (const uint32_t *)cb, (uint32_t *)tmp);
1366
1367 gcm_xor_avx(datap, tmp);
1368 GHASH_AVX(ctx, tmp, block_size);
1369 rv = crypto_put_output_data(tmp, out, block_size);
1370 if (rv != CRYPTO_SUCCESS) {
1371 goto out;
1372 }
1373 out->cd_offset += block_size;
1374 gcm_incr_counter_block(ctx);
1375 ctx->gcm_processed_data_len += block_size;
1376 datap += block_size;
1377 bleft -= block_size;
1378 }
1379 out:
1380 clear_fpu_regs();
1381 kfpu_end();
1382 out_nofpu:
1383 if (ct_buf != NULL) {
1384 vmem_free(ct_buf, chunk_size);
1385 }
1386 return (rv);
1387 }
1388
/*
 * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	/*
	 * NOTE(review): casting keysched back to aes_key_t assumes encr_ks
	 * is the first member of aes_key_t — confirm against aes_impl.h.
	 */
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	/* Output must hold the remainder plus the full auth tag. */
	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		memset(remainder + rem_len, 0, block_size - rem_len);
		/* XOR only the rem_len valid bytes with the keystream. */
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}
	/* Finish tag: hash len(A) || len(C), then encrypt J0 and XOR it in. */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	/* The tag now lives in gcm_ghash; append it to the output. */
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	return (CRYPTO_SUCCESS);
}
1452
/*
 * Adapter to the openssl aesni_gcm_decrypt() asm routine. The Htable
 * parameter exists only to match the common aesni_gcm_decrypt_impl
 * dispatch signature and is ignored here.
 */
static size_t aesni_gcm_decrypt_avx(const uint8_t *in, uint8_t *out,
    size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
    uint64_t *Xip)
{
	(void) Htable;
	return (aesni_gcm_decrypt(in, out, len, key, iv, Xip));
}
1460
#if CAN_USE_GCM_ASM >= 2
/*
 * Bulk decrypt via BoringSSL's VAES/AVX2 routine. Mirrors
 * aesni_gcm_encrypt_avx2(): len is rounded down to whole 16 byte blocks,
 * and the big-endian counter in ivec[12..15] is advanced here by the
 * number of blocks processed, since the asm routine does not do it.
 */
static size_t aesni_gcm_decrypt_avx2(const uint8_t *in, uint8_t *out,
    size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
    uint64_t *Xip)
{
	uint8_t *ivec = (uint8_t *)iv;
	len &= kSizeTWithoutLower4Bits;
	aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec,
	    (const uint128_t *)Htable, (uint8_t *)Xip);
	CRYPTO_store_u32_be(&ivec[12],
	    CRYPTO_load_u32_be(&ivec[12]) + len / 16);
	return (len);
}
#endif /* if CAN_USE_GCM_ASM >= 2 */
1475
/*
 * Finalize decryption: We just have accumulated crypto text, so now we
 * decrypt it here inplace.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	/* AVX2 ctxs use the VAES bulk routine, plain AVX the aesni one. */
	aesni_gcm_decrypt_impl *decrypt_blocks =
#if CAN_USE_GCM_ASM >= 2
	    ctx->impl == GCM_IMPL_AVX2 ?
	    aesni_gcm_decrypt_avx2 :
#endif
	    aesni_gcm_decrypt_avx;
	/* The accumulated buffer ends with the gcm_tag_len byte auth tag. */
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *htable = ctx->gcm_Htable;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
	 * GCM_AVX_MIN_DECRYPT_BYTES. The FPU is released between chunks.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = decrypt_blocks(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, htable, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt remainder, which is less than chunk size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = decrypt_blocks(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, htable, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			/* Zero pad so GHASH sees a full block. */
			memset(lastb, 0, block_size);
			memcpy(lastb, datap, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			/* XOR only the bleft valid bytes with keystream. */
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing. */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	/* NOTE(review): rv is never set above; this check is currently dead. */
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/*
	 * Compare the input authentication tag with what we calculated.
	 * NOTE(review): memcmp is not constant-time — consider a
	 * timing-safe compare for the tag check.
	 */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	/* Tag verified: release the plaintext to the caller. */
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	return (CRYPTO_SUCCESS);
}
1593
/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
    const uint8_t *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	const uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);
	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
	    B_FALSE);

	/* The Htable size depends on which asm implementation is in use. */
	size_t htab_len = 0;
#if CAN_USE_GCM_ASM >= 2
	if (ctx->impl == GCM_IMPL_AVX2) {
		/*
		 * BoringSSL's API specifies uint128_t[16] for htab; but only
		 * uint128_t[12] are used.
		 * See https://github.com/google/boringssl/blob/
		 * 813840dd094f9e9c1b00a7368aa25e656554221f1/crypto/fipsmodule/
		 * modes/asm/aes-gcm-avx2-x86_64.pl#L198-L200
		 */
		htab_len = (2 * 8 * sizeof (uint128_t));
	} else
#endif /* CAN_USE_GCM_ASM >= 2 */
	{
		htab_len = (2 * 6 * sizeof (uint128_t));
	}

	ctx->gcm_Htable = kmem_alloc(htab_len, KM_SLEEP);
	if (ctx->gcm_Htable == NULL) {
		return (CRYPTO_HOST_MEMORY);
	}

	/* Init H (encrypt zero block) and create the initial counter block. */
	memset(H, 0, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	/* Precompute the GHASH multiplication table from H. */
#if CAN_USE_GCM_ASM >= 2
	if (ctx->impl == GCM_IMPL_AVX2) {
		gcm_init_vpclmulqdq_avx2((uint128_t *)ctx->gcm_Htable, H);
	} else
#endif /* if CAN_USE_GCM_ASM >= 2 */
	{
		gcm_init_htab_avx(ctx->gcm_Htable, H);
	}

	if (iv_len == 12) {
		/* Standard 96-bit IV: counter block is IV || 0x00000001. */
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12 byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));

	/* Openssl post increments the counter, adjust for that. */
	gcm_incr_counter_block(ctx);

	/*
	 * Ghash AAD in chunk_size blocks, releasing and re-acquiring the
	 * FPU between chunks.
	 */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* Ghash the remainder and handle possible incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block. */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			memset(authp, 0, block_size);
			memcpy(authp, datap, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}
1707
1708 #if defined(_KERNEL)
1709 static int
icp_gcm_avx_set_chunk_size(const char * buf,zfs_kernel_param_t * kp)1710 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
1711 {
1712 unsigned long val;
1713 char val_rounded[16];
1714 int error = 0;
1715
1716 error = kstrtoul(buf, 0, &val);
1717 if (error)
1718 return (error);
1719
1720 val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1721
1722 if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
1723 return (-EINVAL);
1724
1725 snprintf(val_rounded, 16, "%u", (uint32_t)val);
1726 error = param_set_uint(val_rounded, kp);
1727 return (error);
1728 }
1729
/* Writable module parameter; reads go straight through param_get_uint. */
module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */
1738