/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// Copyright 2025 Google LLC
//
// Author: Eric Biggers <ebiggers@google.com>
//
// This file is dual-licensed, meaning that you can use it under your choice of
// either of the following two licenses:
//
// Licensed under the Apache License 2.0 (the "License").  You may obtain a copy
// of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// or
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
//
// This file contains x86_64 assembly implementations of AES-CTR and AES-XCTR
// using the following sets of CPU features:
//	- AES-NI && AVX
//	- VAES && AVX2
//	- VAES && AVX512BW && AVX512VL && BMI2
//
// See the function definitions at the bottom of the file for more information.

#include <linux/linkage.h>
#include <linux/cfi_types.h>

.section .rodata
.p2align 4

.Lbswap_mask:
	.octa	0x000102030405060708090a0b0c0d0e0f

.Lctr_pattern:
	.quad	0, 0
.Lone:
	.quad	1, 0
.Ltwo:
	.quad	2, 0
	.quad	3, 0

.Lfour:
	.quad	4, 0

.text

// Move a vector between memory and a register.
.macro	_vmovdqu	src, dst
.if VL < 64
	vmovdqu		\src, \dst
.else
	vmovdqu8	\src, \dst
.endif
.endm

// Move a vector between registers.
.macro	_vmovdqa	src, dst
.if VL < 64
	vmovdqa		\src, \dst
.else
	vmovdqa64	\src, \dst
.endif
.endm

// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
// register.
.macro	_vbroadcast128	src, dst
.if VL == 16
	vmovdqu		\src, \dst
.elseif VL == 32
	vbroadcasti128	\src, \dst
.else
	vbroadcasti32x4	\src, \dst
.endif
.endm

// XOR two vectors together.
.macro	_vpxor	src1, src2, dst
.if VL < 64
	vpxor		\src1, \src2, \dst
.else
	vpxord		\src1, \src2, \dst
.endif
.endm

// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst
// and zeroize any remaining bytes.  Clobbers %rax, %rcx, and \tmp{64,32}.
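//
// The approach is to do at most two overlapping loads that together cover the
// whole range and then discard the doubly-loaded bytes.  For example, with
// LEN = 11, the vmovq below reads bytes 0-7, the second load reads bytes 3-10
// into %rax, and shifting %rax right by 40 bits keeps only bytes 8-10, which
// vpinsrq then places in the upper half of \dst.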
.macro	_load_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jle		.Lle8\@

	// Load 9 <= LEN <= 15 bytes.
	vmovq		(\src), \dst		// Load first 8 bytes
	mov		(\src, %rcx), %rax	// Load last 8 bytes
	neg		%ecx
	shl		$3, %ecx
	shr		%cl, %rax		// Discard overlapping bytes
	vpinsrq		$1, %rax, \dst, \dst
	jmp		.Ldone\@

.Lle8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Load 4 <= LEN <= 8 bytes.
	mov		(\src), %eax		// Load first 4 bytes
	mov		(\src, %rcx), \tmp32	// Load last 4 bytes
	jmp		.Lcombine\@

.Llt4\@:
	// Load 1 <= LEN <= 3 bytes.
	add		$2, %ecx		// LEN - 2
	movzbl		(\src), %eax		// Load first byte
	jl		.Lmovq\@
	movzwl		(\src, %rcx), \tmp32	// Load last 2 bytes
.Lcombine\@:
	shl		$3, %ecx
	shl		%cl, \tmp64
	or		\tmp64, %rax		// Combine the two parts
.Lmovq\@:
	vmovq		%rax, \dst
.Ldone\@:
.endm

// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst.
// Clobbers %rax, %rcx, and \tmp{64,32}.
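//
// This is the store-side counterpart of _load_partial_block: at most two
// overlapping stores together cover the whole range.  For example, with
// LEN = 11, the rotated upper qword is stored to bytes 3-10 of \dst, and the
// following vmovq of the first 8 bytes then overwrites the overlapping
// bytes 3-7 with the correct values.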
.macro	_store_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jl		.Llt8\@

	// Store 8 <= LEN <= 15 bytes.
	vpextrq		$1, \src, %rax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %rax
	mov		%rax, (\dst, \tmp64)	// Store last LEN - 8 bytes
	vmovq		\src, (\dst)		// Store first 8 bytes
	jmp		.Ldone\@

.Llt8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Store 4 <= LEN <= 7 bytes.
	vpextrd		$1, \src, %eax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %eax
	mov		%eax, (\dst, \tmp64)	// Store last LEN - 4 bytes
	vmovd		\src, (\dst)		// Store first 4 bytes
	jmp		.Ldone\@

.Llt4\@:
	// Store 1 <= LEN <= 3 bytes.
	vpextrb		$0, \src, 0(\dst)
	cmp		$-2, %ecx		// LEN - 4 == -2, i.e. LEN == 2?
	jl		.Ldone\@
	vpextrb		$1, \src, 1(\dst)
	je		.Ldone\@
	vpextrb		$2, \src, 2(\dst)
.Ldone\@:
.endm

// Prepare the next two vectors of AES inputs in AESDATA\i0 and AESDATA\i1, and
// XOR each with the zero-th round key.  Also update LE_CTR if !\final.
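//
// For XCTR, the AES input for each block is the little endian block counter
// XOR'd with the IV; for regular CTR, it is the counter byte-swapped to big
// endian.  In both cases the XOR with the round 0 key is folded in here, so
// that _aesenc_loop can start with the round 1 key.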
.macro	_prepare_2_ctr_vecs	is_xctr, i0, i1, final=0
.if \is_xctr
  .if USE_AVX512
	vmovdqa64	LE_CTR, AESDATA\i0
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i0
  .else
	vpxor		XCTR_IV, LE_CTR, AESDATA\i0
	vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
  .endif
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1

  .if USE_AVX512
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i1
  .else
	vpxor		XCTR_IV, AESDATA\i1, AESDATA\i1
	vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
  .endif
.else
	vpshufb		BSWAP_MASK, LE_CTR, AESDATA\i0
	_vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1
	vpshufb		BSWAP_MASK, AESDATA\i1, AESDATA\i1
	_vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
.endif
.if !\final
	vpaddq		LE_CTR_INC2, LE_CTR, LE_CTR
.endif
.endm

// Do all AES rounds on the data in the given AESDATA vectors, excluding the
// zero-th and last rounds.
.macro	_aesenc_loop	vecs:vararg
	mov		KEY, %rax
1:
	_vbroadcast128	(%rax), RNDKEY
.irp i, \vecs
	vaesenc		RNDKEY, AESDATA\i, AESDATA\i
.endr
	add		$16, %rax
	cmp		%rax, RNDKEYLAST_PTR
	jne		1b
.endm

// Finalize the keystream blocks in the given AESDATA vectors by doing the last
// AES round, then XOR those keystream blocks with the corresponding data.
// Reduce latency by doing the XOR before the vaesenclast, utilizing the
// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a).
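// This identity holds because vaesenclast XORs the round key in as its last
// step (after the SubBytes and ShiftRows steps), so XORing the data into the
// round key beforehand gives the same result as XORing it into the output.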
.macro	_aesenclast_and_xor	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), RNDKEYLAST, RNDKEY
	vaesenclast	RNDKEY, AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

// XOR the keystream blocks in the specified AESDATA vectors with the
// corresponding data.
.macro	_xor_data	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

.macro	_aes_ctr_crypt		is_xctr

	// Define register aliases V0-V15 that map to the xmm, ymm, or zmm
	// registers according to the selected Vector Length (VL).
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
  .if VL == 16
	.set	V\i,		%xmm\i
  .elseif VL == 32
	.set	V\i,		%ymm\i
  .elseif VL == 64
	.set	V\i,		%zmm\i
  .else
	.error "Unsupported Vector Length (VL)"
  .endif
.endr

	// Function arguments
	.set	KEY,		%rdi	// Initially points to the start of the
					// crypto_aes_ctx, then is advanced to
					// point to the index 1 round key
	.set	KEY32,		%edi	// Available as temp register after all
					// keystream blocks have been generated
	.set	SRC,		%rsi	// Pointer to next source data
	.set	DST,		%rdx	// Pointer to next destination data
	.set	LEN,		%ecx	// Remaining length in bytes.
					// Note: _load_partial_block relies on
					// this being in %ecx.
	.set	LEN64,		%rcx	// Zero-extend LEN before using!
	.set	LEN8,		%cl
.if \is_xctr
	.set	XCTR_IV_PTR,	%r8	// const u8 iv[AES_BLOCK_SIZE];
	.set	XCTR_CTR,	%r9	// u64 ctr;
.else
	.set	LE_CTR_PTR,	%r8	// const u64 le_ctr[2];
.endif

	// Additional local variables
	.set	RNDKEYLAST_PTR,	%r10
	.set	AESDATA0,	V0
	.set	AESDATA0_XMM,	%xmm0
	.set	AESDATA1,	V1
	.set	AESDATA1_XMM,	%xmm1
	.set	AESDATA2,	V2
	.set	AESDATA3,	V3
	.set	AESDATA4,	V4
	.set	AESDATA5,	V5
	.set	AESDATA6,	V6
	.set	AESDATA7,	V7
.if \is_xctr
	.set	XCTR_IV,	V8
.else
	.set	BSWAP_MASK,	V8
.endif
	.set	LE_CTR,		V9
	.set	LE_CTR_XMM,	%xmm9
	.set	LE_CTR_INC1,	V10
	.set	LE_CTR_INC2,	V11
	.set	RNDKEY0,	V12
	.set	RNDKEYLAST,	V13
	.set	RNDKEY,		V14

	// Create the first vector of counters.
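	// For example, with VL == 64 this produces a vector of four 128-bit
	// blocks whose low 64-bit halves hold the consecutive counter values
	// n, n+1, n+2, and n+3, where n is the starting counter value;
	// .Lctr_pattern supplies the 0, 1, 2, 3 offsets.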
.if \is_xctr
  .if VL == 16
	vmovq		XCTR_CTR, LE_CTR
  .elseif VL == 32
	vmovq		XCTR_CTR, LE_CTR_XMM
	inc		XCTR_CTR
	vmovq		XCTR_CTR, AESDATA0_XMM
	vinserti128	$1, AESDATA0_XMM, LE_CTR, LE_CTR
  .else
	vpbroadcastq	XCTR_CTR, LE_CTR
	vpsrldq		$8, LE_CTR, LE_CTR
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	(XCTR_IV_PTR), XCTR_IV
.else
	_vbroadcast128	(LE_CTR_PTR), LE_CTR
  .if VL > 16
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	.Lbswap_mask(%rip), BSWAP_MASK
.endif

.if VL == 16
	_vbroadcast128	.Lone(%rip), LE_CTR_INC1
.elseif VL == 32
	_vbroadcast128	.Ltwo(%rip), LE_CTR_INC1
.else
	_vbroadcast128	.Lfour(%rip), LE_CTR_INC1
.endif
	vpsllq		$1, LE_CTR_INC1, LE_CTR_INC2

	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
	movl		480(KEY), %eax

	// Compute the pointer to the last round key.
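	// AES uses key_length/4 + 6 rounds (10, 12, or 14), and each round
	// key is 16 bytes, so the last round key is at byte offset
	// 16*(key_length/4 + 6) = 4*key_length + 6*16 from the key schedule.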
	lea		6*16(KEY, %rax, 4), RNDKEYLAST_PTR

	// Load the zero-th and last round keys.
	_vbroadcast128	(KEY), RNDKEY0
	_vbroadcast128	(RNDKEYLAST_PTR), RNDKEYLAST

	// Make KEY point to the first round key.
	add		$16, KEY

	// This is the main loop, which encrypts 8 vectors of data at a time.
	add		$-8*VL, LEN
	jl		.Lloop_8x_done\@
.Lloop_8x\@:
	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor 0,1,2,3,4,5,6,7
	sub		$-8*VL, SRC
	sub		$-8*VL, DST
	add		$-8*VL, LEN
	jge		.Lloop_8x\@
.Lloop_8x_done\@:
	sub		$-8*VL, LEN
	jz		.Ldone\@

	// 1 <= LEN < 8*VL.  Generate 2, 4, or 8 more vectors of keystream
	// blocks, depending on the remaining LEN.

	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	cmp		$4*VL, LEN
	jle		.Lenc_tail_atmost4vecs\@

	// 4*VL < LEN < 8*VL.  Generate 8 vectors of keystream blocks.  Use the
	// first 4 to XOR 4 full vectors of data.  Then XOR the remaining data.
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7, final=1
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor 0,1,2,3
	vaesenclast	RNDKEYLAST, AESDATA4, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA5, AESDATA1
	vaesenclast	RNDKEYLAST, AESDATA6, AESDATA2
	vaesenclast	RNDKEYLAST, AESDATA7, AESDATA3
	sub		$-4*VL, SRC
	sub		$-4*VL, DST
	add		$-4*VL, LEN
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	cmp		$3*VL-1, LEN
	jle		.Lxor_tail_partial_vec_2\@
	_xor_data	2
	cmp		$4*VL-1, LEN
	jle		.Lxor_tail_partial_vec_3\@
	_xor_data	3
	jmp		.Ldone\@

.Lenc_tail_atmost4vecs\@:
	cmp		$2*VL, LEN
	jle		.Lenc_tail_atmost2vecs\@

	// 2*VL < LEN <= 4*VL.  Generate 4 vectors of keystream blocks.  Use the
	// first 2 to XOR 2 full vectors of data.  Then XOR the remaining data.
	_aesenc_loop	0,1,2,3
	_aesenclast_and_xor 0,1
	vaesenclast	RNDKEYLAST, AESDATA2, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA3, AESDATA1
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	add		$-2*VL, LEN
	jmp		.Lxor_tail_upto2vecs\@

.Lenc_tail_atmost2vecs\@:
	// 1 <= LEN <= 2*VL.  Generate 2 vectors of keystream blocks.  Then XOR
	// the remaining data.
	_aesenc_loop	0,1
	vaesenclast	RNDKEYLAST, AESDATA0, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA1, AESDATA1

.Lxor_tail_upto2vecs\@:
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	jmp		.Ldone\@

.Lxor_tail_partial_vec_1\@:
	add		$-1*VL, LEN
	jz		.Ldone\@
	sub		$-1*VL, SRC
	sub		$-1*VL, DST
	_vmovdqa	AESDATA1, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_2\@:
	add		$-2*VL, LEN
	jz		.Ldone\@
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	_vmovdqa	AESDATA2, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_3\@:
	add		$-3*VL, LEN
	jz		.Ldone\@
	sub		$-3*VL, SRC
	sub		$-3*VL, DST
	_vmovdqa	AESDATA3, AESDATA0

.Lxor_tail_partial_vec_0\@:
	// XOR the remaining 1 <= LEN < VL bytes.  It's easy if masked
	// loads/stores are available; otherwise it's a bit harder...
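	// With AVX512, bzhi builds a mask of LEN one bits that drives masked
	// loads/stores of exactly LEN bytes.  Otherwise, first handle a full
	// 16-byte chunk if there is one (possible only when VL == 32), then
	// fall back to the byte-granular _load_partial_block and
	// _store_partial_block helpers for the remaining 1 to 15 bytes.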
.if USE_AVX512
	mov		$-1, %rax
	bzhi		LEN64, %rax, %rax
	kmovq		%rax, %k1
	vmovdqu8	(SRC), AESDATA1{%k1}{z}
	vpxord		AESDATA1, AESDATA0, AESDATA0
	vmovdqu8	AESDATA0, (DST){%k1}
.else
  .if VL == 32
	cmp		$16, LEN
	jl		1f
	vpxor		(SRC), AESDATA0_XMM, AESDATA1_XMM
	vmovdqu		AESDATA1_XMM, (DST)
	add		$16, SRC
	add		$16, DST
	sub		$16, LEN
	jz		.Ldone\@
	vextracti128	$1, AESDATA0, AESDATA0_XMM
1:
  .endif
	mov		LEN, %r10d
	_load_partial_block	SRC, AESDATA1_XMM, KEY, KEY32
	vpxor		AESDATA1_XMM, AESDATA0_XMM, AESDATA0_XMM
	mov		%r10d, %ecx
	_store_partial_block	AESDATA0_XMM, DST, KEY, KEY32
.endif

.Ldone\@:
.if VL > 16
	vzeroupper
.endif
	RET
.endm

// Below are the definitions of the functions generated by the above macro.
// They have the following prototypes:
//
// void aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key,
//				 const u8 *src, u8 *dst, int len,
//				 const u64 le_ctr[2]);
//
// void aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key,
//				const u8 *src, u8 *dst, int len,
//				const u8 iv[AES_BLOCK_SIZE], u64 ctr);
//
// Both functions generate |len| bytes of keystream, XOR it with the data from
// |src|, and write the result to |dst|.  On non-final calls, |len| must be a
// multiple of 16.  On the final call, |len| can be any value.
//
// aes_ctr64_crypt_* implement "regular" CTR, where the keystream is generated
// from a 128-bit big endian counter that increments by 1 for each AES block.
// HOWEVER, to keep the assembly code simple, some of the counter management is
// left to the caller.  aes_ctr64_crypt_* take the counter in little endian
// form, only increment the low 64 bits internally, do the conversion to big
// endian internally, and don't write the updated counter back to memory.  The
// caller is responsible for converting the starting IV to the little endian
// le_ctr, detecting the (very rare) case where a carry out of the low 64 bits
// is needed and splitting the message at that point (with the carry done in
// between), and updating le_ctr after each part if the message is multi-part.
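//
// As a rough caller-side sketch only (not part of this file; the helper shape
// and the unsuffixed function name are illustrative), the required carry
// handling could look like:
//
//	u64 nblocks = DIV_ROUND_UP(len, AES_BLOCK_SIZE);
//	u64 blocks_before_wrap = 0 - le_ctr[0];	/* 0 means "never wraps" */
//
//	if (blocks_before_wrap == 0 || nblocks <= blocks_before_wrap) {
//		aes_ctr64_crypt(key, src, dst, len, le_ctr);
//	} else {
//		int len1 = blocks_before_wrap * AES_BLOCK_SIZE;
//
//		aes_ctr64_crypt(key, src, dst, len1, le_ctr);
//		le_ctr[0] = 0;		/* carry into the high 64 bits */
//		le_ctr[1]++;
//		aes_ctr64_crypt(key, src + len1, dst + len1, len - len1,
//				le_ctr);
//	}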
//
// aes_xctr_crypt_* implement XCTR as specified in "Length-preserving encryption
// with HCTR2" (https://eprint.iacr.org/2021/1441.pdf).  XCTR is an
// easier-to-implement variant of CTR that uses little endian byte order and
// eliminates carries.  |ctr| is the per-message block counter starting at 1.

.set	VL, 16
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_aesni_avx)

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set	VL, 32
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)

.set	VL, 64
.set	USE_AVX512, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx512)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ