arm/crypto/ghash-ce-core.S

1 /* SPDX-License-Identifier: GPL-2.0-only */
3  * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
5  * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
11 	.arch		armv8-a
12 	.fpu		crypto-neon-fp-armv8
100 	 * This implementation of 64x64 -> 128 bit polynomial multiplication
101 	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
104 	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
106 	 * It has been slightly tweaked for in-order performance, and to allow
158 	// PMULL (64x64->128) based reduction for CPUs that can do
174 	// 64x64->128 PMULL instruction
210 	vld1.8		{XL2-XM2}, [r2]!
211 1:	vld1.8		{T3-T2}, [r2]!
258 	vld1.8		{XL2-XM2}, [r2]!
308 	vld1.64		{HH3-HH4}, [r3]