/* xref: /src/crypto/openssl/crypto/whirlpool/wp_block.c (revision f25b8c9fb4f58cf61adb47d7570abe7caa6d385d) */
1 /*
2  * Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /**
11  * The Whirlpool hashing function.
12  *
13  * See
14  *      P.S.L.M. Barreto, V. Rijmen,
15  *      ``The Whirlpool hashing function,''
16  *      NESSIE submission, 2000 (tweaked version, 2001),
17  *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18  *
19  * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20  * Vincent Rijmen. Lookup "reference implementations" on
21  * <http://planeta.terra.com.br/informatica/paulobarreto/>
22  *
23  * =============================================================================
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  */
38 
39 /*
40  * Whirlpool low level APIs are deprecated for public use, but still ok for
41  * internal use.
42  */
43 #include "internal/deprecated.h"
44 
45 #include "internal/cryptlib.h"
46 #include "wp_local.h"
47 #include <string.h>
48 
typedef unsigned char u8;
/*
 * Select a 64-bit unsigned integer type for this platform:
 * - MSVC (but not MinGW) uses the "unsigned __int64" spelling;
 * - __arch64__ (presumably 64-bit SPARC, where gcc defines it and
 *   "long" is 64 bits -- TODO confirm) uses unsigned long;
 * - everywhere else fall back to C99 "unsigned long long".
 * NOTE(review): the guard tests __MINGW32 without trailing underscores,
 * while MinGW compilers define __MINGW32__ -- verify this is intended.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32)
typedef unsigned __int64 u64;
#elif defined(__arch64__)
typedef unsigned long u64;
#else
typedef unsigned long long u64;
#endif
57 
/* Number of rounds in the Whirlpool compression function. */
#define ROUNDS 10

/*
 * STRICT_ALIGNMENT is assumed by default and #undef-ed only on x86 and
 * x86_64, where unaligned 64-bit loads are legal and cheap.  It drives
 * both the lookup-table layout chosen below and the input-loading path
 * in whirlpool_block().
 */
#define STRICT_ALIGNMENT
#if !defined(PEDANTIC) && (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
/*
 * Well, formally there're couple of other architectures, which permit
 * unaligned loads, specifically those not crossing cache lines, IA-64 and
 * PowerPC...
 */
#undef STRICT_ALIGNMENT
#endif

/*
 * u64_a1: a u64 whose accesses the compiler must assume may be only
 * 1-byte aligned.  Used by the C1..C7 macros in the 4KB-table variant,
 * which deliberately load from odd byte offsets into Cx.c.
 */
#ifndef STRICT_ALIGNMENT
#ifdef __GNUC__
typedef u64 u64_a1 __attribute((__aligned__(1)));
#else
typedef u64 u64_a1;
#endif
#endif

/*
 * u64_aX: same idea, but used by whirlpool_block() for reading the
 * 64-byte input block, which a caller may pass unaligned.
 */
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef u64 u64_aX __attribute((__aligned__(1)));
#else
typedef u64 u64_aX;
#endif

/*
 * SMALL_REGISTER_BANK marks 32-bit x86, where the eight 64-bit working
 * variables cannot all be kept in registers; the round loop in
 * whirlpool_block() then uses a different update schedule.
 */
#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#define SMALL_REGISTER_BANK
#if defined(WHIRLPOOL_ASM)
#ifndef OPENSSL_SMALL_FOOTPRINT
/*
 * it appears that for elder non-MMX
 * CPUs this is actually faster!
 */
#define OPENSSL_SMALL_FOOTPRINT
#endif
/*
 * Dispatch to the MMX assembler implementation when the CPU supports
 * MMX (CPUID.1:EDX bit 23, cached in OPENSSL_ia32cap_P[0]).  The
 * "break" on non-MMX CPUs drops back into the portable C code that
 * follows the macro invocation in whirlpool_block().
 */
#define GO_FOR_MMX(ctx, inp, num)                               \
    do {                                                        \
        void whirlpool_block_mmx(void *, const void *, size_t); \
        if (!(OPENSSL_ia32cap_P[0] & (1 << 23)))                \
            break;                                              \
        whirlpool_block_mmx(ctx->H.c, inp, num);                \
        return;                                                 \
    } while (0)
#endif
#endif
105 
/*
 * ROTATE(a, n): 64-bit rotation, defined only where an efficient form
 * exists (MSVC intrinsic, or GCC inline asm on x86_64 and IA-64).  If
 * it stays undefined, the table variant that needs run-time rotations
 * is simply not selected below.
 */
#undef ROTATE
#ifndef PEDANTIC
#if defined(_MSC_VER)
#if defined(_WIN64) /* applies to both IA-64 and AMD64 */
#include <stdlib.h>
#pragma intrinsic(_rotl64)
#define ROTATE(a, n) _rotl64((a), n)
#endif
#elif defined(__GNUC__) && __GNUC__ >= 2
#if defined(__x86_64) || defined(__x86_64__)
#if defined(L_ENDIAN)
#define ROTATE(a, n) ({ u64 ret; asm ("rolq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#elif defined(B_ENDIAN)
/*
 * Most will argue that x86_64 is always little-endian. Well, yes, but
 * then we have stratus.com who has modified gcc to "emulate"
 * big-endian on x86. Is there evidence that they [or somebody else]
 * won't do same for x86_64? Naturally no. And this line is waiting
 * ready for that brave soul:-)
 */
#define ROTATE(a, n) ({ u64 ret; asm ("rorq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#endif
#elif defined(__ia64) || defined(__ia64__)
/*
 * IA-64 "shrp" extracts 64 bits from the 128-bit concatenation of its
 * two (here identical) source operands, i.e. a rotate in one insn.
 */
#if defined(L_ENDIAN)
#define ROTATE(a, n) ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#elif defined(B_ENDIAN)
#define ROTATE(a, n) ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#endif
#endif
#endif
#endif

/*
 * Portable shift-based fallback, used only in small-footprint builds.
 * NOTE(review): "n" is unparenthesized in "64 - n"; this is safe here
 * because every call site below passes a literal constant 8..56, so
 * neither a precedence surprise nor a shift by 64 (undefined
 * behaviour) can occur.
 */
#if defined(OPENSSL_SMALL_FOOTPRINT)
#if !defined(ROTATE)
#if defined(L_ENDIAN) /* little-endians have to rotate left */
#define ROTATE(i, n) ((i) << (n) ^ (i) >> (64 - n))
#elif defined(B_ENDIAN) /* big-endians have to rotate right */
#define ROTATE(i, n) ((i) >> (n) ^ (i) << (64 - n))
#endif
#endif
#if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#define STRICT_ALIGNMENT /* ensure smallest table size */
#endif
#endif
154 
155 /*
156  * Table size depends on STRICT_ALIGNMENT and whether or not endian-
157  * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
158  * defined, which is normally the case on x86[_64] CPUs, the table is
159  * 4KB large unconditionally. Otherwise if ROTATE is defined, the
160  * table is 2KB large, and otherwise - 16KB. 2KB table requires a
161  * whole bunch of additional rotations, but I'm willing to "trade,"
162  * because 16KB table certainly trashes L1 cache. I wish all CPUs
163  * could handle unaligned load as 4KB table doesn't trash the cache,
164  * nor does it require additional rotations.
165  */
166 /*
167  * Note that every Cn macro expands as two loads: one byte load and
168  * one quadword load. One can argue that many single-byte loads
169  * is too excessive, as one could load a quadword and "milk" it for
170  * eight 8-bit values instead. Well, yes, but in order to do so *and*
171  * avoid excessive loads you have to accommodate a handful of 64-bit
172  * values in the register bank and issue a bunch of shifts and mask.
173  * It's a tradeoff: loads vs. shift and mask in big register bank[!].
174  * On most CPUs eight single-byte loads are faster and I let other
175  * ones to depend on smart compiler to fold byte loads if beneficial.
176  * Hand-coded assembler would be another alternative:-)
177  */
#ifdef STRICT_ALIGNMENT
#if defined(ROTATE)
/*
 * 2KB table (N == 1): a single u64 per S-box entry.  Cj(K, i) feeds
 * byte j of state row i through the table and derives the j-th
 * circulant column by rotating the stored quadword by 8*j bits.
 */
#define N 1
#define LL(c0, c1, c2, c3, c4, c5, c6, c7) c0, c1, c2, c3, c4, c5, c6, c7
#define C0(K, i) (Cx.q[K.c[(i) * 8 + 0]])
#define C1(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 1]], 8)
#define C2(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 2]], 16)
#define C3(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 3]], 24)
#define C4(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 4]], 32)
#define C5(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 5]], 40)
#define C6(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 6]], 48)
#define C7(K, i) ROTATE(Cx.q[K.c[(i) * 8 + 7]], 56)
#else
/*
 * 16KB table (N == 8): each LL line materializes all eight byte
 * rotations of the entry, so every column is a direct aligned load and
 * no run-time rotation is needed.
 */
#define N 8
#define LL(c0, c1, c2, c3, c4, c5, c6, c7) c0, c1, c2, c3, c4, c5, c6, c7, \
                                           c7, c0, c1, c2, c3, c4, c5, c6, \
                                           c6, c7, c0, c1, c2, c3, c4, c5, \
                                           c5, c6, c7, c0, c1, c2, c3, c4, \
                                           c4, c5, c6, c7, c0, c1, c2, c3, \
                                           c3, c4, c5, c6, c7, c0, c1, c2, \
                                           c2, c3, c4, c5, c6, c7, c0, c1, \
                                           c1, c2, c3, c4, c5, c6, c7, c0
#define C0(K, i) (Cx.q[0 + 8 * K.c[(i) * 8 + 0]])
#define C1(K, i) (Cx.q[1 + 8 * K.c[(i) * 8 + 1]])
#define C2(K, i) (Cx.q[2 + 8 * K.c[(i) * 8 + 2]])
#define C3(K, i) (Cx.q[3 + 8 * K.c[(i) * 8 + 3]])
#define C4(K, i) (Cx.q[4 + 8 * K.c[(i) * 8 + 4]])
#define C5(K, i) (Cx.q[5 + 8 * K.c[(i) * 8 + 5]])
#define C6(K, i) (Cx.q[6 + 8 * K.c[(i) * 8 + 6]])
#define C7(K, i) (Cx.q[7 + 8 * K.c[(i) * 8 + 7]])
#endif
#else
/*
 * 4KB table (N == 2): each entry's eight bytes are stored twice back to
 * back, and column j (j > 0) is fetched as a *misaligned* u64 load from
 * byte offset 8 - j, which realizes the rotation for free.  Only valid
 * when STRICT_ALIGNMENT is undefined (CPU tolerates unaligned loads);
 * u64_a1 tells GCC not to assume 8-byte alignment for these loads.
 */
#define N 2
#define LL(c0, c1, c2, c3, c4, c5, c6, c7) c0, c1, c2, c3, c4, c5, c6, c7, \
                                           c0, c1, c2, c3, c4, c5, c6, c7
#define C0(K, i) (((u64 *)(Cx.c + 0))[2 * K.c[(i) * 8 + 0]])
#define C1(K, i) (((u64_a1 *)(Cx.c + 7))[2 * K.c[(i) * 8 + 1]])
#define C2(K, i) (((u64_a1 *)(Cx.c + 6))[2 * K.c[(i) * 8 + 2]])
#define C3(K, i) (((u64_a1 *)(Cx.c + 5))[2 * K.c[(i) * 8 + 3]])
#define C4(K, i) (((u64_a1 *)(Cx.c + 4))[2 * K.c[(i) * 8 + 4]])
#define C5(K, i) (((u64_a1 *)(Cx.c + 3))[2 * K.c[(i) * 8 + 5]])
#define C6(K, i) (((u64_a1 *)(Cx.c + 2))[2 * K.c[(i) * 8 + 6]])
#define C7(K, i) (((u64_a1 *)(Cx.c + 1))[2 * K.c[(i) * 8 + 7]])
#endif
222 
/*
 * Cx: the Whirlpool circulant lookup table followed by the round
 * constants.  Each LL() line below provides the table entry (or
 * entries, depending on N chosen above) for one S-box input value;
 * total size is therefore 2KB, 4KB or 16KB per the layout selected by
 * STRICT_ALIGNMENT/ROTATE.  The ten 64-bit round constants rc[ROUNDS]
 * live immediately after the 256 entries, at Cx.q[256 * N] (see RC).
 * The byte-level encoding is endian-neutral because all reads go
 * through the C0..C7 accessor macros.
 */
static const union {
    u8 c[(256 * N + ROUNDS) * sizeof(u64)];
    u64 q[(256 * N + ROUNDS)];
} Cx = {
    { /* Note endian-neutral representation:-) */
        LL(0x18, 0x18, 0x60, 0x18, 0xc0, 0x78, 0x30, 0xd8),
        LL(0x23, 0x23, 0x8c, 0x23, 0x05, 0xaf, 0x46, 0x26),
        LL(0xc6, 0xc6, 0x3f, 0xc6, 0x7e, 0xf9, 0x91, 0xb8),
        LL(0xe8, 0xe8, 0x87, 0xe8, 0x13, 0x6f, 0xcd, 0xfb),
        LL(0x87, 0x87, 0x26, 0x87, 0x4c, 0xa1, 0x13, 0xcb),
        LL(0xb8, 0xb8, 0xda, 0xb8, 0xa9, 0x62, 0x6d, 0x11),
        LL(0x01, 0x01, 0x04, 0x01, 0x08, 0x05, 0x02, 0x09),
        LL(0x4f, 0x4f, 0x21, 0x4f, 0x42, 0x6e, 0x9e, 0x0d),
        LL(0x36, 0x36, 0xd8, 0x36, 0xad, 0xee, 0x6c, 0x9b),
        LL(0xa6, 0xa6, 0xa2, 0xa6, 0x59, 0x04, 0x51, 0xff),
        LL(0xd2, 0xd2, 0x6f, 0xd2, 0xde, 0xbd, 0xb9, 0x0c),
        LL(0xf5, 0xf5, 0xf3, 0xf5, 0xfb, 0x06, 0xf7, 0x0e),
        LL(0x79, 0x79, 0xf9, 0x79, 0xef, 0x80, 0xf2, 0x96),
        LL(0x6f, 0x6f, 0xa1, 0x6f, 0x5f, 0xce, 0xde, 0x30),
        LL(0x91, 0x91, 0x7e, 0x91, 0xfc, 0xef, 0x3f, 0x6d),
        LL(0x52, 0x52, 0x55, 0x52, 0xaa, 0x07, 0xa4, 0xf8),
        LL(0x60, 0x60, 0x9d, 0x60, 0x27, 0xfd, 0xc0, 0x47),
        LL(0xbc, 0xbc, 0xca, 0xbc, 0x89, 0x76, 0x65, 0x35),
        LL(0x9b, 0x9b, 0x56, 0x9b, 0xac, 0xcd, 0x2b, 0x37),
        LL(0x8e, 0x8e, 0x02, 0x8e, 0x04, 0x8c, 0x01, 0x8a),
        LL(0xa3, 0xa3, 0xb6, 0xa3, 0x71, 0x15, 0x5b, 0xd2),
        LL(0x0c, 0x0c, 0x30, 0x0c, 0x60, 0x3c, 0x18, 0x6c),
        LL(0x7b, 0x7b, 0xf1, 0x7b, 0xff, 0x8a, 0xf6, 0x84),
        LL(0x35, 0x35, 0xd4, 0x35, 0xb5, 0xe1, 0x6a, 0x80),
        LL(0x1d, 0x1d, 0x74, 0x1d, 0xe8, 0x69, 0x3a, 0xf5),
        LL(0xe0, 0xe0, 0xa7, 0xe0, 0x53, 0x47, 0xdd, 0xb3),
        LL(0xd7, 0xd7, 0x7b, 0xd7, 0xf6, 0xac, 0xb3, 0x21),
        LL(0xc2, 0xc2, 0x2f, 0xc2, 0x5e, 0xed, 0x99, 0x9c),
        LL(0x2e, 0x2e, 0xb8, 0x2e, 0x6d, 0x96, 0x5c, 0x43),
        LL(0x4b, 0x4b, 0x31, 0x4b, 0x62, 0x7a, 0x96, 0x29),
        LL(0xfe, 0xfe, 0xdf, 0xfe, 0xa3, 0x21, 0xe1, 0x5d),
        LL(0x57, 0x57, 0x41, 0x57, 0x82, 0x16, 0xae, 0xd5),
        LL(0x15, 0x15, 0x54, 0x15, 0xa8, 0x41, 0x2a, 0xbd),
        LL(0x77, 0x77, 0xc1, 0x77, 0x9f, 0xb6, 0xee, 0xe8),
        LL(0x37, 0x37, 0xdc, 0x37, 0xa5, 0xeb, 0x6e, 0x92),
        LL(0xe5, 0xe5, 0xb3, 0xe5, 0x7b, 0x56, 0xd7, 0x9e),
        LL(0x9f, 0x9f, 0x46, 0x9f, 0x8c, 0xd9, 0x23, 0x13),
        LL(0xf0, 0xf0, 0xe7, 0xf0, 0xd3, 0x17, 0xfd, 0x23),
        LL(0x4a, 0x4a, 0x35, 0x4a, 0x6a, 0x7f, 0x94, 0x20),
        LL(0xda, 0xda, 0x4f, 0xda, 0x9e, 0x95, 0xa9, 0x44),
        LL(0x58, 0x58, 0x7d, 0x58, 0xfa, 0x25, 0xb0, 0xa2),
        LL(0xc9, 0xc9, 0x03, 0xc9, 0x06, 0xca, 0x8f, 0xcf),
        LL(0x29, 0x29, 0xa4, 0x29, 0x55, 0x8d, 0x52, 0x7c),
        LL(0x0a, 0x0a, 0x28, 0x0a, 0x50, 0x22, 0x14, 0x5a),
        LL(0xb1, 0xb1, 0xfe, 0xb1, 0xe1, 0x4f, 0x7f, 0x50),
        LL(0xa0, 0xa0, 0xba, 0xa0, 0x69, 0x1a, 0x5d, 0xc9),
        LL(0x6b, 0x6b, 0xb1, 0x6b, 0x7f, 0xda, 0xd6, 0x14),
        LL(0x85, 0x85, 0x2e, 0x85, 0x5c, 0xab, 0x17, 0xd9),
        LL(0xbd, 0xbd, 0xce, 0xbd, 0x81, 0x73, 0x67, 0x3c),
        LL(0x5d, 0x5d, 0x69, 0x5d, 0xd2, 0x34, 0xba, 0x8f),
        LL(0x10, 0x10, 0x40, 0x10, 0x80, 0x50, 0x20, 0x90),
        LL(0xf4, 0xf4, 0xf7, 0xf4, 0xf3, 0x03, 0xf5, 0x07),
        LL(0xcb, 0xcb, 0x0b, 0xcb, 0x16, 0xc0, 0x8b, 0xdd),
        LL(0x3e, 0x3e, 0xf8, 0x3e, 0xed, 0xc6, 0x7c, 0xd3),
        LL(0x05, 0x05, 0x14, 0x05, 0x28, 0x11, 0x0a, 0x2d),
        LL(0x67, 0x67, 0x81, 0x67, 0x1f, 0xe6, 0xce, 0x78),
        LL(0xe4, 0xe4, 0xb7, 0xe4, 0x73, 0x53, 0xd5, 0x97),
        LL(0x27, 0x27, 0x9c, 0x27, 0x25, 0xbb, 0x4e, 0x02),
        LL(0x41, 0x41, 0x19, 0x41, 0x32, 0x58, 0x82, 0x73),
        LL(0x8b, 0x8b, 0x16, 0x8b, 0x2c, 0x9d, 0x0b, 0xa7),
        LL(0xa7, 0xa7, 0xa6, 0xa7, 0x51, 0x01, 0x53, 0xf6),
        LL(0x7d, 0x7d, 0xe9, 0x7d, 0xcf, 0x94, 0xfa, 0xb2),
        LL(0x95, 0x95, 0x6e, 0x95, 0xdc, 0xfb, 0x37, 0x49),
        LL(0xd8, 0xd8, 0x47, 0xd8, 0x8e, 0x9f, 0xad, 0x56),
        LL(0xfb, 0xfb, 0xcb, 0xfb, 0x8b, 0x30, 0xeb, 0x70),
        LL(0xee, 0xee, 0x9f, 0xee, 0x23, 0x71, 0xc1, 0xcd),
        LL(0x7c, 0x7c, 0xed, 0x7c, 0xc7, 0x91, 0xf8, 0xbb),
        LL(0x66, 0x66, 0x85, 0x66, 0x17, 0xe3, 0xcc, 0x71),
        LL(0xdd, 0xdd, 0x53, 0xdd, 0xa6, 0x8e, 0xa7, 0x7b),
        LL(0x17, 0x17, 0x5c, 0x17, 0xb8, 0x4b, 0x2e, 0xaf),
        LL(0x47, 0x47, 0x01, 0x47, 0x02, 0x46, 0x8e, 0x45),
        LL(0x9e, 0x9e, 0x42, 0x9e, 0x84, 0xdc, 0x21, 0x1a),
        LL(0xca, 0xca, 0x0f, 0xca, 0x1e, 0xc5, 0x89, 0xd4),
        LL(0x2d, 0x2d, 0xb4, 0x2d, 0x75, 0x99, 0x5a, 0x58),
        LL(0xbf, 0xbf, 0xc6, 0xbf, 0x91, 0x79, 0x63, 0x2e),
        LL(0x07, 0x07, 0x1c, 0x07, 0x38, 0x1b, 0x0e, 0x3f),
        LL(0xad, 0xad, 0x8e, 0xad, 0x01, 0x23, 0x47, 0xac),
        LL(0x5a, 0x5a, 0x75, 0x5a, 0xea, 0x2f, 0xb4, 0xb0),
        LL(0x83, 0x83, 0x36, 0x83, 0x6c, 0xb5, 0x1b, 0xef),
        LL(0x33, 0x33, 0xcc, 0x33, 0x85, 0xff, 0x66, 0xb6),
        LL(0x63, 0x63, 0x91, 0x63, 0x3f, 0xf2, 0xc6, 0x5c),
        LL(0x02, 0x02, 0x08, 0x02, 0x10, 0x0a, 0x04, 0x12),
        LL(0xaa, 0xaa, 0x92, 0xaa, 0x39, 0x38, 0x49, 0x93),
        LL(0x71, 0x71, 0xd9, 0x71, 0xaf, 0xa8, 0xe2, 0xde),
        LL(0xc8, 0xc8, 0x07, 0xc8, 0x0e, 0xcf, 0x8d, 0xc6),
        LL(0x19, 0x19, 0x64, 0x19, 0xc8, 0x7d, 0x32, 0xd1),
        LL(0x49, 0x49, 0x39, 0x49, 0x72, 0x70, 0x92, 0x3b),
        LL(0xd9, 0xd9, 0x43, 0xd9, 0x86, 0x9a, 0xaf, 0x5f),
        LL(0xf2, 0xf2, 0xef, 0xf2, 0xc3, 0x1d, 0xf9, 0x31),
        LL(0xe3, 0xe3, 0xab, 0xe3, 0x4b, 0x48, 0xdb, 0xa8),
        LL(0x5b, 0x5b, 0x71, 0x5b, 0xe2, 0x2a, 0xb6, 0xb9),
        LL(0x88, 0x88, 0x1a, 0x88, 0x34, 0x92, 0x0d, 0xbc),
        LL(0x9a, 0x9a, 0x52, 0x9a, 0xa4, 0xc8, 0x29, 0x3e),
        LL(0x26, 0x26, 0x98, 0x26, 0x2d, 0xbe, 0x4c, 0x0b),
        LL(0x32, 0x32, 0xc8, 0x32, 0x8d, 0xfa, 0x64, 0xbf),
        LL(0xb0, 0xb0, 0xfa, 0xb0, 0xe9, 0x4a, 0x7d, 0x59),
        LL(0xe9, 0xe9, 0x83, 0xe9, 0x1b, 0x6a, 0xcf, 0xf2),
        LL(0x0f, 0x0f, 0x3c, 0x0f, 0x78, 0x33, 0x1e, 0x77),
        LL(0xd5, 0xd5, 0x73, 0xd5, 0xe6, 0xa6, 0xb7, 0x33),
        LL(0x80, 0x80, 0x3a, 0x80, 0x74, 0xba, 0x1d, 0xf4),
        LL(0xbe, 0xbe, 0xc2, 0xbe, 0x99, 0x7c, 0x61, 0x27),
        LL(0xcd, 0xcd, 0x13, 0xcd, 0x26, 0xde, 0x87, 0xeb),
        LL(0x34, 0x34, 0xd0, 0x34, 0xbd, 0xe4, 0x68, 0x89),
        LL(0x48, 0x48, 0x3d, 0x48, 0x7a, 0x75, 0x90, 0x32),
        LL(0xff, 0xff, 0xdb, 0xff, 0xab, 0x24, 0xe3, 0x54),
        LL(0x7a, 0x7a, 0xf5, 0x7a, 0xf7, 0x8f, 0xf4, 0x8d),
        LL(0x90, 0x90, 0x7a, 0x90, 0xf4, 0xea, 0x3d, 0x64),
        LL(0x5f, 0x5f, 0x61, 0x5f, 0xc2, 0x3e, 0xbe, 0x9d),
        LL(0x20, 0x20, 0x80, 0x20, 0x1d, 0xa0, 0x40, 0x3d),
        LL(0x68, 0x68, 0xbd, 0x68, 0x67, 0xd5, 0xd0, 0x0f),
        LL(0x1a, 0x1a, 0x68, 0x1a, 0xd0, 0x72, 0x34, 0xca),
        LL(0xae, 0xae, 0x82, 0xae, 0x19, 0x2c, 0x41, 0xb7),
        LL(0xb4, 0xb4, 0xea, 0xb4, 0xc9, 0x5e, 0x75, 0x7d),
        LL(0x54, 0x54, 0x4d, 0x54, 0x9a, 0x19, 0xa8, 0xce),
        LL(0x93, 0x93, 0x76, 0x93, 0xec, 0xe5, 0x3b, 0x7f),
        LL(0x22, 0x22, 0x88, 0x22, 0x0d, 0xaa, 0x44, 0x2f),
        LL(0x64, 0x64, 0x8d, 0x64, 0x07, 0xe9, 0xc8, 0x63),
        LL(0xf1, 0xf1, 0xe3, 0xf1, 0xdb, 0x12, 0xff, 0x2a),
        LL(0x73, 0x73, 0xd1, 0x73, 0xbf, 0xa2, 0xe6, 0xcc),
        LL(0x12, 0x12, 0x48, 0x12, 0x90, 0x5a, 0x24, 0x82),
        LL(0x40, 0x40, 0x1d, 0x40, 0x3a, 0x5d, 0x80, 0x7a),
        LL(0x08, 0x08, 0x20, 0x08, 0x40, 0x28, 0x10, 0x48),
        LL(0xc3, 0xc3, 0x2b, 0xc3, 0x56, 0xe8, 0x9b, 0x95),
        LL(0xec, 0xec, 0x97, 0xec, 0x33, 0x7b, 0xc5, 0xdf),
        LL(0xdb, 0xdb, 0x4b, 0xdb, 0x96, 0x90, 0xab, 0x4d),
        LL(0xa1, 0xa1, 0xbe, 0xa1, 0x61, 0x1f, 0x5f, 0xc0),
        LL(0x8d, 0x8d, 0x0e, 0x8d, 0x1c, 0x83, 0x07, 0x91),
        LL(0x3d, 0x3d, 0xf4, 0x3d, 0xf5, 0xc9, 0x7a, 0xc8),
        LL(0x97, 0x97, 0x66, 0x97, 0xcc, 0xf1, 0x33, 0x5b),
        LL(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
        LL(0xcf, 0xcf, 0x1b, 0xcf, 0x36, 0xd4, 0x83, 0xf9),
        LL(0x2b, 0x2b, 0xac, 0x2b, 0x45, 0x87, 0x56, 0x6e),
        LL(0x76, 0x76, 0xc5, 0x76, 0x97, 0xb3, 0xec, 0xe1),
        LL(0x82, 0x82, 0x32, 0x82, 0x64, 0xb0, 0x19, 0xe6),
        LL(0xd6, 0xd6, 0x7f, 0xd6, 0xfe, 0xa9, 0xb1, 0x28),
        LL(0x1b, 0x1b, 0x6c, 0x1b, 0xd8, 0x77, 0x36, 0xc3),
        LL(0xb5, 0xb5, 0xee, 0xb5, 0xc1, 0x5b, 0x77, 0x74),
        LL(0xaf, 0xaf, 0x86, 0xaf, 0x11, 0x29, 0x43, 0xbe),
        LL(0x6a, 0x6a, 0xb5, 0x6a, 0x77, 0xdf, 0xd4, 0x1d),
        LL(0x50, 0x50, 0x5d, 0x50, 0xba, 0x0d, 0xa0, 0xea),
        LL(0x45, 0x45, 0x09, 0x45, 0x12, 0x4c, 0x8a, 0x57),
        LL(0xf3, 0xf3, 0xeb, 0xf3, 0xcb, 0x18, 0xfb, 0x38),
        LL(0x30, 0x30, 0xc0, 0x30, 0x9d, 0xf0, 0x60, 0xad),
        LL(0xef, 0xef, 0x9b, 0xef, 0x2b, 0x74, 0xc3, 0xc4),
        LL(0x3f, 0x3f, 0xfc, 0x3f, 0xe5, 0xc3, 0x7e, 0xda),
        LL(0x55, 0x55, 0x49, 0x55, 0x92, 0x1c, 0xaa, 0xc7),
        LL(0xa2, 0xa2, 0xb2, 0xa2, 0x79, 0x10, 0x59, 0xdb),
        LL(0xea, 0xea, 0x8f, 0xea, 0x03, 0x65, 0xc9, 0xe9),
        LL(0x65, 0x65, 0x89, 0x65, 0x0f, 0xec, 0xca, 0x6a),
        LL(0xba, 0xba, 0xd2, 0xba, 0xb9, 0x68, 0x69, 0x03),
        LL(0x2f, 0x2f, 0xbc, 0x2f, 0x65, 0x93, 0x5e, 0x4a),
        LL(0xc0, 0xc0, 0x27, 0xc0, 0x4e, 0xe7, 0x9d, 0x8e),
        LL(0xde, 0xde, 0x5f, 0xde, 0xbe, 0x81, 0xa1, 0x60),
        LL(0x1c, 0x1c, 0x70, 0x1c, 0xe0, 0x6c, 0x38, 0xfc),
        LL(0xfd, 0xfd, 0xd3, 0xfd, 0xbb, 0x2e, 0xe7, 0x46),
        LL(0x4d, 0x4d, 0x29, 0x4d, 0x52, 0x64, 0x9a, 0x1f),
        LL(0x92, 0x92, 0x72, 0x92, 0xe4, 0xe0, 0x39, 0x76),
        LL(0x75, 0x75, 0xc9, 0x75, 0x8f, 0xbc, 0xea, 0xfa),
        LL(0x06, 0x06, 0x18, 0x06, 0x30, 0x1e, 0x0c, 0x36),
        LL(0x8a, 0x8a, 0x12, 0x8a, 0x24, 0x98, 0x09, 0xae),
        LL(0xb2, 0xb2, 0xf2, 0xb2, 0xf9, 0x40, 0x79, 0x4b),
        LL(0xe6, 0xe6, 0xbf, 0xe6, 0x63, 0x59, 0xd1, 0x85),
        LL(0x0e, 0x0e, 0x38, 0x0e, 0x70, 0x36, 0x1c, 0x7e),
        LL(0x1f, 0x1f, 0x7c, 0x1f, 0xf8, 0x63, 0x3e, 0xe7),
        LL(0x62, 0x62, 0x95, 0x62, 0x37, 0xf7, 0xc4, 0x55),
        LL(0xd4, 0xd4, 0x77, 0xd4, 0xee, 0xa3, 0xb5, 0x3a),
        LL(0xa8, 0xa8, 0x9a, 0xa8, 0x29, 0x32, 0x4d, 0x81),
        LL(0x96, 0x96, 0x62, 0x96, 0xc4, 0xf4, 0x31, 0x52),
        LL(0xf9, 0xf9, 0xc3, 0xf9, 0x9b, 0x3a, 0xef, 0x62),
        LL(0xc5, 0xc5, 0x33, 0xc5, 0x66, 0xf6, 0x97, 0xa3),
        LL(0x25, 0x25, 0x94, 0x25, 0x35, 0xb1, 0x4a, 0x10),
        LL(0x59, 0x59, 0x79, 0x59, 0xf2, 0x20, 0xb2, 0xab),
        LL(0x84, 0x84, 0x2a, 0x84, 0x54, 0xae, 0x15, 0xd0),
        LL(0x72, 0x72, 0xd5, 0x72, 0xb7, 0xa7, 0xe4, 0xc5),
        LL(0x39, 0x39, 0xe4, 0x39, 0xd5, 0xdd, 0x72, 0xec),
        LL(0x4c, 0x4c, 0x2d, 0x4c, 0x5a, 0x61, 0x98, 0x16),
        LL(0x5e, 0x5e, 0x65, 0x5e, 0xca, 0x3b, 0xbc, 0x94),
        LL(0x78, 0x78, 0xfd, 0x78, 0xe7, 0x85, 0xf0, 0x9f),
        LL(0x38, 0x38, 0xe0, 0x38, 0xdd, 0xd8, 0x70, 0xe5),
        LL(0x8c, 0x8c, 0x0a, 0x8c, 0x14, 0x86, 0x05, 0x98),
        LL(0xd1, 0xd1, 0x63, 0xd1, 0xc6, 0xb2, 0xbf, 0x17),
        LL(0xa5, 0xa5, 0xae, 0xa5, 0x41, 0x0b, 0x57, 0xe4),
        LL(0xe2, 0xe2, 0xaf, 0xe2, 0x43, 0x4d, 0xd9, 0xa1),
        LL(0x61, 0x61, 0x99, 0x61, 0x2f, 0xf8, 0xc2, 0x4e),
        LL(0xb3, 0xb3, 0xf6, 0xb3, 0xf1, 0x45, 0x7b, 0x42),
        LL(0x21, 0x21, 0x84, 0x21, 0x15, 0xa5, 0x42, 0x34),
        LL(0x9c, 0x9c, 0x4a, 0x9c, 0x94, 0xd6, 0x25, 0x08),
        LL(0x1e, 0x1e, 0x78, 0x1e, 0xf0, 0x66, 0x3c, 0xee),
        LL(0x43, 0x43, 0x11, 0x43, 0x22, 0x52, 0x86, 0x61),
        LL(0xc7, 0xc7, 0x3b, 0xc7, 0x76, 0xfc, 0x93, 0xb1),
        LL(0xfc, 0xfc, 0xd7, 0xfc, 0xb3, 0x2b, 0xe5, 0x4f),
        LL(0x04, 0x04, 0x10, 0x04, 0x20, 0x14, 0x08, 0x24),
        LL(0x51, 0x51, 0x59, 0x51, 0xb2, 0x08, 0xa2, 0xe3),
        LL(0x99, 0x99, 0x5e, 0x99, 0xbc, 0xc7, 0x2f, 0x25),
        LL(0x6d, 0x6d, 0xa9, 0x6d, 0x4f, 0xc4, 0xda, 0x22),
        LL(0x0d, 0x0d, 0x34, 0x0d, 0x68, 0x39, 0x1a, 0x65),
        LL(0xfa, 0xfa, 0xcf, 0xfa, 0x83, 0x35, 0xe9, 0x79),
        LL(0xdf, 0xdf, 0x5b, 0xdf, 0xb6, 0x84, 0xa3, 0x69),
        LL(0x7e, 0x7e, 0xe5, 0x7e, 0xd7, 0x9b, 0xfc, 0xa9),
        LL(0x24, 0x24, 0x90, 0x24, 0x3d, 0xb4, 0x48, 0x19),
        LL(0x3b, 0x3b, 0xec, 0x3b, 0xc5, 0xd7, 0x76, 0xfe),
        LL(0xab, 0xab, 0x96, 0xab, 0x31, 0x3d, 0x4b, 0x9a),
        LL(0xce, 0xce, 0x1f, 0xce, 0x3e, 0xd1, 0x81, 0xf0),
        LL(0x11, 0x11, 0x44, 0x11, 0x88, 0x55, 0x22, 0x99),
        LL(0x8f, 0x8f, 0x06, 0x8f, 0x0c, 0x89, 0x03, 0x83),
        LL(0x4e, 0x4e, 0x25, 0x4e, 0x4a, 0x6b, 0x9c, 0x04),
        LL(0xb7, 0xb7, 0xe6, 0xb7, 0xd1, 0x51, 0x73, 0x66),
        LL(0xeb, 0xeb, 0x8b, 0xeb, 0x0b, 0x60, 0xcb, 0xe0),
        LL(0x3c, 0x3c, 0xf0, 0x3c, 0xfd, 0xcc, 0x78, 0xc1),
        LL(0x81, 0x81, 0x3e, 0x81, 0x7c, 0xbf, 0x1f, 0xfd),
        LL(0x94, 0x94, 0x6a, 0x94, 0xd4, 0xfe, 0x35, 0x40),
        LL(0xf7, 0xf7, 0xfb, 0xf7, 0xeb, 0x0c, 0xf3, 0x1c),
        LL(0xb9, 0xb9, 0xde, 0xb9, 0xa1, 0x67, 0x6f, 0x18),
        LL(0x13, 0x13, 0x4c, 0x13, 0x98, 0x5f, 0x26, 0x8b),
        LL(0x2c, 0x2c, 0xb0, 0x2c, 0x7d, 0x9c, 0x58, 0x51),
        LL(0xd3, 0xd3, 0x6b, 0xd3, 0xd6, 0xb8, 0xbb, 0x05),
        LL(0xe7, 0xe7, 0xbb, 0xe7, 0x6b, 0x5c, 0xd3, 0x8c),
        LL(0x6e, 0x6e, 0xa5, 0x6e, 0x57, 0xcb, 0xdc, 0x39),
        LL(0xc4, 0xc4, 0x37, 0xc4, 0x6e, 0xf3, 0x95, 0xaa),
        LL(0x03, 0x03, 0x0c, 0x03, 0x18, 0x0f, 0x06, 0x1b),
        LL(0x56, 0x56, 0x45, 0x56, 0x8a, 0x13, 0xac, 0xdc),
        LL(0x44, 0x44, 0x0d, 0x44, 0x1a, 0x49, 0x88, 0x5e),
        LL(0x7f, 0x7f, 0xe1, 0x7f, 0xdf, 0x9e, 0xfe, 0xa0),
        LL(0xa9, 0xa9, 0x9e, 0xa9, 0x21, 0x37, 0x4f, 0x88),
        LL(0x2a, 0x2a, 0xa8, 0x2a, 0x4d, 0x82, 0x54, 0x67),
        LL(0xbb, 0xbb, 0xd6, 0xbb, 0xb1, 0x6d, 0x6b, 0x0a),
        LL(0xc1, 0xc1, 0x23, 0xc1, 0x46, 0xe2, 0x9f, 0x87),
        LL(0x53, 0x53, 0x51, 0x53, 0xa2, 0x02, 0xa6, 0xf1),
        LL(0xdc, 0xdc, 0x57, 0xdc, 0xae, 0x8b, 0xa5, 0x72),
        LL(0x0b, 0x0b, 0x2c, 0x0b, 0x58, 0x27, 0x16, 0x53),
        LL(0x9d, 0x9d, 0x4e, 0x9d, 0x9c, 0xd3, 0x27, 0x01),
        LL(0x6c, 0x6c, 0xad, 0x6c, 0x47, 0xc1, 0xd8, 0x2b),
        LL(0x31, 0x31, 0xc4, 0x31, 0x95, 0xf5, 0x62, 0xa4),
        LL(0x74, 0x74, 0xcd, 0x74, 0x87, 0xb9, 0xe8, 0xf3),
        LL(0xf6, 0xf6, 0xff, 0xf6, 0xe3, 0x09, 0xf1, 0x15),
        LL(0x46, 0x46, 0x05, 0x46, 0x0a, 0x43, 0x8c, 0x4c),
        LL(0xac, 0xac, 0x8a, 0xac, 0x09, 0x26, 0x45, 0xa5),
        LL(0x89, 0x89, 0x1e, 0x89, 0x3c, 0x97, 0x0f, 0xb5),
        LL(0x14, 0x14, 0x50, 0x14, 0xa0, 0x44, 0x28, 0xb4),
        LL(0xe1, 0xe1, 0xa3, 0xe1, 0x5b, 0x42, 0xdf, 0xba),
        LL(0x16, 0x16, 0x58, 0x16, 0xb0, 0x4e, 0x2c, 0xa6),
        LL(0x3a, 0x3a, 0xe8, 0x3a, 0xcd, 0xd2, 0x74, 0xf7),
        LL(0x69, 0x69, 0xb9, 0x69, 0x6f, 0xd0, 0xd2, 0x06),
        LL(0x09, 0x09, 0x24, 0x09, 0x48, 0x2d, 0x12, 0x41),
        LL(0x70, 0x70, 0xdd, 0x70, 0xa7, 0xad, 0xe0, 0xd7),
        LL(0xb6, 0xb6, 0xe2, 0xb6, 0xd9, 0x54, 0x71, 0x6f),
        LL(0xd0, 0xd0, 0x67, 0xd0, 0xce, 0xb7, 0xbd, 0x1e),
        LL(0xed, 0xed, 0x93, 0xed, 0x3b, 0x7e, 0xc7, 0xd6),
        LL(0xcc, 0xcc, 0x17, 0xcc, 0x2e, 0xdb, 0x85, 0xe2),
        LL(0x42, 0x42, 0x15, 0x42, 0x2a, 0x57, 0x84, 0x68),
        LL(0x98, 0x98, 0x5a, 0x98, 0xb4, 0xc2, 0x2d, 0x2c),
        LL(0xa4, 0xa4, 0xaa, 0xa4, 0x49, 0x0e, 0x55, 0xed),
        LL(0x28, 0x28, 0xa0, 0x28, 0x5d, 0x88, 0x50, 0x75),
        LL(0x5c, 0x5c, 0x6d, 0x5c, 0xda, 0x31, 0xb8, 0x86),
        LL(0xf8, 0xf8, 0xc7, 0xf8, 0x93, 0x3f, 0xed, 0x6b),
        LL(0x86, 0x86, 0x22, 0x86, 0x44, 0xa4, 0x11, 0xc2),
/* RC: the ten 64-bit round constants, starting right after the 256
 * table entries above. */
#define RC (&(Cx.q[256 * N]))
        0x18, 0x23, 0xc6, 0xe8, 0x87, 0xb8, 0x01, 0x4f,
        /* rc[ROUNDS] */
        0x36, 0xa6, 0xd2, 0xf5, 0x79, 0x6f, 0x91, 0x52, 0x60, 0xbc, 0x9b,
        0x8e, 0xa3, 0x0c, 0x7b, 0x35, 0x1d, 0xe0, 0xd7, 0xc2, 0x2e, 0x4b,
        0xfe, 0x57, 0x15, 0x77, 0x37, 0xe5, 0x9f, 0xf0, 0x4a, 0xda, 0x58,
        0xc9, 0x29, 0x0a, 0xb1, 0xa0, 0x6b, 0x85, 0xbd, 0x5d, 0x10, 0xf4,
        0xcb, 0x3e, 0x05, 0x67, 0xe4, 0x27, 0x41, 0x8b, 0xa7, 0x7d, 0x95,
        0xd8, 0xfb, 0xee, 0x7c, 0x66, 0xdd, 0x17, 0x47, 0x9e, 0xca, 0x2d,
        0xbf, 0x07, 0xad, 0x5a, 0x83, 0x33 }
};
495 
void whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n)
497 {
498     int r;
499     const u8 *p = inp;
500     union {
501         u64 q[8];
502         u8 c[64];
503     } S, K, *H = (void *)ctx->H.q;
504 
505 #ifdef GO_FOR_MMX
506     GO_FOR_MMX(ctx, inp, n);
507 #endif
508     do {
509 #ifdef OPENSSL_SMALL_FOOTPRINT
510         u64 L[8];
511         int i;
512 
513         for (i = 0; i < 64; i++)
514             S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
515         for (r = 0; r < ROUNDS; r++) {
516             for (i = 0; i < 8; i++) {
517                 L[i] = i ? 0 : RC[r];
518                 L[i] ^= C0(K, i) ^ C1(K, (i - 1) & 7) ^ C2(K, (i - 2) & 7) ^ C3(K, (i - 3) & 7) ^ C4(K, (i - 4) & 7) ^ C5(K, (i - 5) & 7) ^ C6(K, (i - 6) & 7) ^ C7(K, (i - 7) & 7);
519             }
520             memcpy(K.q, L, 64);
521             for (i = 0; i < 8; i++) {
522                 L[i] ^= C0(S, i) ^ C1(S, (i - 1) & 7) ^ C2(S, (i - 2) & 7) ^ C3(S, (i - 3) & 7) ^ C4(S, (i - 4) & 7) ^ C5(S, (i - 5) & 7) ^ C6(S, (i - 6) & 7) ^ C7(S, (i - 7) & 7);
523             }
524             memcpy(S.q, L, 64);
525         }
526         for (i = 0; i < 64; i++)
527             H->c[i] ^= S.c[i] ^ p[i];
528 #else
529         u64 L0, L1, L2, L3, L4, L5, L6, L7;
530 
531 #ifdef STRICT_ALIGNMENT
532         if ((size_t)p & 7) {
533             memcpy(S.c, p, 64);
534             S.q[0] ^= (K.q[0] = H->q[0]);
535             S.q[1] ^= (K.q[1] = H->q[1]);
536             S.q[2] ^= (K.q[2] = H->q[2]);
537             S.q[3] ^= (K.q[3] = H->q[3]);
538             S.q[4] ^= (K.q[4] = H->q[4]);
539             S.q[5] ^= (K.q[5] = H->q[5]);
540             S.q[6] ^= (K.q[6] = H->q[6]);
541             S.q[7] ^= (K.q[7] = H->q[7]);
542         } else
543 #endif
544         {
545             const u64_aX *pa = (const u64_aX *)p;
546             S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
547             S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
548             S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
549             S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
550             S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
551             S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
552             S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
553             S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
554         }
555 
556         for (r = 0; r < ROUNDS; r++) {
557 #ifdef SMALL_REGISTER_BANK
558             L0 = C0(K, 0) ^ C1(K, 7) ^ C2(K, 6) ^ C3(K, 5) ^ C4(K, 4) ^ C5(K, 3) ^ C6(K, 2) ^ C7(K, 1) ^ RC[r];
559             L1 = C0(K, 1) ^ C1(K, 0) ^ C2(K, 7) ^ C3(K, 6) ^ C4(K, 5) ^ C5(K, 4) ^ C6(K, 3) ^ C7(K, 2);
560             L2 = C0(K, 2) ^ C1(K, 1) ^ C2(K, 0) ^ C3(K, 7) ^ C4(K, 6) ^ C5(K, 5) ^ C6(K, 4) ^ C7(K, 3);
561             L3 = C0(K, 3) ^ C1(K, 2) ^ C2(K, 1) ^ C3(K, 0) ^ C4(K, 7) ^ C5(K, 6) ^ C6(K, 5) ^ C7(K, 4);
562             L4 = C0(K, 4) ^ C1(K, 3) ^ C2(K, 2) ^ C3(K, 1) ^ C4(K, 0) ^ C5(K, 7) ^ C6(K, 6) ^ C7(K, 5);
563             L5 = C0(K, 5) ^ C1(K, 4) ^ C2(K, 3) ^ C3(K, 2) ^ C4(K, 1) ^ C5(K, 0) ^ C6(K, 7) ^ C7(K, 6);
564             L6 = C0(K, 6) ^ C1(K, 5) ^ C2(K, 4) ^ C3(K, 3) ^ C4(K, 2) ^ C5(K, 1) ^ C6(K, 0) ^ C7(K, 7);
565             L7 = C0(K, 7) ^ C1(K, 6) ^ C2(K, 5) ^ C3(K, 4) ^ C4(K, 3) ^ C5(K, 2) ^ C6(K, 1) ^ C7(K, 0);
566 
567             K.q[0] = L0;
568             K.q[1] = L1;
569             K.q[2] = L2;
570             K.q[3] = L3;
571             K.q[4] = L4;
572             K.q[5] = L5;
573             K.q[6] = L6;
574             K.q[7] = L7;
575 
576             L0 ^= C0(S, 0) ^ C1(S, 7) ^ C2(S, 6) ^ C3(S, 5) ^ C4(S, 4) ^ C5(S, 3) ^ C6(S, 2) ^ C7(S, 1);
577             L1 ^= C0(S, 1) ^ C1(S, 0) ^ C2(S, 7) ^ C3(S, 6) ^ C4(S, 5) ^ C5(S, 4) ^ C6(S, 3) ^ C7(S, 2);
578             L2 ^= C0(S, 2) ^ C1(S, 1) ^ C2(S, 0) ^ C3(S, 7) ^ C4(S, 6) ^ C5(S, 5) ^ C6(S, 4) ^ C7(S, 3);
579             L3 ^= C0(S, 3) ^ C1(S, 2) ^ C2(S, 1) ^ C3(S, 0) ^ C4(S, 7) ^ C5(S, 6) ^ C6(S, 5) ^ C7(S, 4);
580             L4 ^= C0(S, 4) ^ C1(S, 3) ^ C2(S, 2) ^ C3(S, 1) ^ C4(S, 0) ^ C5(S, 7) ^ C6(S, 6) ^ C7(S, 5);
581             L5 ^= C0(S, 5) ^ C1(S, 4) ^ C2(S, 3) ^ C3(S, 2) ^ C4(S, 1) ^ C5(S, 0) ^ C6(S, 7) ^ C7(S, 6);
582             L6 ^= C0(S, 6) ^ C1(S, 5) ^ C2(S, 4) ^ C3(S, 3) ^ C4(S, 2) ^ C5(S, 1) ^ C6(S, 0) ^ C7(S, 7);
583             L7 ^= C0(S, 7) ^ C1(S, 6) ^ C2(S, 5) ^ C3(S, 4) ^ C4(S, 3) ^ C5(S, 2) ^ C6(S, 1) ^ C7(S, 0);
584 
585             S.q[0] = L0;
586             S.q[1] = L1;
587             S.q[2] = L2;
588             S.q[3] = L3;
589             S.q[4] = L4;
590             S.q[5] = L5;
591             S.q[6] = L6;
592             S.q[7] = L7;
593 #else
594             L0 = C0(K, 0);
595             L1 = C1(K, 0);
596             L2 = C2(K, 0);
597             L3 = C3(K, 0);
598             L4 = C4(K, 0);
599             L5 = C5(K, 0);
600             L6 = C6(K, 0);
601             L7 = C7(K, 0);
602             L0 ^= RC[r];
603 
604             L1 ^= C0(K, 1);
605             L2 ^= C1(K, 1);
606             L3 ^= C2(K, 1);
607             L4 ^= C3(K, 1);
608             L5 ^= C4(K, 1);
609             L6 ^= C5(K, 1);
610             L7 ^= C6(K, 1);
611             L0 ^= C7(K, 1);
612 
613             L2 ^= C0(K, 2);
614             L3 ^= C1(K, 2);
615             L4 ^= C2(K, 2);
616             L5 ^= C3(K, 2);
617             L6 ^= C4(K, 2);
618             L7 ^= C5(K, 2);
619             L0 ^= C6(K, 2);
620             L1 ^= C7(K, 2);
621 
622             L3 ^= C0(K, 3);
623             L4 ^= C1(K, 3);
624             L5 ^= C2(K, 3);
625             L6 ^= C3(K, 3);
626             L7 ^= C4(K, 3);
627             L0 ^= C5(K, 3);
628             L1 ^= C6(K, 3);
629             L2 ^= C7(K, 3);
630 
631             L4 ^= C0(K, 4);
632             L5 ^= C1(K, 4);
633             L6 ^= C2(K, 4);
634             L7 ^= C3(K, 4);
635             L0 ^= C4(K, 4);
636             L1 ^= C5(K, 4);
637             L2 ^= C6(K, 4);
638             L3 ^= C7(K, 4);
639 
640             L5 ^= C0(K, 5);
641             L6 ^= C1(K, 5);
642             L7 ^= C2(K, 5);
643             L0 ^= C3(K, 5);
644             L1 ^= C4(K, 5);
645             L2 ^= C5(K, 5);
646             L3 ^= C6(K, 5);
647             L4 ^= C7(K, 5);
648 
649             L6 ^= C0(K, 6);
650             L7 ^= C1(K, 6);
651             L0 ^= C2(K, 6);
652             L1 ^= C3(K, 6);
653             L2 ^= C4(K, 6);
654             L3 ^= C5(K, 6);
655             L4 ^= C6(K, 6);
656             L5 ^= C7(K, 6);
657 
658             L7 ^= C0(K, 7);
659             L0 ^= C1(K, 7);
660             L1 ^= C2(K, 7);
661             L2 ^= C3(K, 7);
662             L3 ^= C4(K, 7);
663             L4 ^= C5(K, 7);
664             L5 ^= C6(K, 7);
665             L6 ^= C7(K, 7);
666 
667             K.q[0] = L0;
668             K.q[1] = L1;
669             K.q[2] = L2;
670             K.q[3] = L3;
671             K.q[4] = L4;
672             K.q[5] = L5;
673             K.q[6] = L6;
674             K.q[7] = L7;
675 
676             L0 ^= C0(S, 0);
677             L1 ^= C1(S, 0);
678             L2 ^= C2(S, 0);
679             L3 ^= C3(S, 0);
680             L4 ^= C4(S, 0);
681             L5 ^= C5(S, 0);
682             L6 ^= C6(S, 0);
683             L7 ^= C7(S, 0);
684 
685             L1 ^= C0(S, 1);
686             L2 ^= C1(S, 1);
687             L3 ^= C2(S, 1);
688             L4 ^= C3(S, 1);
689             L5 ^= C4(S, 1);
690             L6 ^= C5(S, 1);
691             L7 ^= C6(S, 1);
692             L0 ^= C7(S, 1);
693 
694             L2 ^= C0(S, 2);
695             L3 ^= C1(S, 2);
696             L4 ^= C2(S, 2);
697             L5 ^= C3(S, 2);
698             L6 ^= C4(S, 2);
699             L7 ^= C5(S, 2);
700             L0 ^= C6(S, 2);
701             L1 ^= C7(S, 2);
702 
703             L3 ^= C0(S, 3);
704             L4 ^= C1(S, 3);
705             L5 ^= C2(S, 3);
706             L6 ^= C3(S, 3);
707             L7 ^= C4(S, 3);
708             L0 ^= C5(S, 3);
709             L1 ^= C6(S, 3);
710             L2 ^= C7(S, 3);
711 
712             L4 ^= C0(S, 4);
713             L5 ^= C1(S, 4);
714             L6 ^= C2(S, 4);
715             L7 ^= C3(S, 4);
716             L0 ^= C4(S, 4);
717             L1 ^= C5(S, 4);
718             L2 ^= C6(S, 4);
719             L3 ^= C7(S, 4);
720 
721             L5 ^= C0(S, 5);
722             L6 ^= C1(S, 5);
723             L7 ^= C2(S, 5);
724             L0 ^= C3(S, 5);
725             L1 ^= C4(S, 5);
726             L2 ^= C5(S, 5);
727             L3 ^= C6(S, 5);
728             L4 ^= C7(S, 5);
729 
730             L6 ^= C0(S, 6);
731             L7 ^= C1(S, 6);
732             L0 ^= C2(S, 6);
733             L1 ^= C3(S, 6);
734             L2 ^= C4(S, 6);
735             L3 ^= C5(S, 6);
736             L4 ^= C6(S, 6);
737             L5 ^= C7(S, 6);
738 
739             L7 ^= C0(S, 7);
740             L0 ^= C1(S, 7);
741             L1 ^= C2(S, 7);
742             L2 ^= C3(S, 7);
743             L3 ^= C4(S, 7);
744             L4 ^= C5(S, 7);
745             L5 ^= C6(S, 7);
746             L6 ^= C7(S, 7);
747 
748             S.q[0] = L0;
749             S.q[1] = L1;
750             S.q[2] = L2;
751             S.q[3] = L3;
752             S.q[4] = L4;
753             S.q[5] = L5;
754             S.q[6] = L6;
755             S.q[7] = L7;
756 #endif
757         }
758 
759 #ifdef STRICT_ALIGNMENT
760         if ((size_t)p & 7) {
761             int i;
762             for (i = 0; i < 64; i++)
763                 H->c[i] ^= S.c[i] ^ p[i];
764         } else
765 #endif
766         {
767             const u64_aX *pa = (const u64_aX *)p;
768             H->q[0] ^= S.q[0] ^ pa[0];
769             H->q[1] ^= S.q[1] ^ pa[1];
770             H->q[2] ^= S.q[2] ^ pa[2];
771             H->q[3] ^= S.q[3] ^ pa[3];
772             H->q[4] ^= S.q[4] ^ pa[4];
773             H->q[5] ^= S.q[5] ^ pa[5];
774             H->q[6] ^= S.q[6] ^ pa[6];
775             H->q[7] ^= S.q[7] ^ pa[7];
776         }
777 #endif
778         p += 64;
779     } while (--n);
780 }
781