/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Twofish Cipher 3-way parallel algorithm (x86_64)
 *
 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

.file "twofish-x86_64-asm-3way.S"
.text

/*
 * Structure of crypto context: byte offsets relative to CTX.
 *
 * s0..s3 are indexed with a scale of 4 (32-bit entries) by a zero-extended
 * byte, and are spaced 1024 bytes apart. w is read as 64-bit quantities at
 * 32-bit word indices 0, 2, 4 and 6. k is read as 32-bit words at indices
 * 2*n and 2*n+1 for the round number n.
 */
#define s0	0
#define s1	1024
#define s2	2048
#define s3	3072
#define w	4096
#define k	4128

/**********************************************************************
  3-way twofish
 **********************************************************************/

/* context pointer; stays in %rdi for the whole function */
#define CTX	%rdi
/*
 * source pointer on entry, destination pointer on exit. %rdx is also RT0,
 * so the value is not preserved while the rounds run.
 */
#define RIO	%rdx

/* "ab" half of each of the three blocks: two 32-bit words per register */
#define RAB0	%rax
#define RAB1	%rbx
#define RAB2	%rcx

#define RAB0d	%eax
#define RAB1d	%ebx
#define RAB2d	%ecx

/* second-lowest byte of each ab register */
#define RAB0bh	%ah
#define RAB1bh	%bh
#define RAB2bh	%ch

/* lowest byte of each ab register */
#define RAB0bl	%al
#define RAB1bl	%bl
#define RAB2bl	%cl

/*
 * "cd" half of each block while it is parked on the stack. These slots are
 * only meaningful between push_cd() and pop_cd().
 */
#define CD0	0x0(%rsp)
#define CD1	0x8(%rsp)
#define CD2	0x10(%rsp)

/* used only before/after all rounds */
#define RCD0	%r8
#define RCD1	%r9
#define RCD2	%r10

/* used only during rounds (same registers as RCD0..RCD2) */
#define RX0	%r8
#define RX1	%r9
#define RX2	%r10

#define RX0d	%r8d
#define RX1d	%r9d
#define RX2d	%r10d

#define RY0	%r11
#define RY1	%r12
#define RY2	%r13

#define RY0d	%r11d
#define RY1d	%r12d
#define RY2d	%r13d

/* scratch registers; they overlap the incoming %rdx and %rsi arguments */
#define RT0	%rdx
#define RT1	%rsi

#define RT0d	%edx
#define RT1d	%esi

#define RT1bl	%sil

/*
 * Look up the two lowest bytes of 'reg' in two tables and fold both results
 * into the 32-bit view of 'acc', rotating 'reg' right by 'nrot' bits so the
 * next invocation sees a different byte pair.
 *
 *   idx_lo <- byte 0 of reg, selects the entry of tab_lo, combined with opa
 *   idx_hi <- byte 1 of reg, selects the entry of tab_hi, combined with opb
 *
 * Callers may pass the same register as idx_lo and acc: the opa access reads
 * the index in the same instruction that overwrites acc, so the two table
 * operations must stay in this order.
 */
#define do16bit_ror(nrot, opa, opb, tab_lo, tab_hi, idx_hi, idx_lo, reg, acc) \
	movzbl reg ## bl,			idx_lo ## d; \
	movzbl reg ## bh,			idx_hi ## d; \
	rorq $(nrot),				reg; \
	opa##l tab_lo(CTX, idx_lo, 4),		acc ## d; \
	opb##l tab_hi(CTX, idx_hi, 4),		acc ## d;

/* Exchange register 'reg' with memory operand 'slot' by way of 'scratch'. */
#define swap_ab_with_cd(reg, slot, scratch) \
	movq slot, scratch; \
	movq reg, slot; \
	movq scratch, reg;

/*
 * Combined G1 & G2 function. Reordered with help of rotates to have moves
 * at beginning.
 *
 * For each of the three blocks, x<i> collects the table lookups of bytes
 * 0..3 of ab<i> (tables Tx0..Tx3) and y<i> those of bytes 4..7 (tables Ty1,
 * Ty2, Ty3, Ty0 in that byte order). The four rotations per register add up
 * to 32+48+32+16 = 128 bits, so every ab<i> holds its original value again
 * when it is finally swapped with the matching cd<i> stack slot.
 *
 * Clobbers RT0 and RT1.
 */
#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
	/* G1,1 && G2,1 */ \
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
	\
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
	\
	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
	\
	/* G1,2 && G2,2 */ \
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
	swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
	\
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
	swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
	\
	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
	swap_ab_with_cd(ab ## 2, cd ## 2, RT0);

/*
 * Finish one encryption round for a single block.
 *
 * On entry 'ab' holds the half that g1g2_3 just swapped in from the stack.
 * With x' = x + y + k[2n] and y' = x + 2y + k[2n+1] (32-bit arithmetic):
 *
 *   new low  word of ab = ror32(x' ^ old low word, 1)
 *   new high word of ab = rol32(old high word, 1) ^ y'
 *
 * The closing orq relies on the 32-bit operations having cleared the upper
 * half of x.
 */
#define enc_round_end(ab, x, y, n) \
	addl y ## d,			x ## d; \
	addl x ## d,			y ## d; \
	addl k+4*(2*(n))(CTX),		x ## d; \
	xorl ab ## d,			x ## d; \
	addl k+4*(2*(n)+1)(CTX),	y ## d; \
	shrq $32,			ab; \
	roll $1,			ab ## d; \
	xorl y ## d,			ab ## d; \
	shlq $32,			ab; \
	rorl $1,			x ## d; \
	orq x,				ab;

/*
 * Finish one decryption round for a single block.
 *
 * With x' = x + y + k[2n] and y' = x + 2y + k[2n+1] (32-bit arithmetic):
 *
 *   new low  word of ba = ror32(y' ^ old low word, 1)
 *   new high word of ba = rol32(old high word, 1) ^ x'
 *
 * The closing orq relies on the 32-bit operations having cleared the upper
 * half of y.
 */
#define dec_round_end(ba, x, y, n) \
	addl y ## d,			x ## d; \
	addl x ## d,			y ## d; \
	addl k+4*(2*(n))(CTX),		x ## d; \
	addl k+4*(2*(n)+1)(CTX),	y ## d; \
	xorl ba ## d,			y ## d; \
	shrq $32,			ba; \
	roll $1,			ba ## d; \
	xorl x ## d,			ba ## d; \
	shlq $32,			ba; \
	rorl $1,			y ## d; \
	orq y,				ba;

/* One encryption round (round number n) over all three blocks. */
#define encrypt_round3(ab, cd, n) \
	g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
	\
	enc_round_end(ab ## 0, RX0, RY0, n); \
	enc_round_end(ab ## 1, RX1, RY1, n); \
	enc_round_end(ab ## 2, RX2, RY2, n);

/*
 * One decryption round (round number n) over all three blocks. Compared with
 * encryption the tables are passed in a rotated order and the x/y
 * accumulators are exchanged.
 */
#define decrypt_round3(ba, dc, n) \
	g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
	\
	dec_round_end(ba ## 0, RX0, RY0, n); \
	dec_round_end(ba ## 1, RX1, RY1, n); \
	dec_round_end(ba ## 2, RX2, RY2, n);

/* Cycle n = rounds 2n and 2n+1, in ascending order. */
#define encrypt_cycle3(ab, cd, n) \
	encrypt_round3(ab, cd, n*2); \
	encrypt_round3(ab, cd, (n*2)+1);

/* Cycle n = rounds 2n+1 and 2n, in descending order. */
#define decrypt_cycle3(ba, dc, n) \
	decrypt_round3(ba, dc, (n*2)+1); \
	decrypt_round3(ba, dc, (n*2));

/*
 * Park the cd halves on the stack so that CD0/CD1/CD2 address them and
 * %r8..%r10 become available as RX0..RX2.
 */
#define push_cd() \
	pushq RCD2; \
	pushq RCD1; \
	pushq RCD0;

/* Reload RCD0..RCD2 from the stack slots; the inverse of push_cd(). */
#define pop_cd() \
	popq RCD0; \
	popq RCD1; \
	popq RCD2;

/* Swap the two 32-bit halves of xy0, xy1 and xy2. */
#define swap_halves3(xy) \
	rorq $32,			xy ## 0; \
	rorq $32,			xy ## 1; \
	rorq $32,			xy ## 2;

/* Callee-saved registers used by both entry points (RY2, RY1, RAB1). */
#define save_callee_regs() \
	pushq %r13; \
	pushq %r12; \
	pushq %rbx;

/* Inverse of save_callee_regs(). */
#define restore_callee_regs() \
	popq %rbx; \
	popq %r12; \
	popq %r13;

/*
 * Load 8 bytes of each of three consecutive 16-byte blocks, starting at
 * 32-bit word 'off' of the block, into xy0..xy2 and xor each with the 8
 * bytes found at 32-bit word 'widx' of the w area.
 */
#define inpack3(src, off, xy, widx) \
	movq 4*(off)(src),		xy ## 0; \
	xorq w+4*widx(CTX),		xy ## 0; \
	\
	movq 4*(4+(off))(src),		xy ## 1; \
	xorq w+4*widx(CTX),		xy ## 1; \
	\
	movq 4*(8+(off))(src),		xy ## 2; \
	xorq w+4*widx(CTX),		xy ## 2;

/*
 * Xor xy0..xy2 with the 8 bytes at 32-bit word 'widx' of the w area, then
 * apply 'op' (mov: store, xor: xor into memory) to 32-bit word 'off' of three
 * consecutive 16-byte output blocks.
 */
#define outunpack3(op, dst, off, xy, widx) \
	xorq w+4*widx(CTX),		xy ## 0; \
	op ## q xy ## 0,		4*(off)(dst); \
	\
	xorq w+4*widx(CTX),		xy ## 1; \
	op ## q xy ## 1,		4*(4+(off))(dst); \
	\
	xorq w+4*widx(CTX),		xy ## 2; \
	op ## q xy ## 2,		4*(8+(off))(dst);

/* Encryption input: words 0-1 -> RAB (w[0..1]), words 2-3 -> RCD (w[2..3]). */
#define inpack_enc3() \
	inpack3(RIO, 0, RAB, 0); \
	inpack3(RIO, 2, RCD, 2);

/* Encryption output: RCD -> words 0-1 (w[4..5]), RAB -> words 2-3 (w[6..7]). */
#define outunpack_enc3(op) \
	outunpack3(op, RIO, 2, RAB, 6); \
	outunpack3(op, RIO, 0, RCD, 4);

/*
 * Decryption input: same block layout as encryption input, but xored with
 * w[4..7] and with the two 32-bit words of every register exchanged.
 */
#define inpack_dec3() \
	inpack3(RIO, 0, RAB, 4); \
	swap_halves3(RAB); \
	inpack3(RIO, 2, RCD, 6); \
	swap_halves3(RCD);

/*
 * Decryption output: exchange the 32-bit words back, xor with w[0..3] and
 * store RCD to words 0-1 and RAB to words 2-3 of each block.
 */
#define outunpack_dec3() \
	swap_halves3(RCD); \
	outunpack3(mov, RIO, 0, RCD, 0); \
	swap_halves3(RAB); \
	outunpack3(mov, RIO, 2, RAB, 2);

SYM_TYPED_FUNC_START(__twofish_enc_blk_3way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src, RIO
	 *	%rcx: bool, if true: xor output
	 */
	save_callee_regs();

	/* %rcx is about to become RAB2 and %rsi is RT1: keep both on the stack */
	pushq %rcx; /* bool xor */
	pushq %rsi; /* dst */

	inpack_enc3();

	push_cd();
	encrypt_cycle3(RAB, CD, 0);
	encrypt_cycle3(RAB, CD, 1);
	encrypt_cycle3(RAB, CD, 2);
	encrypt_cycle3(RAB, CD, 3);
	encrypt_cycle3(RAB, CD, 4);
	encrypt_cycle3(RAB, CD, 5);
	encrypt_cycle3(RAB, CD, 6);
	encrypt_cycle3(RAB, CD, 7);
	pop_cd();

	popq RIO; /* dst */
	popq RT1; /* bool xor */

	/* only the low byte of the flag is examined */
	testb RT1bl, RT1bl;
	jnz .L__enc_xor3;

	/* flag clear: overwrite dst with the result */
	outunpack_enc3(mov);

	restore_callee_regs();
	RET;

.L__enc_xor3:
	/* flag set: xor the result into what dst already holds */
	outunpack_enc3(xor);

	restore_callee_regs();
	RET;
SYM_FUNC_END(__twofish_enc_blk_3way)

SYM_TYPED_FUNC_START(twofish_dec_blk_3way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src, RIO
	 */
	save_callee_regs();

	/* %rsi is RT1 during the rounds: keep dst on the stack */
	pushq %rsi; /* dst */

	inpack_dec3();

	push_cd();
	decrypt_cycle3(RAB, CD, 7);
	decrypt_cycle3(RAB, CD, 6);
	decrypt_cycle3(RAB, CD, 5);
	decrypt_cycle3(RAB, CD, 4);
	decrypt_cycle3(RAB, CD, 3);
	decrypt_cycle3(RAB, CD, 2);
	decrypt_cycle3(RAB, CD, 1);
	decrypt_cycle3(RAB, CD, 0);
	pop_cd();

	popq RIO; /* dst */

	outunpack_dec3();

	restore_callee_regs();
	RET;
SYM_FUNC_END(twofish_dec_blk_3way)