/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Copyright (c) 2010, Intel Corporation.
 *
 * Ported x86_64 version to x86:
 *    Author: Mathias Krause <minipli@googlemail.com>
 */

#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/frame.h>

#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define GF128MUL_MASK %xmm7

#ifdef __x86_64__
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif

/*
 * Context layout as used by the code in this file (offsets from the ctx
 * pointer passed in KEYP):
 *	  0: encryption round keys, 16 bytes each, written by aesni_set_key
 *	240: decryption round keys, 16 bytes each, written by aesni_set_key
 *	480: key length (32 bit), as passed to aesni_set_key
 *
 * Note that several of the register aliases above overlap on purpose
 * (UKEYP == OUTP, TKEYP == T1, IN == IN1, STATE == STATE1, and
 * GF128MUL_MASK shares %xmm7 with IN2).
 */

/*
 * _key_expansion_256a / _key_expansion_128: internal ABI
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	result of aeskeygenassist
 *	%xmm4:	scratch, set up by aesni_set_key ("assumed 0")
 *	TKEYP:	where to store the new round key
 * output:
 *	%xmm0:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd $0b11111111, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0
	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)

/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0, %xmm2:	current key material
 *	%xmm1:		result of aeskeygenassist
 *	%xmm4:		scratch, set up by aesni_set_key ("assumed 0")
 *	TKEYP:		where to store the expanded key material
 * output:
 *	%xmm0, %xmm2:	updated key material
 *	32 bytes stored at (TKEYP); TKEYP advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (TKEYP)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 0x10(TKEYP)
	add $0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)

/*
 * _key_expansion_192b: internal ABI
 * Same inputs as _key_expansion_192a, but only %xmm0 (16 bytes) is stored
 * at (TKEYP) and TKEYP is advanced by 16.
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5 (%xmm0 and %xmm2 are updated)
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd $0b01010101, %xmm1, %xmm1
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)

/*
 * _key_expansion_256b: internal ABI
 * Counterpart of _key_expansion_256a that updates and stores %xmm2
 * instead of %xmm0.
 * input:
 *	%xmm2:	previous round key of this half
 *	%xmm1:	result of aeskeygenassist
 *	%xmm4:	scratch
 * output:
 *	%xmm2:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd $0b10101010, %xmm1, %xmm1
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)

/*
 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                    unsigned int key_len)
 *
 * Expands in_key into the encryption round keys at ctx+0, records key_len
 * at ctx+480 and then derives the decryption round keys at ctx+240.
 * key_len is compared against 24: below selects the 128-bit schedule,
 * equal the 192-bit schedule, above the 256-bit schedule.
 */
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
#endif
	movups (UKEYP), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (KEYP)
	lea 0x10(KEYP), TKEYP		# key addr
	movl %edx, 480(KEYP)
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192
	movups 0x10(UKEYP), %xmm2	# other user key
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(UKEYP), %xmm2		# other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
	call _key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
	call _key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
	call _key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
	call _key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
	call _key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
	call _key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
	call _key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
	call _key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
	call _key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
	call _key_expansion_128
.Ldec_key:
	/*
	 * Build the decryption schedule at ctx+240: the encryption round
	 * keys in reverse order.  The first and last keys are copied
	 * unchanged; every key in between goes through aesimc.
	 */
	sub $0x10, TKEYP		# TKEYP = last encryption round key
	movaps (KEYP), %xmm0
	movaps (TKEYP), %xmm1
	movaps %xmm0, 240(TKEYP)
	movaps %xmm1, 240(KEYP)
	add $0x10, KEYP
	lea 240-16(TKEYP), UKEYP	# UKEYP walks the dec keys downwards
.align 4
.Ldec_key_loop:
	movaps (KEYP), %xmm0
	aesimc %xmm0, %xmm1
	movaps %xmm1, (UKEYP)
	add $0x10, KEYP
	sub $0x10, UKEYP
	cmp TKEYP, KEYP
	jb .Ldec_key_loop
#ifndef __x86_64__
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_set_key)

/*
 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 *
 * Encrypt the single 16-byte block at src into dst.
 */
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_enc)

/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (as stored at ctx+480; compared against
 *			24 to pick the 128/192/256-bit path)
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .Lenc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps (TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE
	RET
SYM_FUNC_END(_aesni_enc1)

/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (as stored at ctx+480; compared against
 *			24 to pick the 128/192/256-bit path)
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps (TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE1		# last round
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_enc4)

/*
 * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
 *
 * Decrypt the single 16-byte block at src into dst.
 */
SYM_FUNC_START(aesni_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# switch to the decryption round keys
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		#output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_dec)

/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer (already advanced to the
 *			decryption round keys by the caller)
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE		# round 0
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .Ldec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps (TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE
	RET
SYM_FUNC_END(_aesni_dec1)

/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		key struct pointer (already advanced to the
 *			decryption round keys by the caller)
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags for the je below
	je .L4dec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps (TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE1		# last round
	aesdeclast KEY, STATE2
	aesdeclast KEY, STATE3
	aesdeclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_dec4)

/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * Encrypts whole 16-byte blocks from src to dst, four at a time while at
 * least 64 bytes remain, then one at a time.  A trailing partial block
 * (len % 16) is not processed.
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN		# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4	# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1	# unsigned: LEN is a byte count
.Lecb_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_enc)

/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * Decryption counterpart of aesni_ecb_enc; uses the round keys at ctx+240.
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4	# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1	# unsigned: LEN is a byte count
.Lecb_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_dec)

/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts whole 16-byte blocks.  The last ciphertext block is written
 * back to *iv; nothing is done (and *iv is left untouched) when len < 16.
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# unsigned: LEN is a byte count
	movups STATE, (IVP)
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_enc)

/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts whole 16-byte blocks, four at a time while at least 64
 * bytes remain.  The last ciphertext block is written back to *iv.
 * On 32-bit only %xmm0-%xmm7 are used, so IN1/IN2 are reused for the
 * third and fourth block and the first two ciphertext blocks are
 * re-read from memory after decryption.
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
#ifdef __x86_64__
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
#else
	movups 0x20(INP), IN1
	movaps IN1, STATE3
	movups 0x30(INP), IN2
	movaps IN2, STATE4
#endif
	call _aesni_dec4
	pxor IV, STATE1
#ifdef __x86_64__
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV
#else
	pxor IN1, STATE4
	movaps IN2, IV
	movups (INP), IN1
	pxor IN1, STATE2
	movups 0x10(INP), IN2
	pxor IN2, STATE3
#endif
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4	# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1	# unsigned: LEN is a byte count
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_dec)

/*
 * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Encrypts the 16-byte block at src and the 16-byte window that ends at
 * src + len, storing to dst + len - 16 first and to dst last.  The shuffle
 * masks are taken from .Lcts_permute_table at offsets len - 16 and
 * 48 - len.  *iv is read but not written back.
 * NOTE(review): no length check is done here; presumably the caller
 * guarantees 16 < len <= 32 - confirm against the glue code.
 */
SYM_FUNC_START(aesni_cts_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	movups (IVP), STATE
	sub $16, LEN
	mov T1, IVP		# IVP is free from here on: reuse as table ptr
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	movups (T1), %xmm4	# masks must be loaded before T1 (== TKEYP)
	movups (IVP), %xmm5	# is clobbered by _aesni_enc1

	movups (INP), IN1
	add LEN, INP
	movups (INP), IN2

	pxor IN1, STATE
	call _aesni_enc1

	pshufb %xmm5, IN2
	pxor STATE, IN2
	pshufb %xmm4, STATE
	add OUTP, LEN
	movups STATE, (LEN)

	movaps IN2, STATE
	call _aesni_enc1
	movups STATE, (OUTP)

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_enc)

/*
 * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Decryption counterpart of aesni_cts_cbc_enc; uses the round keys at
 * ctx+240.  *iv is read but not written back.
 * NOTE(review): no length check is done here; presumably the caller
 * guarantees 16 < len <= 32 - confirm against the glue code.
 */
SYM_FUNC_START(aesni_cts_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	add $240, KEYP
	movups (IVP), IV
	sub $16, LEN
	mov T1, IVP		# IVP is free from here on: reuse as table ptr
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	movups (T1), %xmm4

	movups (INP), STATE
	add LEN, INP
	movups (INP), IN1

	call _aesni_dec1
	movaps STATE, IN2
	pshufb %xmm4, STATE
	pxor IN1, STATE

	add OUTP, LEN
	movups STATE, (LEN)

	movups (IVP), %xmm0	# pblendvb takes its mask implicitly in %xmm0
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE
	call _aesni_dec1

	pxor IV, STATE
	movups STATE, (OUTP)

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_dec)

.pushsection .rodata
.align 16
.Lcts_permute_table:
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
#ifdef __x86_64__
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#endif
.popsection

#ifdef __x86_64__
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 */
SYM_FUNC_START_LOCAL(_aesni_inc_init)
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	pshufb BSWAP_MASK, CTR
	mov $1, TCTR_LOW
	movq TCTR_LOW, INC
	movq CTR, TCTR_LOW
	RET
SYM_FUNC_END(_aesni_inc_init)

/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 * output:
 *	IV:	Increase by 1
 * changed:
 *	CTR:	== output IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 */
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq INC, CTR
	add $1, TCTR_LOW
	jnc .Linc_low
	pslldq $8, INC		# low qword wrapped: carry into the high qword
	paddq INC, CTR
	psrldq $8, INC
.Linc_low:
	movaps CTR, IV
	pshufb BSWAP_MASK, IV
	RET
SYM_FUNC_END(_aesni_inc)

/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR-mode en/decryption of whole 16-byte blocks (x86_64 only).  The
 * updated counter is written back to *iv; nothing is done (and *iv is left
 * untouched) when len < 16.
 */
SYM_FUNC_START(aesni_ctr_enc)
	ANNOTATE_NOENDBR
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4	# unsigned: LEN is a byte count
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1	# unsigned: LEN is a byte count
.Lctr_enc_ret:
	movups IV, (IVP)
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)

#endif

.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous

/*
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 */
.macro _aesni_gf128mul_x_ble
	pshufd $0x13, IV, KEY
	paddq IV, IV
	psrad $31, KEY
	pand GF128MUL_MASK, KEY
	pxor KEY, IV
.endm

/*
 * _aesni_xts_crypt: body shared by aesni_xts_enc and aesni_xts_dec
 *	enc:	1 to generate the encryption routine, 0 for decryption
 *
 * The macro expands to a complete function body including FRAME_BEGIN,
 * FRAME_END and RET.  LEN is handled as a signed quantity in here: it is
 * decremented first and tested with jl afterwards, so it goes negative
 * when fewer bytes than one step remain.  The updated tweak is written
 * back to *iv except on the ciphertext-stealing path, which jumps straight
 * to .Lxts_ret.
 */
.macro	_aesni_xts_crypt	enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN
.if !\enc
	add $240, KEYP

	/* len not a multiple of 16: hold one full block back for the tail */
	test $15, LEN
	jz .Lxts_loop4\@
	sub $16, LEN
.endif

.Lxts_loop4\@:
	sub $64, LEN
	jl .Lxts_1x\@

	/*
	 * Each tweak is parked in the output buffer while the four blocks
	 * are processed and is read back for the final xor below.
	 */
	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

.if \enc
	call _aesni_enc4
.else
	call _aesni_dec4
.endif

	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_loop4\@

.Lxts_ret_iv\@:
	movups IV, (IVP)

.Lxts_ret\@:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_1x\@:
	add $64, LEN
	jz .Lxts_ret_iv\@
.if \enc
	sub $16, LEN
	jl .Lxts_cts4\@
.endif

.Lxts_loop1\@:
	movdqu (INP), STATE
.if \enc
	pxor IV, STATE
	call _aesni_enc1
.else
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
	pxor IV, STATE
	call _aesni_dec1
.endif
	pxor IV, STATE
	_aesni_gf128mul_x_ble

	test LEN, LEN
	jz .Lxts_out\@

.if \enc
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
.endif

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_loop1\@

.Lxts_out\@:
	movdqu STATE, (OUTP)
	jmp .Lxts_ret_iv\@

.if \enc
.Lxts_cts4\@:
	movdqa STATE4, STATE
	sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
.endif
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0	/* pblendvb takes its mask implicitly in %xmm0 */
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

.if \enc
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
.else
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE
.endif

	movups STATE, (OUTP)
	jmp .Lxts_ret\@
.endm

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt	1
SYM_FUNC_END(aesni_xts_enc)

/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_dec)
	_aesni_xts_crypt	0
SYM_FUNC_END(aesni_xts_dec)