/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

/*
 * ChaCha block functions for 32-bit MIPS, GNU as syntax, preprocessed by cpp.
 *
 * Exported symbols:
 *   chacha_crypt_arch  - STATE=$a0, OUT=$a1, IN=$a2, BYTES=$a3; the number of
 *                        rounds is read from 16($sp) on entry, i.e. it is
 *                        expected as a stack-passed fifth argument
 *                        (NOTE(review): o32-style; confirm against the C
 *                        prototype).
 *   hchacha_block_arch - STATE=$a0, OUT=$a1, NROUND=$a2.
 *
 * Both round loops subtract 2 from the round counter per iteration and exit
 * when it reaches zero, so the round count must be a positive even number.
 *
 * The code uses $at explicitly, hence ".set noat" around both functions.
 */

/* Byte count -> size in bytes of the whole 32-bit words it contains (0..60). */
#define MASK_U32		0x3c
#define CHACHA20_BLOCK_SIZE	64
/* Room for $s0-$s7 (8 * 4 bytes). */
#define STACK_SIZE		32

/* Working copy of the 16 state words. */
#define X0	$t0
#define X1	$t1
#define X2	$t2
#define X3	$t3
#define X4	$t4
#define X5	$t5
#define X6	$t6
#define X7	$t7
#define X8	$t8
#define X9	$t9
#define X10	$v1
#define X11	$s6
#define X12	$s5
#define X13	$s4
#define X14	$s3
#define X15	$s2
/*
 * Temporaries. They live in callee-saved registers that are reloaded from
 * the stack on exit, so no clear data is left behind in them.
 */
#define T0	$s1
#define T1	$s0
#define T(n)	T ## n
#define X(n)	X ## n

/* Input arguments */
#define STATE		$a0
#define OUT		$a1
#define IN		$a2
#define BYTES		$a3

/*
 * State word 12 (loaded from 48(STATE)) is kept in a register while blocks
 * are being processed instead of being updated in memory.
 * It must be incremented once per processed block and is written back to
 * 48(STATE) before returning.
 */
#define NONCE_0		$v0

/*
 * SAVED_X and SAVED_CA are set up by the jump table.
 * They live in registers that are reloaded from the stack on exit, so no
 * clear data is leaked through them.
 * They are used for handling the trailing bytes that do not form a whole
 * 32-bit word.
 */
#define SAVED_X		X15
#define SAVED_CA	$s7

/*
 * Shares $s7 with SAVED_CA: SAVED_CA is only loaded on the final, partial
 * block, after the last IS_UNALIGNED test of that block.
 */
#define IS_UNALIGNED	$s7

/*
 * MSB/LSB are the byte offsets used with lwl/swl and lwr/swr respectively.
 * CPU_TO_LE32 byte-swaps a register on big-endian and expands to nothing on
 * little-endian.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define CPU_TO_LE32(n) \
	wsbh	n, n; \
	rotr	n, 16;
#else
#define MSB 3
#define LSB 0
#define CPU_TO_LE32(n)
#endif

/* Expand x(n) for every state word index, ascending. */
#define FOR_EACH_WORD(x) \
	x( 0); \
	x( 1); \
	x( 2); \
	x( 3); \
	x( 4); \
	x( 5); \
	x( 6); \
	x( 7); \
	x( 8); \
	x( 9); \
	x(10); \
	x(11); \
	x(12); \
	x(13); \
	x(14); \
	x(15);

/* Expand x(n) for every state word index, descending. */
#define FOR_EACH_WORD_REV(x) \
	x(15); \
	x(14); \
	x(13); \
	x(12); \
	x(11); \
	x(10); \
	x( 9); \
	x( 8); \
	x( 7); \
	x( 6); \
	x( 5); \
	x( 4); \
	x( 3); \
	x( 2); \
	x( 1); \
	x( 0);

/*
 * PLUS_ONE(x) yields the token x+1. The store macros below use it to name
 * their entry labels: the label in front of the store of word x is
 * "..._<x+1>_b", so entering at label N stores words N-1 down to 0, i.e.
 * exactly N words.
 */
#define PLUS_ONE_0	1
#define PLUS_ONE_1	2
#define PLUS_ONE_2	3
#define PLUS_ONE_3	4
#define PLUS_ONE_4	5
#define PLUS_ONE_5	6
#define PLUS_ONE_6	7
#define PLUS_ONE_7	8
#define PLUS_ONE_8	9
#define PLUS_ONE_9	10
#define PLUS_ONE_10	11
#define PLUS_ONE_11	12
#define PLUS_ONE_12	13
#define PLUS_ONE_13	14
#define PLUS_ONE_14	15
#define PLUS_ONE_15	16
#define PLUS_ONE(x)	PLUS_ONE_ ## x
#define _CONCAT3(a,b,c)	a ## b ## c
#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)

/*
 * Produce output word x through unaligned accesses:
 *   X[x] += state word x (NONCE_0 for word 12, STATE memory otherwise),
 *   convert to little-endian, xor with the input word read via lwl/lwr,
 *   write the result via swl/swr.
 * Clobbers T0 and T1.
 */
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lwl	T1, (x*4)+MSB ## (IN); \
	lwr	T1, (x*4)+LSB ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	swl	X ## x, (x*4)+MSB ## (OUT); \
	swr	X ## x, (x*4)+LSB ## (OUT);

/*
 * Same as STORE_UNALIGNED, but with plain lw/sw accesses to IN and OUT.
 * Clobbers T0 and T1.
 */
#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lw	T1, (x*4) ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	sw	X ## x, (x*4) ## (OUT);

/* Jump table macro.
 * Used for setup and handling the last bytes, which are not a multiple of 4.
 * Entry x is taken when the final block holds x whole words: it branches to
 * the store label that writes words x-1..0 and, in the branch delay slot,
 * computes SAVED_X = X[x] + state word x, the key stream word covering the
 * trailing bytes.
 * X15 is free to store Xn.
 * Every jump table entry must be equal in size: each one is two
 * instructions (8 bytes), which is what the offset computation in front of
 * the table relies on.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha_mips_jmptbl_aligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_aligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

#define JMPTBL_UNALIGNED(x) \
.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_unaligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

/*
 * Four interleaved add-xor-rotate steps:
 *   X[A..D] += X[K..N];  X[V..Z] ^= X[A..D];  X[V..Z] = rotl32(X[V..Z], S)
 * The left rotation by S is expressed as a right rotation by 32 - S.
 */
#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
	addu	X(A), X(K); \
	addu	X(B), X(L); \
	addu	X(C), X(M); \
	addu	X(D), X(N); \
	xor	X(V), X(A); \
	xor	X(W), X(B); \
	xor	X(Y), X(C); \
	xor	X(Z), X(D); \
	rotr	X(V), 32 - S; \
	rotr	X(W), 32 - S; \
	rotr	X(Y), 32 - S; \
	rotr	X(Z), 32 - S;

.text
.set	reorder
.set	noat
.globl	chacha_crypt_arch
.ent	chacha_crypt_arch
/*
 * chacha_crypt_arch
 * In:    STATE ($a0)  16 state words; word 12 is read on entry and written
 *                     back, incremented once per processed block, on exit
 *        OUT   ($a1)  destination buffer
 *        IN    ($a2)  source buffer
 *        BYTES ($a3)  number of bytes to process; 0 returns immediately
 *        16($sp)      number of rounds (positive, even)
 * Uses:  $at, $v0, $v1, $t0-$t9, $a1-$a3; $s0-$s7 are saved and restored.
 * Stack: STACK_SIZE bytes.
 */
chacha_crypt_arch:
	.frame	$sp, STACK_SIZE, $ra

	/* Load number of rounds */
	lw	$at, 16($sp)

	addiu	$sp, -STACK_SIZE

	/* Nothing to do when BYTES == 0. */
	beqz	BYTES, .Lchacha_mips_end

	lw	NONCE_0, 48(STATE)

	/* Save s0-s7 */
	sw	$s0,  0($sp)
	sw	$s1,  4($sp)
	sw	$s2,  8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)
	sw	$s6, 24($sp)
	sw	$s7, 28($sp)

	/* Test IN or OUT is unaligned.
	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
	 */
	or	IS_UNALIGNED, IN, OUT
	andi	IS_UNALIGNED, 0x3

	b	.Lchacha_rounds_start

.align 4
.Loop_chacha_rounds:
	/* Advance to the next block and bump the block counter. */
	addiu	IN,  CHACHA20_BLOCK_SIZE
	addiu	OUT, CHACHA20_BLOCK_SIZE
	addiu	NONCE_0, 1

.Lchacha_rounds_start:
	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)

	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)

	/* Word 12 comes from the register copy, not from memory. */
	move	X12, NONCE_0
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_chacha_xor_rounds:
	/* Two rounds per iteration; $at holds the remaining round count. */
	addiu	$at, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$at, .Loop_chacha_xor_rounds

	/* From here on BYTES < 0 means the current block is a partial one. */
	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)

	/* Is data src/dst unaligned? Jump */
	bnez	IS_UNALIGNED, .Loop_chacha_unaligned

	/* Reload the number of rounds for the next block. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_aligned

	FOR_EACH_WORD_REV(STORE_ALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/*
	 * Count the block just processed (the loop-back path does this at
	 * .Loop_chacha_rounds instead).
	 */
	addiu	NONCE_0, 1

	/* BYTES < 0? Handle last bytes */
	bltz	BYTES, .Lchacha_mips_xor_bytes

.Lchacha_mips_xor_done:
	/* Restore used registers */
	lw	$s0,  0($sp)
	lw	$s1,  4($sp)
	lw	$s2,  8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)
	lw	$s6, 24($sp)
	lw	$s7, 28($sp)

	/* Write NONCE_0 back to right location in state */
	sw	NONCE_0, 48(STATE)

.Lchacha_mips_end:
	addiu	$sp, STACK_SIZE
	jr	$ra

.Lchacha_mips_no_full_block_aligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS (as a byte count: 4 * words) */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_aligned_0)

	/* Calculate lower half jump table offset: 8 bytes per full word */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_aligned_0)

	/* Read the state word that belongs to the partial word */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_ALIGNED)


.Loop_chacha_unaligned:
	/* Reload the number of rounds for the next block. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_unaligned

	FOR_EACH_WORD_REV(STORE_UNALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/*
	 * Write NONCE_0 back to right location in state.
	 * Every path from here ends at .Lchacha_mips_xor_done, which stores
	 * the incremented value again.
	 */
	sw	NONCE_0, 48(STATE)

	.set noreorder
	/* Done when no bytes are left, else fall through to byte handling */
	bgez	BYTES, .Lchacha_mips_xor_done
	/*
	 * Entry points for a final block with zero whole words; they sit on
	 * the delay slot instruction so that the block is counted as well.
	 */
.Lchacha_mips_xor_unaligned_0_b:
.Lchacha_mips_xor_aligned_0_b:
	/* Place this here to fill delay slot */
	addiu	NONCE_0, 1
	.set reorder

.Lchacha_mips_xor_bytes:
	/*
	 * Handle the 1..3 bytes after the last whole word.
	 * $at = size in bytes of the whole words of this block,
	 * BYTES = -(number of trailing bytes),
	 * SAVED_X = key stream word, consumed low byte first.
	 */
	addu	IN, $at
	addu	OUT, $at
	/* First byte */
	lbu	T1, 0(IN)
	addiu	$at, BYTES, 1
	xor	T1, SAVED_X
	sb	T1, 0(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Second byte */
	lbu	T1, 1(IN)
	addiu	$at, BYTES, 2
	rotr	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 1(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Third byte */
	lbu	T1, 2(IN)
	rotr	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 2(OUT)
	b	.Lchacha_mips_xor_done

.Lchacha_mips_no_full_block_unaligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS (as a byte count: 4 * words) */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)

	/* Calculate lower half jump table offset: 8 bytes per full word */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)

	/* Read the state word that belongs to the partial word */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha_crypt_arch
.set at

/*
 * hchacha_block_arch
 * Input arguments
 * STATE	$a0	16 input state words (read only)
 * OUT		$a1	receives 8 words: X0-X3 followed by X12-X15
 * NROUND	$a2	number of rounds (positive, even)
 *
 * Unlike chacha_crypt_arch, the original state is not added back after the
 * rounds. X12-X15 are remapped to registers that need no saving, so $s6
 * (X11) is the only register that has to be preserved here. X15 reuses the
 * STATE register and is therefore loaded last.
 */

#undef X12
#undef X13
#undef X14
#undef X15

#define X12	$a3
#define X13	$at
#define X14	$v0
#define X15	STATE

.set noat
.globl	hchacha_block_arch
.ent	hchacha_block_arch
hchacha_block_arch:
	.frame	$sp, STACK_SIZE, $ra

	addiu	$sp, -STACK_SIZE

	/* Save X11(s6) */
	sw	X11, 0($sp)

	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)
	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)
	lw	X12, 48(STATE)
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_hchacha_xor_rounds:
	/* Two rounds per iteration; $a2 holds the remaining round count. */
	addiu	$a2, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$a2, .Loop_hchacha_xor_rounds

	/* Restore used register */
	lw	X11, 0($sp)

	sw	X0,  0(OUT)
	sw	X1,  4(OUT)
	sw	X2,  8(OUT)
	sw	X3,  12(OUT)
	sw	X12, 16(OUT)
	sw	X13, 20(OUT)
	sw	X14, 24(OUT)
	sw	X15, 28(OUT)

	addiu	$sp, STACK_SIZE
	jr	$ra
.end hchacha_block_arch
.set at