Lines Matching +full:sub +full:- +full:blocks
2 * Implement fast SHA-1 with AVX2 instructions. (x86_64)
59 * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
62 *Visit http://software.intel.com/en-us/articles/
63 *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
65 *Updates the 20-byte SHA-1 record at the start of 'state', from 'input', for an
66 *even number ('blocks') of consecutive 64-byte blocks.
69 * struct sha1_state *state, const u8* input, int blocks );
146 * - 80 DWORDs per iteration * 2
201 /* message scheduling pre-compute for rounds 0-15 */
209 vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
224 * message scheduling pre-compute for rounds 16-31
226 * pre-calculate K+w[i] values and store to mem
229 * "brute force" vectorization for rounds 16-31 only
230 * due to w[i]->w[i-3] dependency
237 /* w[i-14] */
239 vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
266 * in SHA-1 specification:
267 * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
269 * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
271 * since w[i]=>w[i-3] dependency is broken
359 rorx $(32-30), B, B /* b>>>2 */
388 rorx $(32-5), A, TA /* T2 = A <<< 5 (rotate right by 27) */
389 rorx $(32-30),A, TB /* b>>>2 for next round */
391 PRECALC (\r) /* msg scheduling for next 2 blocks */
408 rorx $(32-5), A, TA /* T2 = A <<< 5 (rotate right by 27) */
410 rorx $(32-30), A, TB /* b>>>2 for next round */
412 PRECALC (\r) /* msg scheduling for next 2 blocks */
427 PRECALC (\r) /* msg scheduling for next 2 blocks */
434 rorx $(32-5), A, TA /* T2 = A <<< 5 (rotate right by 27) */
435 rorx $(32-30), A, TB /* b>>>2 for next round */
459 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
474 # Precalc WK for first 2 blocks
525 sub $1, BLOCKS_CTR
583 sub $1, BLOCKS_CTR
622 * macro implements SHA-1 function's body for several 64-byte blocks
639 and $~(0x20-1), %rsp
640 sub $RESERVE_STACK, %rsp