# xref: /linux/arch/powerpc/crypto/ghashp10-ppc.pl (revision 81d358b118dc364bd147432db569d4d400a5a4f2)
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.
# Command-line arguments: ABI flavour (linux64/linux64le/linux32/...) and
# the output file name, both forwarded to the ppc-xlate.pl postprocessor.
$flavour=shift;
$output =shift;

# Select ABI-dependent sizes and load/store mnemonics.  Only $SIZE_T etc.
# vary; the generated GHASH code itself is the same for both word sizes.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

# Locate ppc-xlate.pl next to this script or in the perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe our output through the translator.  Note: low-precedence "or" is
# required here -- with "||" the die would bind to the (always true)
# command string and open() failures would go unreported.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Symbolic register assignments used throughout the assembly template.
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));		# accumulator halves and input
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));	# second lane (H^2)
my $vrsave="r12";				# saved VRSAVE SPR
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);		# scratch aliases, reused after H is stored

# Assembly template.  le?-prefixed instructions are kept only for
# little-endian flavours (see the postprocess loop at the bottom);
# ppc-xlate.pl expands .globl into the actual function label.
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p10
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p10,.-.gcm_init_p10

.globl	.gcm_init_htable
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$IN,$H,$t1		# twisted H

	vsldoi		$H,$IN,$IN,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	li		r8,0x40
	stvx_u		$H, r9,r3
	li		r9,0x50
	stvx_u		$Hh,r10,r3
	li		r10,0x60

	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$IN1,$Xl,$t1

	vsldoi		$H2,$IN1,$IN1,8
	vsldoi		$H2l,$zero,$H2,8
	vsldoi		$H2h,$H2,$zero,8

	stvx_u		$H2l,r8,r3		# save H^2
	li		r8,0x70
	stvx_u		$H2,r9,r3
	li		r9,0x80
	stvx_u		$H2h,r10,r3
	li		r10,0x90

	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
	 vpmsumd	$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
	 vpmsumd	$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
	 vpmsumd	$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
	 vpmsumd	$t6,$Xl1,$xC2		# 1st reduction phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	 vsldoi		$t4,$Xm1,$zero,8
	 vsldoi		$t5,$zero,$Xm1,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1
	 vxor		$Xl1,$Xl1,$t4
	 vxor		$Xh1,$Xh1,$t5

	vsldoi		$Xl,$Xl,$Xl,8
	 vsldoi		$Xl1,$Xl1,$Xl1,8
	vxor		$Xl,$Xl,$t2
	 vxor		$Xl1,$Xl1,$t6

	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
	 vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
	vpmsumd		$Xl,$Xl,$xC2
	 vpmsumd	$Xl1,$Xl1,$xC2
	vxor		$t1,$t1,$Xh
	 vxor		$t5,$t5,$Xh1
	vxor		$Xl,$Xl,$t1
	 vxor		$Xl1,$Xl1,$t5

	vsldoi		$H,$Xl,$Xl,8
	 vsldoi		$H2,$Xl1,$Xl1,8
	vsldoi		$Hl,$zero,$H,8
	vsldoi		$Hh,$H,$zero,8
	 vsldoi		$H2l,$zero,$H2,8
	 vsldoi		$H2h,$H2,$zero,8

	stvx_u		$Hl,r8,r3		# save H^3
	li		r8,0xa0
	stvx_u		$H,r9,r3
	li		r9,0xb0
	stvx_u		$Hh,r10,r3
	li		r10,0xc0
	 stvx_u		$H2l,r8,r3		# save H^4
	 stvx_u		$H2,r9,r3
	 stvx_u		$H2h,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_htable,.-.gcm_init_htable

.globl	.gcm_gmult_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p10,.-.gcm_gmult_p10

.globl	.gcm_ghash_p10
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	 le?lvsl	$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	 le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	 le?vxor	$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	 le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	 subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	 subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	 and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	 add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	 lvx_u		$IN,0,$inp
	 addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	 le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p10,.-.gcm_ghash_p10

.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align  2
___

# Postprocess the template: keep le?-instructions (prefix stripped) on
# little-endian flavours and comment out be?-lines, and vice versa on
# big-endian; then emit each line to the xlate pipe opened above.
foreach (split("\n",$code)) {
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o;
	} else {
	    s/le\?/#le#/o	or
	    s/be\?//o;
	}
	print $_,"\n";
}

# STDOUT is a pipe to the translator; buffered write errors only surface
# at close, so check it.
close STDOUT or die "error closing STDOUT: $!";