xref: /openbsd/lib/libcrypto/sha/asm/sha1-parisc.pl (revision 73471bf0)
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for PA-RISC.
11
12# June 2009.
13#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned input. Compared to the
# vendor compiler on PA-8600 it is almost 60% faster in a 64-bit build and
# just a few percent faster in a 32-bit one (this is for aligned input;
# data for unaligned input is not available).
19#
20# Special thanks to polarhome.com for providing HP-UX account.
21
# Command line: <flavour> [<output file>].
# A flavour matching /64/ selects the 64-bit parameter set below, anything
# else (including no flavour at all) selects the 32-bit one.
$flavour = shift;
$output = shift;

# Send the generated assembly to the named file.  The result of open is
# checked so that an unwritable path aborts the run with a diagnostic
# instead of being silently ignored; the 3-argument form keeps characters
# in the file name from being interpreted as part of the open mode.  When
# no output file is named, STDOUT is left untouched.
if (defined($output) && length($output)) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

# Per-ABI parameters:
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        size in bytes of one saved register slot
#   $FRAME_MARKER  bytes added to the frame on top of the register slots
#   $SAVED_RP      distance below %sp at which %r2 is stored on entry
#   $PUSH/$POP     plain store/load mnemonics for one register slot
#   $PUSHMA/$POPMB store/load mnemonics used together with %r3 to
#                  allocate and release the frame
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

# Scratch registers and the register holding the current round constant.
$t0="%r28";
$t1="%r29";
$K="%r31";

# Registers holding the 16 message words.  The 17th entry aliases $t0 and
# is only referenced while realigning unaligned input.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Registers holding the five working variables; @V is the rotating view
# handed to the round generators, $A..$E keep the initial assignment.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
60
# Append the assembly for one round in the range 0..19 to $code.
#
# Arguments: the round number followed by the names of the five registers
# currently playing the roles a, b, c, d and e.
#
# Each emitted round adds into e: the round constant in $K, the message
# word for this round, a rotated by 27 via shd, and (c & b) | (d & ~b).
# It also rotates b by 2 via shd.  From round 15 onwards the message word
# needed by the following round is additionally derived in place from
# three other entries of the 16-register @X ring (xor, xor, xor, then shd
# by 31), interleaved with the round arithmetic.
sub BODY_00_19 {
	my ($rnd, $va, $vb, $vc, $vd, $ve) = @_;

	if ($rnd < 15) {
		# The input words are used as loaded; no schedule update yet.
		my $w = $X[$rnd];
		$code .= <<___;
	addl	$K,$ve,$ve	; $rnd
	shd	$va,$va,27,$t1
	addl	$w,$ve,$ve
	and	$vc,$vb,$t0
	addl	$t1,$ve,$ve
	andcm	$vd,$vb,$t1
	shd	$vb,$vb,2,$vb
	or	$t1,$t0,$t0
	addl	$t0,$ve,$ve
___
	}
	else {
		# Forward Xupdate: prepare the word for round $rnd+1 now.
		my $nxt = $rnd + 1;
		my $w   = $X[$rnd % 16];
		my $wn  = $X[$nxt % 16];
		my $w2  = $X[($nxt + 2) % 16];
		my $w8  = $X[($nxt + 8) % 16];
		my $w13 = $X[($nxt + 13) % 16];
		$code .= <<___;
	addl	$K,$ve,$ve	; $rnd
	shd	$va,$va,27,$t1
	xor	$w2,$wn,$wn
	addl	$w,$ve,$ve
	and	$vc,$vb,$t0
	xor	$w8,$wn,$wn
	addl	$t1,$ve,$ve
	andcm	$vd,$vb,$t1
	shd	$vb,$vb,2,$vb
	or	$t1,$t0,$t0
	xor	$w13,$wn,$wn
	add	$t0,$ve,$ve
	shd	$wn,$wn,31,$wn
___
	}
}
91
# Append the assembly for one round using the b ^ c ^ d function to $code.
# The main script calls this for rounds 20..39 and again for 60..79.
#
# Arguments: the round number followed by the names of the five registers
# currently playing the roles a, b, c, d and e.
#
# Rounds below 79 interleave the in-place update of the next message word
# in the @X ring with the round arithmetic.  Round 79 has no next word to
# prepare; its free slots are used instead to load the five words at
# offsets 0..16 from $ctx into the first five @X registers.  Any round
# number above 79 emits nothing.
sub BODY_20_39 {
	my ($rnd, $va, $vb, $vc, $vd, $ve) = @_;
	my $w = $X[$rnd % 16];

	if ($rnd < 79) {
		my $nxt = $rnd + 1;
		my $wn  = $X[$nxt % 16];
		my $w2  = $X[($nxt + 2) % 16];
		my $w8  = $X[($nxt + 8) % 16];
		my $w13 = $X[($nxt + 13) % 16];
		$code .= <<___;
	xor	$w2,$wn,$wn	; $rnd
	addl	$K,$ve,$ve
	shd	$va,$va,27,$t1
	xor	$w8,$wn,$wn
	addl	$w,$ve,$ve
	xor	$vb,$vc,$t0
	xor	$w13,$wn,$wn
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$vd,$t0,$t0
	shd	$wn,$wn,31,$wn
	addl	$t0,$ve,$ve
___
	}
	elsif ($rnd == 79) {
		# Last round: fetch the context words in place of the Xupdate.
		$code .= <<___;
	ldw	0($ctx),$X[0]	; $rnd
	addl	$K,$ve,$ve
	shd	$va,$va,27,$t1
	ldw	4($ctx),$X[1]
	addl	$w,$ve,$ve
	xor	$vb,$vc,$t0
	ldw	8($ctx),$X[2]
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$vd,$t0,$t0
	ldw	12($ctx),$X[3]
	addl	$t0,$ve,$ve
	ldw	16($ctx),$X[4]
___
	}
}
125
# Append the assembly for one round in the range 40..59 to $code.
#
# Arguments: the round number followed by the names of the five registers
# currently playing the roles a, b, c, d and e.
#
# The emitted round adds into e: the round constant in $K, the message
# word for this round, a rotated by 27 via shd, and the two terms
# (d ^ c) & b and d & c, each added separately.  It also rotates b by 2
# via shd and, interleaved with the above, updates the message word for
# the following round in place in the 16-register @X ring.
sub BODY_40_59 {
	my ($rnd, $va, $vb, $vc, $vd, $ve) = @_;
	my $nxt = $rnd + 1;

	# Ring entries touched by this round.
	my $w   = $X[$rnd % 16];
	my $wn  = $X[$nxt % 16];
	my $w2  = $X[($nxt + 2) % 16];
	my $w8  = $X[($nxt + 8) % 16];
	my $w13 = $X[($nxt + 13) % 16];

	$code .= <<___;
	shd	$va,$va,27,$t1	; $rnd
	addl	$K,$ve,$ve
	xor	$w2,$wn,$wn
	xor	$vd,$vc,$t0
	addl	$w,$ve,$ve
	xor	$w8,$wn,$wn
	and	$vb,$t0,$t0
	addl	$t1,$ve,$ve
	shd	$vb,$vb,2,$vb
	xor	$w13,$wn,$wn
	addl	$t0,$ve,$ve
	and	$vd,$vc,$t1
	shd	$wn,$wn,31,$wn
	addl	$t1,$ve,$ve
___
}
146
# Build the assembly text in $code.  Spans enclosed in backticks are Perl
# expressions that are evaluated once the whole text has been assembled.
#
# First part: directives and the sha1_block_data_order entry point, the
# prologue storing %r2 and %r3..%r16 into the new frame, loading of the
# five words at offsets 0..16 from $ctx into $A..$E, and setting %cr11 to
# 32-8*(inp&3) for the realignment shifts below.  L$oop is the top of the
# per-block loop; it leaves $inp with its two low bits cleared in $t0.
$code=<<___;
	.LEVEL	$LEVEL
#if 0
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
#else
	.text
#endif

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
	# Words 0..14 are always read from the word-aligned address in $t0.
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
# Branch to L$aligned when $inp equals its word-aligned copy.  Word 15 is
# loaded by the instruction placed directly after the branch; the 17th
# word (into @X[16], i.e. $t0 itself) is fetched only on the unaligned
# path.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
	# Unaligned path: combine each pair of neighbouring words with vshd.
	for ($i=0;$i<16;$i++) {		# align input
	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
# Each group of 20 rounds is preceded by loading its constant into $K with
# an ldil/ldo pair; here 0x5a827000+0x999.
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# After every round @V is rotated by one position, so the register that was
# passed as e becomes a for the next round and no register moves are
# emitted.  $i deliberately carries over from one loop to the next.
for ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
# Constant for rounds 20..39: 0x6ed9e000+0xba1.
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Constant for rounds 40..59: 0x8f1bb000+0xcdc.
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
# Constant for rounds 60..79: 0xca62c000+0x1d6.  These rounds reuse the
# 20..39 generator, whose round 79 also reloads the context words into
# @X[0..4].
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Tail of the loop: add the context words held in @X[0..4] to $A..$E and
# store the sums back to $ctx, then decrement $num and branch back to
# L$oop while it is non-zero; $inp is advanced by 64 by the instruction
# placed directly after that branch.  The epilogue reloads %r2 and
# %r4..%r16, and the instruction after the return branch reloads %r3 and
# releases the frame.  The closing .STRINGZ places an identification
# string in the data section.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<> -1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.data
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
261
# Post-process the assembled text and write it out.

# Replace every `...` span with the value of the Perl expression inside it
# (the frame offsets and load displacements written as arithmetic above).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# 32-bit build: drop the "*" that follows the comma in condition
# completers such as "cmpb,*=" and "addib,*<>".
$code =~ s/,\*/,/gm		if ($SIZE_T==4);
# 64-bit build: the return branch is spelled bve instead of bv.
$code =~ s/\bbv\b/bve/gm	if ($SIZE_T==8);
print $code;
# Output is buffered, so a write failure (full disk, I/O error) may only
# be reported when the handle is closed.  Fail loudly in that case rather
# than exit successfully with a truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";
267