#! /usr/bin/env perl
# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise this is a straightforward
# implementation with the X vector kept in the register bank.
#
# (*) does this mean the module is inappropriate for PPC403? Does
#     anybody know whether pre-POWER3 can sustain unaligned loads?

# 			-m64	-m32
# ----------------------------------
# PPC970,gcc-4.0.0	+76%	+59%
# Power6,xlc-7		+68%	+33%

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour, e.g. linux64le
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
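# Everything printed to STDOUT below is piped through ppc-xlate.pl,
# which translates this perlasm into assembler syntax for the requested
# $flavour; the remaining command-line argument (the output file) is
# forwarded to the translator.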

$FRAME=24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;
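# Stack frame layout, reconstructed from the offsets used below: the low
# 6*$SIZE_T bytes leave room for the ABI linkage area, followed by a
# 64-byte buffer at $LOCALS used to realign input blocks that cross a
# page boundary, followed by save slots for $inp and r15-r31
# (18*$SIZE_T bytes), for a total of 24*$SIZE_T+64 bytes.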

$K  ="r0";
$sp ="r1";
$toc="r2";
$ctx="r3";
$inp="r4";
$num="r5";
$t0 ="r15";
$t1 ="r6";

$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

@V=($A,$B,$C,$D,$E,$T);
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");

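# @V holds the six working registers; it is rotated after every round so
# that each round body can be emitted with a fixed register pattern, the
# sixth slot ($f) receiving the freshly computed value.  @X is the
# 16-word message schedule, kept entirely in r16-r31 (this is the
# "X vector kept in the register bank" mentioned in the header comment).
#
# loadbe() emits a big-endian 32-bit load: a plain lwz on big-endian
# flavours, and on little-endian flavours an lwz followed by the classic
# three-instruction PPC byte swap (rotlwi by 8, then two rlwimi with a
# rotate of 24 patching the top byte and the third byte), which reverses
# the byte order of the loaded word.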
sub loadbe {
my ($dst, $src, $temp_reg) = @_;
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$dst,$src
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$temp_reg,$src
	rotlwi	$dst,$temp_reg,8
	rlwimi	$dst,$temp_reg,24,0,7
	rlwimi	$dst,$temp_reg,24,16,23
___
}

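# Rounds 0-19: F(b,c,d) = (b & c) | (~b & d) ("Ch"), computed below as
# (c & b) | (d & ~b) with and/andc/or, K = 0x5a827999.  The schedule
# word needed for the next round, W[j] with j = i+1, is extended in
# place over the 16-entry @X window as
# W[j] = rotl(W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16], 1);
# the (j+2)%16, (j+8)%16 and (j+13)%16 indices below are simply j-14,
# j-8 and j-3 reduced mod 16, and the slot being overwritten held
# W[j-16].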
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;

	# Since the last value of $f is discarded, we can use
	# it as a temp reg to swap byte-order when needed.
	loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
	loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
$code.=<<___ if ($i<15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,@X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$c,$b
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
}

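# Rounds 20-39 and 60-79: F(b,c,d) = b ^ c ^ d ("Parity"); this body is
# reused for both ranges, with K = 0x6ed9eba1 and 0xca62c1d6
# respectively.  The $i==79 variant drops the no-longer-needed schedule
# update and instead interleaves the reloads of the five context words
# into r16-r20 with the final round's arithmetic.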
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$e
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}

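# Rounds 40-59: F(b,c,d) = (b & c) | (b & d) | (c & d) ("Maj"),
# K = 0x8f1bbcdc.  The majority function is computed with one AND fewer
# as (b & c) | ((b | c) & d), which is the and/or/and/or sequence below.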
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	@X[$j%16],@X[$j%16],1
	add	$f,$f,$t0
___
}

$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; The PowerPC specification allows an implementation to be ill-behaved
; upon an unaligned access that crosses a page boundary, so the "better
; safe than sorry" principle makes me treat that case specially. I don't
; look for the particular offending word, but for the 64-byte input
; block that crosses the boundary; once found, that block is copied to
; an aligned buffer and hashed separately...
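; (How the check below works: subtracting the input pointer from 4096
; and masking with 4095 yields the distance in bytes to the next
; 4096-byte page boundary; shifting that right by 6 converts it into
; the number of complete 64-byte blocks that fit before the boundary.
; If that count is zero, the very next block straddles the boundary and
; is handled via Lcross_page.)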
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
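; copy the straddling 64-byte block (16 iterations of 4 bytes) into the
; aligned buffer inside the stack frame, then hash that single block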
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___

# This is the private block function, which uses a tailored calling
# interface: upon entry the SHA_CTX state is pre-loaded into the
# registers $A..$E and the counter register holds the number of
# 64-byte chunks to digest...
$code.=<<___;
.align	4
Lsha1_block_private:
___
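# Each 32-bit round constant is materialized with a lis/ori pair (lis
# loads the upper 16 bits, ori fills in the lower 16): 0x5a827999,
# 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6 for rounds 0-19, 20-39, 40-59
# and 60-79 respectively.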
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
for($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
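# @V has been rotated once per round; after 80 rounds (80 mod 6 == 2)
# the updated a..e values sit in ($E,$T,$A,$B,$C), which is why the
# context words reloaded into r16-r20 at round 79 are paired with
# exactly those registers below.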
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";