1#! /usr/bin/env perl
2# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# RC4 for PA-RISC.
18
19# June 2009.
20#
21# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
22# For reference, [4x] unrolled loop is >40% faster than folded one.
23# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
24# is believed to be not sufficient to justify the effort...
25#
26# Special thanks to polarhome.com for providing HP-UX account.
27
# Directory part of the script path (including the trailing separator).
# It is used further down to locate opensslconf.h relative to this script;
# it stays undefined when $0 carries no directory component.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect everything printed below into $output.  The three-argument form
# keeps the file name from being parsed for mode characters, and a failed
# open is fatal: otherwise the generated assembly would silently go to the
# inherited STDOUT and the requested output file would never be written.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
36
# ABI-dependent parameters.  A flavour containing "64" selects the
# PA-RISC 2.0W settings (8-byte size_t, doubleword stack push/pop
# mnemonics); anything else selects the 1.0 settings (4-byte size_t,
# word push/pop mnemonics).
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    ($flavour =~ /64/)
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
56
$FRAME=4*$SIZE_T+$FRAME_MARKER;	# 4 saved regs + frame marker
				#                [+ argument transfer]

# Map the replacement text of "#define RC4_INT <type>" to the size in bytes
# of one key-schedule element: 1 when the type ends in "char", 4 otherwise.
# Trailing white space (including the CR of a CRLF line ending) is
# tolerated, so "unsigned char\r" is still recognised as the char variant.
sub _rc4_int_size {
    my ($type) = @_;
    return ($type =~ /char\s*$/) ? 1 : 4;
}

$SZ=1;				# defaults to RC4_CHAR
# Best effort: when opensslconf.h cannot be opened the default above stands.
if (open(my $conf, '<', "${dir}../../opensslconf.h")) {
    while (my $line = <$conf>) {
	if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
	    $SZ = _rc4_int_size($1);
	    last;			# first definition wins
	}
    }
    close $conf;
}
69
# Mnemonics used to access key-schedule elements, chosen by element size:
# byte load/store and plain address add for RC4_CHAR, word load/store and
# scaled (shift-by-2) indexing for RC4_INT.
($LD, $LDX, $MKX, $ST) = ($SZ==1)
    ? ("ldb", "ldbx",   "addl",    "stb")	# RC4_CHAR
    : ("ldw", "ldwx,s", "sh2addl", "stw");	# RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
81
# Register allocation.

# The four arguments of RC4 (the routine is exported with ARGW0-ARGW3=GR).
($key, $len, $inp, $out) = map("%r$_", (26, 25, 24, 23));

# Two-element register pairs that the unrolled loop rotates every round.
@XX = map("%r$_", (19, 20));
@TX = map("%r$_", (21, 22));
($YY, $TY) = map("%r$_", (28, 29));

# Scratch registers; %r2-%r6 are stored by the RC4 prologue before use.
($acc, $ix, $iy, $dat0, $dat1, $rem) = map("%r$_", (1 .. 6));
$mask = "%r31";
99
# Append four rounds of the unrolled loop body to $code.
#
# The round number is interpolated into the back-quoted expressions, which
# are evaluated later by the output pass: in rounds 1..3 they emit a load
# of the previous round's result into $dat1 and a (z)dep of that byte into
# $acc at bit position 8*(round-1)+7; in round 0 they evaluate to nothing.
# After each round the @XX and @TX pairs are rotated, so after four rounds
# both arrays are back in their original order.
sub unrolledloopbody {
    foreach my $i (0 .. 3) {
	$code .= <<___;
	ldo	1($XX[0]),$XX[1]
	`sprintf("$LDX	%$TY(%$key),%$dat1") if ($i>0)`
	and	$mask,$XX[1],$XX[1]
	$LDX	$YY($key),$TY
	$MKX	$YY,$key,$ix
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[0],$key,$iy
	$ST	$TX[0],0($ix)
	comclr,<> $XX[1],$YY,%r0	; conditional
	copy	$TX[0],$TX[1]		; move
	`sprintf("%sdep	%$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
	$ST	$TY,0($iy)
	addl	$TX[0],$TY,$TY
	addl	$TX[1],$YY,$YY
	and	$mask,$TY,$TY
	and	$mask,$YY,$YY
___
	# "rotate" registers: the first element moves to the back
	push @TX, shift @TX;
	push @XX, shift @XX;
    }
}
122
# Append a byte-at-a-time ("folded") loop to $code.
#
#   $label - text emitted on the first line as the loop's label and used
#            as the target of the closing addib
#   $count - register decremented by that addib once per iteration; the
#            loop repeats while the result is non-zero
#
# Each iteration loads one byte via ldbx $inp($out), XORs it with an
# element loaded from the key schedule and stores the result at -1($out)
# after $out has been advanced by one.
sub foldedloop {
    my $label = shift;
    my $count = shift;

    $code .= <<___;
$label
	$MKX	$YY,$key,$iy
	$LDX	$YY($key),$TY
	$MKX	$XX[0],$key,$ix
	$ST	$TX[0],0($iy)
	ldo	1($XX[0]),$XX[0]
	$ST	$TY,0($ix)
	addl	$TX[0],$TY,$TY
	ldbx	$inp($out),$dat1
	and	$mask,$TY,$TY
	and	$mask,$XX[0],$XX[0]
	$LDX	$TY($key),$acc
	$LDX	$XX[0]($key),$TX[0]
	ldo	1($out),$out
	xor	$dat1,$acc,$acc
	addl	$TX[0],$YY,$YY
	stb	$acc,-1($out)
	addib,<> -1,$count,$label	; $count is always small
	and	$mask,$YY,$YY
___
}
147
# ---------------------------------------------------------------------
# RC4 routine.  The emitted text is assembled piecewise: literal assembly
# from the here-documents below, interleaved with the loop bodies that
# foldedloop() and unrolledloopbody() append to $code.
# ---------------------------------------------------------------------

# Prologue, state load, loop warm-up and the decision whether the 4x
# unrolled path is worth taking.
$code = <<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
RC4
	.PROC
	.CALLINFO	FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)

	cmpib,*= 0,$len,L\$abort
	sub	$inp,$out,$inp		; distance between $inp and $out

	$LD	`0*$SZ`($key),$XX[0]
	$LD	`1*$SZ`($key),$YY
	ldo	`2*$SZ`($key),$key

	ldi	0xff,$mask
	ldi	3,$dat0

	ldo	1($XX[0]),$XX[0]	; warm up loop
	and	$mask,$XX[0],$XX[0]
	$LDX	$XX[0]($key),$TX[0]
	addl	$TX[0],$YY,$YY
	cmpib,*>>= 6,$len,L\$oop1	; is $len large enough to bother?
	and	$mask,$YY,$YY

	and,<>	$out,$dat0,$rem		; is $out aligned?
	b	L\$alignedout
	subi	4,$rem,$rem
	sub	$len,$rem,$len
___
# Byte-wise loop that runs until $out is aligned.
foldedloop("L\$alignout", $rem);

# Input alignment check, followed by the set-up of the misaligned-input
# variant of the unrolled loop.
$code .= <<___;
L\$alignedout				; $len is at least 4 here
	and,<>	$inp,$dat0,$acc		; is $inp aligned?
	b	L\$oop4
	sub	$inp,$acc,$rem		; align $inp

	sh3addl	$acc,%r0,$acc
	subi	32,$acc,$acc
	mtctl	$acc,%cr11		; load %sar with vshd align factor
	ldwx	$rem($out),$dat0
	ldo	4($rem),$rem
L\$oop4misalignedinp
___
unrolledloopbody();
# Tail of the misaligned-input loop, then the header of the aligned loop.
$code .= <<___;
	$LDX	$TY($key),$ix
	ldwx	$rem($out),$dat1
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	vshd	$dat0,$dat1,$iy		; align data
	copy	$dat1,$dat0
	xor	$iy,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<< 3,$len,L\$oop4misalignedinp
	ldo	4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
	b	L\$oop1
	nop

	.ALIGN	8
L\$oop4
___
unrolledloopbody();
# Tail of the aligned loop.
$code .= <<___;
	$LDX	$TY($key),$ix
	ldwx	$inp($out),$dat0
	ldo	-4($len),$len
	or	$ix,$acc,$acc		; last piece, no need to dep
	xor	$dat0,$acc,$acc
	stw	$acc,0($out)
	cmpib,*<< 3,$len,L\$oop4
	ldo	4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
___
# Byte-wise loop for whatever is left (also the short-input entry point).
foldedloop("L\$oop1", $len);
# Undo the warm-up, store the state back into the key structure and
# return.
$code .= <<___;
L\$done
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2
	ldo	-1($XX[0]),$XX[0]	; chill out loop
	sub	$YY,$TX[0],$YY
	and	$mask,$XX[0],$XX[0]
	and	$mask,$YY,$YY
	$ST	$XX[0],`-2*$SZ`($key)
	$ST	$YY,`-1*$SZ`($key)
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
L\$abort
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
___
253
# ---------------------------------------------------------------------
# RC4_set_key and RC4_options.
#
# RC4_set_key stores zero into the first two state elements, fills the
# 256-entry table with 0..255 (L$1st) and then mixes in the key bytes
# (L$2nd), restarting the key index whenever it runs off the end of the
# key.  RC4_options returns the address of the "rc4(4x,...)" string,
# computed relative to L$pic.
#
# Single elements of @XX/@TX are interpolated with scalar subscripts; the
# generated text is the same as with one-element slices.
# ---------------------------------------------------------------------
$code .= <<___;

	.EXPORT	RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	8
RC4_set_key
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	$ST	%r0,`0*$SZ`($key)
	$ST	%r0,`1*$SZ`($key)
	ldo	`2*$SZ`($key),$key
	copy	%r0,$XX[0]
L\$1st
	$ST	$XX[0],0($key)
	ldo	1($XX[0]),$XX[0]
	bb,>=	$XX[0],`31-8`,L\$1st	; $XX[0]<256
	ldo	$SZ($key),$key

	ldo	`-256*$SZ`($key),$key	; rewind $key
	addl	$len,$inp,$inp		; $inp to point at the end
	sub	%r0,$len,%r23		; inverse index
	copy	%r0,$XX[0]
	copy	%r0,$XX[1]
	ldi	0xff,$mask

L\$2nd
	$LDX	$XX[0]($key),$TX[0]
	ldbx	%r23($inp),$TX[1]
	addi,nuv 1,%r23,%r23		; increment and conditional
	sub	%r0,$len,%r23		; inverse index
	addl	$TX[0],$XX[1],$XX[1]
	addl	$TX[1],$XX[1],$XX[1]
	and	$mask,$XX[1],$XX[1]
	$MKX	$XX[0],$key,$TY
	$LDX	$XX[1]($key),$TX[1]
	$MKX	$XX[1],$key,$YY
	ldo	1($XX[0]),$XX[0]
	$ST	$TX[0],0($YY)
	bb,>=	$XX[0],`31-8`,L\$2nd	; $XX[0]<256
	$ST	$TX[1],0($TY)

	bv,n	(%r2)
	.EXIT
	nop
	.PROCEND

	.EXPORT	RC4_options,ENTRY
	.ALIGN	8
RC4_options
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	blr	%r0,%r28
	ldi	3,%r1
L\$pic
	andcm	%r28,%r1,%r28
	bv	(%r2)
	.EXIT
	ldo	L\$opts-L\$pic(%r28),%r28
	.PROCEND
	.ALIGN	8
L\$opts
	.STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
	.STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
319
# Ask the compiler named by $ENV{CC} to run its assembler verbosely on an
# empty input; $gnuas is set when the combined output mentions
# "GNU assembler".
my $as_banner = `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`;
$gnuas = 1 if ($as_banner =~ /GNU assembler/);

# The directive rewrites below apply only to GNU as on the 64-bit flavour.
my $gnu64 = ($gnuas && $SIZE_T==8);

# Final pass: evaluate the back-quoted Perl expressions embedded in $code,
# adapt directives and mnemonics to the target, and print line by line.
for my $line (split("\n",$code)) {
	$line =~ s/\`([^\`]*)\`/eval $1/ge;

	if ($gnu64) {
		$line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
		$line =~ s/\.SPACE\s+\$TEXT\$/.text/;
		$line =~ s/\.SUBSPA.*//;
	}
	if ($SIZE_T==4) {		# 32-bit: cmpib,* is written as comib,
		$line =~ s/cmpib,\*/comib,/;
	}
	if ($SIZE_T==8) {		# 64-bit: bv is written as bve
		$line =~ s/\bbv\b/bve/;
	}

	print $line,"\n";
}
close STDOUT or die "error closing STDOUT: $!";
337