1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4//
5// ARM version of md5block.go
6
7#include "textflag.h"
8
9// SHA-1 block routine. See sha1block.go for Go equivalent.
10//
11// There are 80 rounds of 4 types:
12//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
13//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
14//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
15//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
16//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
17//
18// Each round loads or shuffles the data, then computes a per-round
19// function of b, c, d, and then mixes the result into and rotates the
20// five registers a, b, c, d, e holding the intermediate results.
21//
22// The register rotation is implemented by rotating the arguments to
23// the round macros instead of by explicit move instructions.
24
25// Register definitions
26#define Rdata	R0	// Pointer to incoming data
27#define Rconst	R1	// Current constant for SHA round
28#define Ra	R2		// SHA-1 accumulator
29#define Rb	R3		// SHA-1 accumulator
30#define Rc	R4		// SHA-1 accumulator
31#define Rd	R5		// SHA-1 accumulator
32#define Re	R6		// SHA-1 accumulator
33#define Rt0	R7		// Temporary
34#define Rt1	R8		// Temporary
35// r9, r10 are forbidden
36// r11 is OK provided you check the assembler that no synthetic instructions use it
37#define Rt2	R11		// Temporary
38#define Rctr	R12	// loop counter
39#define Rw	R14		// point to w buffer
40
41// func block(dig *digest, p []byte)
42// 0(FP) is *digest
43// 4(FP) is p.array (struct Slice)
44// 8(FP) is p.len
45//12(FP) is p.cap
46//
47// Stack frame
48#define p_end	end-4(SP)		// pointer to the end of data
49#define p_data	data-8(SP)	// current data pointer (unused?)
50#define w_buf	buf-(8+4*80)(SP)	//80 words temporary buffer w uint32[80]
51#define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (unused?)
52// Total size +4 for saved LR is 352
53
54	// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
55	// e += w[i]
56#define LOAD(Re) \
57	MOVBU	2(Rdata), Rt0 ; \
58	MOVBU	3(Rdata), Rt1 ; \
59	MOVBU	1(Rdata), Rt2 ; \
60	ORR	Rt0<<8, Rt1, Rt0	    ; \
61	MOVBU.P	4(Rdata), Rt1 ; \
62	ORR	Rt2<<16, Rt0, Rt0	    ; \
63	ORR	Rt1<<24, Rt0, Rt0	    ; \
64	MOVW.P	Rt0, 4(Rw)		    ; \
65	ADD	Rt0, Re, Re
66
67	// tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
68	// w[i&0xf] = tmp<<1 | tmp>>(32-1)
69	// e += w[i&0xf]
70#define SHUFFLE(Re) \
71	MOVW	(-16*4)(Rw), Rt0 ; \
72	MOVW	(-14*4)(Rw), Rt1 ; \
73	MOVW	(-8*4)(Rw), Rt2  ; \
74	EOR	Rt0, Rt1, Rt0  ; \
75	MOVW	(-3*4)(Rw), Rt1  ; \
76	EOR	Rt2, Rt0, Rt0  ; \
77	EOR	Rt0, Rt1, Rt0  ; \
78	MOVW	Rt0@>(32-1), Rt0  ; \
79	MOVW.P	Rt0, 4(Rw)	  ; \
80	ADD	Rt0, Re, Re
81
82	// t1 = (b & c) | ((~b) & d)
83#define FUNC1(Ra, Rb, Rc, Rd, Re) \
84	MVN	Rb, Rt1	   ; \
85	AND	Rb, Rc, Rt0  ; \
86	AND	Rd, Rt1, Rt1 ; \
87	ORR	Rt0, Rt1, Rt1
88
89	// t1 = b ^ c ^ d
90#define FUNC2(Ra, Rb, Rc, Rd, Re) \
91	EOR	Rb, Rc, Rt1 ; \
92	EOR	Rd, Rt1, Rt1
93
94	// t1 = (b & c) | (b & d) | (c & d) =
95	// t1 = (b & c) | ((b | c) & d)
96#define FUNC3(Ra, Rb, Rc, Rd, Re) \
97	ORR	Rb, Rc, Rt0  ; \
98	AND	Rb, Rc, Rt1  ; \
99	AND	Rd, Rt0, Rt0 ; \
100	ORR	Rt0, Rt1, Rt1
101
102#define FUNC4 FUNC2
103
104	// a5 := a<<5 | a>>(32-5)
105	// b = b<<30 | b>>(32-30)
106	// e = a5 + t1 + e + const
107#define MIX(Ra, Rb, Rc, Rd, Re) \
108	ADD	Rt1, Re, Re	 ; \
109	MOVW	Rb@>(32-30), Rb	 ; \
110	ADD	Ra@>(32-5), Re, Re ; \
111	ADD	Rconst, Re, Re
112
113#define ROUND1(Ra, Rb, Rc, Rd, Re) \
114	LOAD(Re)		; \
115	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
116	MIX(Ra, Rb, Rc, Rd, Re)
117
118#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
119	SHUFFLE(Re)	; \
120	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
121	MIX(Ra, Rb, Rc, Rd, Re)
122
123#define ROUND2(Ra, Rb, Rc, Rd, Re) \
124	SHUFFLE(Re)	; \
125	FUNC2(Ra, Rb, Rc, Rd, Re)	; \
126	MIX(Ra, Rb, Rc, Rd, Re)
127
128#define ROUND3(Ra, Rb, Rc, Rd, Re) \
129	SHUFFLE(Re)	; \
130	FUNC3(Ra, Rb, Rc, Rd, Re)	; \
131	MIX(Ra, Rb, Rc, Rd, Re)
132
133#define ROUND4(Ra, Rb, Rc, Rd, Re) \
134	SHUFFLE(Re)	; \
135	FUNC4(Ra, Rb, Rc, Rd, Re)	; \
136	MIX(Ra, Rb, Rc, Rd, Re)
137
138
139// func block(dig *digest, p []byte)
140TEXT	·block(SB), 0, $352-16
141	MOVW	p+4(FP), Rdata	// pointer to the data
142	MOVW	p_len+8(FP), Rt0	// number of bytes
143	ADD	Rdata, Rt0
144	MOVW	Rt0, p_end	// pointer to end of data
145
146	// Load up initial SHA-1 accumulator
147	MOVW	dig+0(FP), Rt0
148	MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
149
150loop:
151	// Save registers at SP+4 onwards
152	MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
153
154	MOVW	$w_buf, Rw
155	MOVW	$0x5A827999, Rconst
156	MOVW	$3, Rctr
157loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
158	ROUND1(Re, Ra, Rb, Rc, Rd)
159	ROUND1(Rd, Re, Ra, Rb, Rc)
160	ROUND1(Rc, Rd, Re, Ra, Rb)
161	ROUND1(Rb, Rc, Rd, Re, Ra)
162	SUB.S	$1, Rctr
163	BNE	loop1
164
165	ROUND1(Ra, Rb, Rc, Rd, Re)
166	ROUND1x(Re, Ra, Rb, Rc, Rd)
167	ROUND1x(Rd, Re, Ra, Rb, Rc)
168	ROUND1x(Rc, Rd, Re, Ra, Rb)
169	ROUND1x(Rb, Rc, Rd, Re, Ra)
170
171	MOVW	$0x6ED9EBA1, Rconst
172	MOVW	$4, Rctr
173loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
174	ROUND2(Re, Ra, Rb, Rc, Rd)
175	ROUND2(Rd, Re, Ra, Rb, Rc)
176	ROUND2(Rc, Rd, Re, Ra, Rb)
177	ROUND2(Rb, Rc, Rd, Re, Ra)
178	SUB.S	$1, Rctr
179	BNE	loop2
180
181	MOVW	$0x8F1BBCDC, Rconst
182	MOVW	$4, Rctr
183loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
184	ROUND3(Re, Ra, Rb, Rc, Rd)
185	ROUND3(Rd, Re, Ra, Rb, Rc)
186	ROUND3(Rc, Rd, Re, Ra, Rb)
187	ROUND3(Rb, Rc, Rd, Re, Ra)
188	SUB.S	$1, Rctr
189	BNE	loop3
190
191	MOVW	$0xCA62C1D6, Rconst
192	MOVW	$4, Rctr
193loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
194	ROUND4(Re, Ra, Rb, Rc, Rd)
195	ROUND4(Rd, Re, Ra, Rb, Rc)
196	ROUND4(Rc, Rd, Re, Ra, Rb)
197	ROUND4(Rb, Rc, Rd, Re, Ra)
198	SUB.S	$1, Rctr
199	BNE	loop4
200
201	// Accumulate - restoring registers from SP+4
202	MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
203	ADD	Rt0, Ra
204	ADD	Rt1, Rb
205	ADD	Rt2, Rc
206	ADD	Rctr, Rd
207	ADD	Rw, Re
208
209	MOVW	p_end, Rt0
210	CMP	Rt0, Rdata
211	BLO	loop
212
213	// Save final SHA-1 accumulator
214	MOVW	dig+0(FP), Rt0
215	MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
216
217	RET
218