#include "arm_arch.h"

.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

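@ sha1_block_data_order(ctx, data, num) -- a sketch of the usual
@ OpenSSL block-transform convention, inferred from the code below:
@	r0 = ctx, pointer to the five-word SHA-1 state A,B,C,D,E
@	r1 = data, input being hashed
@	r2 = num, number of 64-byte blocks to process
@ Working registers inside .Lloop:
@	r3-r7 = A..E, r8 = round constant K, r9 = schedule word X[i],
@	r10-r12 = scratch, r14 = downward-growing pointer into the
@	80-word X[] frame carved out of the stack.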
.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
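@ C, D and E are kept rotated left by 2 (the ror#30 above) so that each
@ round's ROL(B,30) comes for free: readers compensate with ",ror#2",
@ and the leftover rotation is undone at .L_done.  r14 marks the top of
@ the X[] frame; every round stores one schedule word downwards until
@ r14 meets sp.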
.L_00_15:
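@ Rounds 0-14, five per iteration: fetch a big-endian input word into
@ r9 (byte by byte when unaligned loads are not safe), push it onto the
@ X[] frame, then E += ROL(A,5) + F_00_19(B,C,D) + X[i] + K, with
@ F_00_19(B,C,D) = (B&(C^D))^D.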
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
	sub	sp,sp,#25*4
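@ Round 15 (the last input word) followed by rounds 16-19, where the
@ message schedule takes over: X[i] = ROL(X[i-3]^X[i-8]^X[i-14]^X[i-16],1),
@ the back-references read through r14 at [#2*4], [#7*4], [#13*4] and
@ [#15*4] respectively.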
#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r3,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r7,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r6,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r5,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

	ldr	r8,.LK_20_39		@ [+15+16*4]
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
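@ Rounds 20-39 and 60-79 share this loop, since F is plain XOR in both
@ ranges; the carry flag tells the passes apart.  The cmn above clears
@ carry (20_39), the cmp before the second entry sets it (60_79); teq
@ leaves carry untouched, and bcs below exits after the 60_79 pass.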
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r4,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r3,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r7,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r6,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r5,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
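@ Rounds 40-59: F_40_59 is the majority function MAJ(B,C,D), computed
@ as B&(C^D) plus C&D -- the two terms can never both be set in the
@ same bit, so adding them equals ORing them.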
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r4,r10,ror#2					@ F_xx_xx
	and r11,r5,r6					@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r3,r10,ror#2					@ F_xx_xx
	and r11,r4,r5					@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r7,r10,ror#2					@ F_xx_xx
	and r11,r3,r4					@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r6,r10,ror#2					@ F_xx_xx
	and r11,r7,r3					@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r5,r10,ror#2					@ F_xx_xx
	and r11,r6,r7					@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
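@ Fold the working variables back into the state.  A and B are already
@ in natural orientation; C, D and E still carry the deferred two-bit
@ rotation and are straightened out with ",ror#2" here.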
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
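@ SHA-1 round constants: floor(2^30*sqrt(n)) for n = 2, 3, 5, 10.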
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2
#if defined(HAVE_GNU_STACK)
.section .note.GNU-stack,"",%progbits
#endif
