xref: /freebsd/sys/crypto/openssl/arm/sha256-armv4.S (revision 4d846d26)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from sha256-armv4.pl. */
3@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
4@
5@ Licensed under the OpenSSL license (the "License").  You may not use
6@ this file except in compliance with the License.  You can obtain a copy
7@ in the file LICENSE in the source distribution or at
8@ https://www.openssl.org/source/license.html
9
10
11@ ====================================================================
12@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13@ project. The module is, however, dual licensed under OpenSSL and
14@ CRYPTOGAMS licenses depending on where you obtain it. For further
15@ details see http://www.openssl.org/~appro/cryptogams/.
16@
17@ Permission to use under GPL terms is granted.
18@ ====================================================================
19
20@ SHA256 block procedure for ARMv4. May 2007.
21
22@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
23@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
24@ byte [on single-issue Xscale PXA250 core].
25
26@ July 2010.
27@
28@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
29@ Cortex A8 core and ~20 cycles per processed byte.
30
31@ February 2011.
32@
33@ Profiler-assisted and platform-specific optimization resulted in 16%
34@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
35
36@ September 2013.
37@
38@ Add NEON implementation. On Cortex A8 it was measured to process one
39@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
40@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
41@ code (meaning that the latter performs sub-optimally; nothing was done
42@ about it).
43
44@ May 2014.
45@
46@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
47
48#ifndef __KERNEL__
49# include "arm_arch.h"
50#else
51# define __ARM_ARCH__ __LINUX_ARM_ARCH__
52# define __ARM_MAX_ARCH__ 7
53#endif
54
55.text
56#if defined(__thumb2__)
57.syntax	unified
58.thumb
59#else
60.code	32
61#endif
62
@ K256: the 64 32-bit SHA-256 round constants, laid out as 16 rows of
@ four words (256 bytes in total) and aligned to a 2^5 = 32-byte
@ boundary.  The round code in this file walks the table with a
@ post-incremented load through r14 ("*K256++").
63.type	K256,%object
64.align	5
65K256:
66.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
67.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
68.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
69.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
70.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
71.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
72.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
73.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
74.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
75.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
76.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
77.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
78.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
79.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
80.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
81.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
82.size	K256,.-K256
@ One zero word placed directly after the table.  It follows .size, so
@ it is not counted in the K256 symbol size.
@ NOTE(review): no reader of this terminator is visible in this part of
@ the file -- presumably a later code path uses it to detect the end of
@ the table; confirm before changing the layout.
83.word	0				@ terminator
@ Only when built for ARMv7+ outside the kernel: store the link-time
@ distance from .Lsha256_block_data_order to OPENSSL_armcap_P.  The
@ function entry loads this word and uses it as an offset from its own
@ address (held in r3) to fetch the capability flags that it tests
@ against ARMV8_SHA256 and ARMV7_NEON.
84#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
85.LOPENSSL_armcap:
86.word	OPENSSL_armcap_P-.Lsha256_block_data_order
87#endif
@ Pad to the next 32-byte boundary.  With the 256-byte table and the
@ word(s) above, the function entry then sits 256+32 bytes past K256,
@ which is the amount the function subtracts from its entry address to
@ recover the table pointer.
88.align	5
89
90.globl	sha256_block_data_order
91.type	sha256_block_data_order,%function
92sha256_block_data_order:
93.Lsha256_block_data_order:
94#if __ARM_ARCH__<7 && !defined(__thumb2__)
95	sub	r3,pc,#8		@ sha256_block_data_order
96#else
97	adr	r3,.Lsha256_block_data_order
98#endif
99#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
100	ldr	r12,.LOPENSSL_armcap
101	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
102#ifdef	__APPLE__
103	ldr	r12,[r12]
104#endif
105	tst	r12,#ARMV8_SHA256
106	bne	.LARMv8
107	tst	r12,#ARMV7_NEON
108	bne	.LNEON
109#endif
110	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
111	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
112	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
113	sub	r14,r3,#256+32	@ K256
114	sub	sp,sp,#16*4		@ alloca(X[16])
115.Loop:
116# if __ARM_ARCH__>=7
117	ldr	r2,[r1],#4
118# else
119	ldrb	r2,[r1,#3]
120# endif
121	eor	r3,r5,r6		@ magic
122	eor	r12,r12,r12
123#if __ARM_ARCH__>=7
124	@ ldr	r2,[r1],#4			@ 0
125# if 0==15
126	str	r1,[sp,#17*4]			@ make room for r1
127# endif
128	eor	r0,r8,r8,ror#5
129	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
130	eor	r0,r0,r8,ror#19	@ Sigma1(e)
131# ifndef __ARMEB__
132	rev	r2,r2
133# endif
134#else
135	@ ldrb	r2,[r1,#3]			@ 0
136	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
137	ldrb	r12,[r1,#2]
138	ldrb	r0,[r1,#1]
139	orr	r2,r2,r12,lsl#8
140	ldrb	r12,[r1],#4
141	orr	r2,r2,r0,lsl#16
142# if 0==15
143	str	r1,[sp,#17*4]			@ make room for r1
144# endif
145	eor	r0,r8,r8,ror#5
146	orr	r2,r2,r12,lsl#24
147	eor	r0,r0,r8,ror#19	@ Sigma1(e)
148#endif
149	ldr	r12,[r14],#4			@ *K256++
150	add	r11,r11,r2			@ h+=X[i]
151	str	r2,[sp,#0*4]
152	eor	r2,r9,r10
153	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
154	and	r2,r2,r8
155	add	r11,r11,r12			@ h+=K256[i]
156	eor	r2,r2,r10			@ Ch(e,f,g)
157	eor	r0,r4,r4,ror#11
158	add	r11,r11,r2			@ h+=Ch(e,f,g)
159#if 0==31
160	and	r12,r12,#0xff
161	cmp	r12,#0xf2			@ done?
162#endif
163#if 0<15
164# if __ARM_ARCH__>=7
165	ldr	r2,[r1],#4			@ prefetch
166# else
167	ldrb	r2,[r1,#3]
168# endif
169	eor	r12,r4,r5			@ a^b, b^c in next round
170#else
171	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
172	eor	r12,r4,r5			@ a^b, b^c in next round
173	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
174#endif
175	eor	r0,r0,r4,ror#20	@ Sigma0(a)
176	and	r3,r3,r12			@ (b^c)&=(a^b)
177	add	r7,r7,r11			@ d+=h
178	eor	r3,r3,r5			@ Maj(a,b,c)
179	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
180	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
181#if __ARM_ARCH__>=7
182	@ ldr	r2,[r1],#4			@ 1
183# if 1==15
184	str	r1,[sp,#17*4]			@ make room for r1
185# endif
186	eor	r0,r7,r7,ror#5
187	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
188	eor	r0,r0,r7,ror#19	@ Sigma1(e)
189# ifndef __ARMEB__
190	rev	r2,r2
191# endif
192#else
193	@ ldrb	r2,[r1,#3]			@ 1
194	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
195	ldrb	r3,[r1,#2]
196	ldrb	r0,[r1,#1]
197	orr	r2,r2,r3,lsl#8
198	ldrb	r3,[r1],#4
199	orr	r2,r2,r0,lsl#16
200# if 1==15
201	str	r1,[sp,#17*4]			@ make room for r1
202# endif
203	eor	r0,r7,r7,ror#5
204	orr	r2,r2,r3,lsl#24
205	eor	r0,r0,r7,ror#19	@ Sigma1(e)
206#endif
207	ldr	r3,[r14],#4			@ *K256++
208	add	r10,r10,r2			@ h+=X[i]
209	str	r2,[sp,#1*4]
210	eor	r2,r8,r9
211	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
212	and	r2,r2,r7
213	add	r10,r10,r3			@ h+=K256[i]
214	eor	r2,r2,r9			@ Ch(e,f,g)
215	eor	r0,r11,r11,ror#11
216	add	r10,r10,r2			@ h+=Ch(e,f,g)
217#if 1==31
218	and	r3,r3,#0xff
219	cmp	r3,#0xf2			@ done?
220#endif
221#if 1<15
222# if __ARM_ARCH__>=7
223	ldr	r2,[r1],#4			@ prefetch
224# else
225	ldrb	r2,[r1,#3]
226# endif
227	eor	r3,r11,r4			@ a^b, b^c in next round
228#else
229	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
230	eor	r3,r11,r4			@ a^b, b^c in next round
231	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
232#endif
233	eor	r0,r0,r11,ror#20	@ Sigma0(a)
234	and	r12,r12,r3			@ (b^c)&=(a^b)
235	add	r6,r6,r10			@ d+=h
236	eor	r12,r12,r4			@ Maj(a,b,c)
237	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
238	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
239#if __ARM_ARCH__>=7
240	@ ldr	r2,[r1],#4			@ 2
241# if 2==15
242	str	r1,[sp,#17*4]			@ make room for r1
243# endif
244	eor	r0,r6,r6,ror#5
245	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
246	eor	r0,r0,r6,ror#19	@ Sigma1(e)
247# ifndef __ARMEB__
248	rev	r2,r2
249# endif
250#else
251	@ ldrb	r2,[r1,#3]			@ 2
252	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
253	ldrb	r12,[r1,#2]
254	ldrb	r0,[r1,#1]
255	orr	r2,r2,r12,lsl#8
256	ldrb	r12,[r1],#4
257	orr	r2,r2,r0,lsl#16
258# if 2==15
259	str	r1,[sp,#17*4]			@ make room for r1
260# endif
261	eor	r0,r6,r6,ror#5
262	orr	r2,r2,r12,lsl#24
263	eor	r0,r0,r6,ror#19	@ Sigma1(e)
264#endif
265	ldr	r12,[r14],#4			@ *K256++
266	add	r9,r9,r2			@ h+=X[i]
267	str	r2,[sp,#2*4]
268	eor	r2,r7,r8
269	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
270	and	r2,r2,r6
271	add	r9,r9,r12			@ h+=K256[i]
272	eor	r2,r2,r8			@ Ch(e,f,g)
273	eor	r0,r10,r10,ror#11
274	add	r9,r9,r2			@ h+=Ch(e,f,g)
275#if 2==31
276	and	r12,r12,#0xff
277	cmp	r12,#0xf2			@ done?
278#endif
279#if 2<15
280# if __ARM_ARCH__>=7
281	ldr	r2,[r1],#4			@ prefetch
282# else
283	ldrb	r2,[r1,#3]
284# endif
285	eor	r12,r10,r11			@ a^b, b^c in next round
286#else
287	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
288	eor	r12,r10,r11			@ a^b, b^c in next round
289	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
290#endif
291	eor	r0,r0,r10,ror#20	@ Sigma0(a)
292	and	r3,r3,r12			@ (b^c)&=(a^b)
293	add	r5,r5,r9			@ d+=h
294	eor	r3,r3,r11			@ Maj(a,b,c)
295	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
296	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
297#if __ARM_ARCH__>=7
298	@ ldr	r2,[r1],#4			@ 3
299# if 3==15
300	str	r1,[sp,#17*4]			@ make room for r1
301# endif
302	eor	r0,r5,r5,ror#5
303	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
304	eor	r0,r0,r5,ror#19	@ Sigma1(e)
305# ifndef __ARMEB__
306	rev	r2,r2
307# endif
308#else
309	@ ldrb	r2,[r1,#3]			@ 3
310	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
311	ldrb	r3,[r1,#2]
312	ldrb	r0,[r1,#1]
313	orr	r2,r2,r3,lsl#8
314	ldrb	r3,[r1],#4
315	orr	r2,r2,r0,lsl#16
316# if 3==15
317	str	r1,[sp,#17*4]			@ make room for r1
318# endif
319	eor	r0,r5,r5,ror#5
320	orr	r2,r2,r3,lsl#24
321	eor	r0,r0,r5,ror#19	@ Sigma1(e)
322#endif
323	ldr	r3,[r14],#4			@ *K256++
324	add	r8,r8,r2			@ h+=X[i]
325	str	r2,[sp,#3*4]
326	eor	r2,r6,r7
327	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
328	and	r2,r2,r5
329	add	r8,r8,r3			@ h+=K256[i]
330	eor	r2,r2,r7			@ Ch(e,f,g)
331	eor	r0,r9,r9,ror#11
332	add	r8,r8,r2			@ h+=Ch(e,f,g)
333#if 3==31
334	and	r3,r3,#0xff
335	cmp	r3,#0xf2			@ done?
336#endif
337#if 3<15
338# if __ARM_ARCH__>=7
339	ldr	r2,[r1],#4			@ prefetch
340# else
341	ldrb	r2,[r1,#3]
342# endif
343	eor	r3,r9,r10			@ a^b, b^c in next round
344#else
345	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
346	eor	r3,r9,r10			@ a^b, b^c in next round
347	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
348#endif
349	eor	r0,r0,r9,ror#20	@ Sigma0(a)
350	and	r12,r12,r3			@ (b^c)&=(a^b)
351	add	r4,r4,r8			@ d+=h
352	eor	r12,r12,r10			@ Maj(a,b,c)
353	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
354	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
355#if __ARM_ARCH__>=7
356	@ ldr	r2,[r1],#4			@ 4
357# if 4==15
358	str	r1,[sp,#17*4]			@ make room for r1
359# endif
360	eor	r0,r4,r4,ror#5
361	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
362	eor	r0,r0,r4,ror#19	@ Sigma1(e)
363# ifndef __ARMEB__
364	rev	r2,r2
365# endif
366#else
367	@ ldrb	r2,[r1,#3]			@ 4
368	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
369	ldrb	r12,[r1,#2]
370	ldrb	r0,[r1,#1]
371	orr	r2,r2,r12,lsl#8
372	ldrb	r12,[r1],#4
373	orr	r2,r2,r0,lsl#16
374# if 4==15
375	str	r1,[sp,#17*4]			@ make room for r1
376# endif
377	eor	r0,r4,r4,ror#5
378	orr	r2,r2,r12,lsl#24
379	eor	r0,r0,r4,ror#19	@ Sigma1(e)
380#endif
381	ldr	r12,[r14],#4			@ *K256++
382	add	r7,r7,r2			@ h+=X[i]
383	str	r2,[sp,#4*4]
384	eor	r2,r5,r6
385	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
386	and	r2,r2,r4
387	add	r7,r7,r12			@ h+=K256[i]
388	eor	r2,r2,r6			@ Ch(e,f,g)
389	eor	r0,r8,r8,ror#11
390	add	r7,r7,r2			@ h+=Ch(e,f,g)
391#if 4==31
392	and	r12,r12,#0xff
393	cmp	r12,#0xf2			@ done?
394#endif
395#if 4<15
396# if __ARM_ARCH__>=7
397	ldr	r2,[r1],#4			@ prefetch
398# else
399	ldrb	r2,[r1,#3]
400# endif
401	eor	r12,r8,r9			@ a^b, b^c in next round
402#else
403	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
404	eor	r12,r8,r9			@ a^b, b^c in next round
405	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
406#endif
407	eor	r0,r0,r8,ror#20	@ Sigma0(a)
408	and	r3,r3,r12			@ (b^c)&=(a^b)
409	add	r11,r11,r7			@ d+=h
410	eor	r3,r3,r9			@ Maj(a,b,c)
411	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
412	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
413#if __ARM_ARCH__>=7
414	@ ldr	r2,[r1],#4			@ 5
415# if 5==15
416	str	r1,[sp,#17*4]			@ make room for r1
417# endif
418	eor	r0,r11,r11,ror#5
419	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
420	eor	r0,r0,r11,ror#19	@ Sigma1(e)
421# ifndef __ARMEB__
422	rev	r2,r2
423# endif
424#else
425	@ ldrb	r2,[r1,#3]			@ 5
426	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
427	ldrb	r3,[r1,#2]
428	ldrb	r0,[r1,#1]
429	orr	r2,r2,r3,lsl#8
430	ldrb	r3,[r1],#4
431	orr	r2,r2,r0,lsl#16
432# if 5==15
433	str	r1,[sp,#17*4]			@ make room for r1
434# endif
435	eor	r0,r11,r11,ror#5
436	orr	r2,r2,r3,lsl#24
437	eor	r0,r0,r11,ror#19	@ Sigma1(e)
438#endif
439	ldr	r3,[r14],#4			@ *K256++
440	add	r6,r6,r2			@ h+=X[i]
441	str	r2,[sp,#5*4]
442	eor	r2,r4,r5
443	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
444	and	r2,r2,r11
445	add	r6,r6,r3			@ h+=K256[i]
446	eor	r2,r2,r5			@ Ch(e,f,g)
447	eor	r0,r7,r7,ror#11
448	add	r6,r6,r2			@ h+=Ch(e,f,g)
449#if 5==31
450	and	r3,r3,#0xff
451	cmp	r3,#0xf2			@ done?
452#endif
453#if 5<15
454# if __ARM_ARCH__>=7
455	ldr	r2,[r1],#4			@ prefetch
456# else
457	ldrb	r2,[r1,#3]
458# endif
459	eor	r3,r7,r8			@ a^b, b^c in next round
460#else
461	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
462	eor	r3,r7,r8			@ a^b, b^c in next round
463	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
464#endif
465	eor	r0,r0,r7,ror#20	@ Sigma0(a)
466	and	r12,r12,r3			@ (b^c)&=(a^b)
467	add	r10,r10,r6			@ d+=h
468	eor	r12,r12,r8			@ Maj(a,b,c)
469	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
470	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
471#if __ARM_ARCH__>=7
472	@ ldr	r2,[r1],#4			@ 6
473# if 6==15
474	str	r1,[sp,#17*4]			@ make room for r1
475# endif
476	eor	r0,r10,r10,ror#5
477	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
478	eor	r0,r0,r10,ror#19	@ Sigma1(e)
479# ifndef __ARMEB__
480	rev	r2,r2
481# endif
482#else
483	@ ldrb	r2,[r1,#3]			@ 6
484	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
485	ldrb	r12,[r1,#2]
486	ldrb	r0,[r1,#1]
487	orr	r2,r2,r12,lsl#8
488	ldrb	r12,[r1],#4
489	orr	r2,r2,r0,lsl#16
490# if 6==15
491	str	r1,[sp,#17*4]			@ make room for r1
492# endif
493	eor	r0,r10,r10,ror#5
494	orr	r2,r2,r12,lsl#24
495	eor	r0,r0,r10,ror#19	@ Sigma1(e)
496#endif
497	ldr	r12,[r14],#4			@ *K256++
498	add	r5,r5,r2			@ h+=X[i]
499	str	r2,[sp,#6*4]
500	eor	r2,r11,r4
501	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
502	and	r2,r2,r10
503	add	r5,r5,r12			@ h+=K256[i]
504	eor	r2,r2,r4			@ Ch(e,f,g)
505	eor	r0,r6,r6,ror#11
506	add	r5,r5,r2			@ h+=Ch(e,f,g)
507#if 6==31
508	and	r12,r12,#0xff
509	cmp	r12,#0xf2			@ done?
510#endif
511#if 6<15
512# if __ARM_ARCH__>=7
513	ldr	r2,[r1],#4			@ prefetch
514# else
515	ldrb	r2,[r1,#3]
516# endif
517	eor	r12,r6,r7			@ a^b, b^c in next round
518#else
519	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
520	eor	r12,r6,r7			@ a^b, b^c in next round
521	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
522#endif
523	eor	r0,r0,r6,ror#20	@ Sigma0(a)
524	and	r3,r3,r12			@ (b^c)&=(a^b)
525	add	r9,r9,r5			@ d+=h
526	eor	r3,r3,r7			@ Maj(a,b,c)
527	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
528	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
529#if __ARM_ARCH__>=7
530	@ ldr	r2,[r1],#4			@ 7
531# if 7==15
532	str	r1,[sp,#17*4]			@ make room for r1
533# endif
534	eor	r0,r9,r9,ror#5
535	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
536	eor	r0,r0,r9,ror#19	@ Sigma1(e)
537# ifndef __ARMEB__
538	rev	r2,r2
539# endif
540#else
541	@ ldrb	r2,[r1,#3]			@ 7
542	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
543	ldrb	r3,[r1,#2]
544	ldrb	r0,[r1,#1]
545	orr	r2,r2,r3,lsl#8
546	ldrb	r3,[r1],#4
547	orr	r2,r2,r0,lsl#16
548# if 7==15
549	str	r1,[sp,#17*4]			@ make room for r1
550# endif
551	eor	r0,r9,r9,ror#5
552	orr	r2,r2,r3,lsl#24
553	eor	r0,r0,r9,ror#19	@ Sigma1(e)
554#endif
555	ldr	r3,[r14],#4			@ *K256++
556	add	r4,r4,r2			@ h+=X[i]
557	str	r2,[sp,#7*4]
558	eor	r2,r10,r11
559	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
560	and	r2,r2,r9
561	add	r4,r4,r3			@ h+=K256[i]
562	eor	r2,r2,r11			@ Ch(e,f,g)
563	eor	r0,r5,r5,ror#11
564	add	r4,r4,r2			@ h+=Ch(e,f,g)
565#if 7==31
566	and	r3,r3,#0xff
567	cmp	r3,#0xf2			@ done?
568#endif
569#if 7<15
570# if __ARM_ARCH__>=7
571	ldr	r2,[r1],#4			@ prefetch
572# else
573	ldrb	r2,[r1,#3]
574# endif
575	eor	r3,r5,r6			@ a^b, b^c in next round
576#else
577	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
578	eor	r3,r5,r6			@ a^b, b^c in next round
579	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
580#endif
581	eor	r0,r0,r5,ror#20	@ Sigma0(a)
582	and	r12,r12,r3			@ (b^c)&=(a^b)
583	add	r8,r8,r4			@ d+=h
584	eor	r12,r12,r6			@ Maj(a,b,c)
585	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
586	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
587#if __ARM_ARCH__>=7
588	@ ldr	r2,[r1],#4			@ 8
589# if 8==15
590	str	r1,[sp,#17*4]			@ make room for r1
591# endif
592	eor	r0,r8,r8,ror#5
593	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
594	eor	r0,r0,r8,ror#19	@ Sigma1(e)
595# ifndef __ARMEB__
596	rev	r2,r2
597# endif
598#else
599	@ ldrb	r2,[r1,#3]			@ 8
600	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
601	ldrb	r12,[r1,#2]
602	ldrb	r0,[r1,#1]
603	orr	r2,r2,r12,lsl#8
604	ldrb	r12,[r1],#4
605	orr	r2,r2,r0,lsl#16
606# if 8==15
607	str	r1,[sp,#17*4]			@ make room for r1
608# endif
609	eor	r0,r8,r8,ror#5
610	orr	r2,r2,r12,lsl#24
611	eor	r0,r0,r8,ror#19	@ Sigma1(e)
612#endif
613	ldr	r12,[r14],#4			@ *K256++
614	add	r11,r11,r2			@ h+=X[i]
615	str	r2,[sp,#8*4]
616	eor	r2,r9,r10
617	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
618	and	r2,r2,r8
619	add	r11,r11,r12			@ h+=K256[i]
620	eor	r2,r2,r10			@ Ch(e,f,g)
621	eor	r0,r4,r4,ror#11
622	add	r11,r11,r2			@ h+=Ch(e,f,g)
623#if 8==31
624	and	r12,r12,#0xff
625	cmp	r12,#0xf2			@ done?
626#endif
627#if 8<15
628# if __ARM_ARCH__>=7
629	ldr	r2,[r1],#4			@ prefetch
630# else
631	ldrb	r2,[r1,#3]
632# endif
633	eor	r12,r4,r5			@ a^b, b^c in next round
634#else
635	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
636	eor	r12,r4,r5			@ a^b, b^c in next round
637	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
638#endif
639	eor	r0,r0,r4,ror#20	@ Sigma0(a)
640	and	r3,r3,r12			@ (b^c)&=(a^b)
641	add	r7,r7,r11			@ d+=h
642	eor	r3,r3,r5			@ Maj(a,b,c)
643	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
644	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
645#if __ARM_ARCH__>=7
646	@ ldr	r2,[r1],#4			@ 9
647# if 9==15
648	str	r1,[sp,#17*4]			@ make room for r1
649# endif
650	eor	r0,r7,r7,ror#5
651	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
652	eor	r0,r0,r7,ror#19	@ Sigma1(e)
653# ifndef __ARMEB__
654	rev	r2,r2
655# endif
656#else
657	@ ldrb	r2,[r1,#3]			@ 9
658	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
659	ldrb	r3,[r1,#2]
660	ldrb	r0,[r1,#1]
661	orr	r2,r2,r3,lsl#8
662	ldrb	r3,[r1],#4
663	orr	r2,r2,r0,lsl#16
664# if 9==15
665	str	r1,[sp,#17*4]			@ make room for r1
666# endif
667	eor	r0,r7,r7,ror#5
668	orr	r2,r2,r3,lsl#24
669	eor	r0,r0,r7,ror#19	@ Sigma1(e)
670#endif
671	ldr	r3,[r14],#4			@ *K256++
672	add	r10,r10,r2			@ h+=X[i]
673	str	r2,[sp,#9*4]
674	eor	r2,r8,r9
675	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
676	and	r2,r2,r7
677	add	r10,r10,r3			@ h+=K256[i]
678	eor	r2,r2,r9			@ Ch(e,f,g)
679	eor	r0,r11,r11,ror#11
680	add	r10,r10,r2			@ h+=Ch(e,f,g)
681#if 9==31
682	and	r3,r3,#0xff
683	cmp	r3,#0xf2			@ done?
684#endif
685#if 9<15
686# if __ARM_ARCH__>=7
687	ldr	r2,[r1],#4			@ prefetch
688# else
689	ldrb	r2,[r1,#3]
690# endif
691	eor	r3,r11,r4			@ a^b, b^c in next round
692#else
693	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
694	eor	r3,r11,r4			@ a^b, b^c in next round
695	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
696#endif
697	eor	r0,r0,r11,ror#20	@ Sigma0(a)
698	and	r12,r12,r3			@ (b^c)&=(a^b)
699	add	r6,r6,r10			@ d+=h
700	eor	r12,r12,r4			@ Maj(a,b,c)
701	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
702	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
703#if __ARM_ARCH__>=7
704	@ ldr	r2,[r1],#4			@ 10
705# if 10==15
706	str	r1,[sp,#17*4]			@ make room for r1
707# endif
708	eor	r0,r6,r6,ror#5
709	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
710	eor	r0,r0,r6,ror#19	@ Sigma1(e)
711# ifndef __ARMEB__
712	rev	r2,r2
713# endif
714#else
715	@ ldrb	r2,[r1,#3]			@ 10
716	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
717	ldrb	r12,[r1,#2]
718	ldrb	r0,[r1,#1]
719	orr	r2,r2,r12,lsl#8
720	ldrb	r12,[r1],#4
721	orr	r2,r2,r0,lsl#16
722# if 10==15
723	str	r1,[sp,#17*4]			@ make room for r1
724# endif
725	eor	r0,r6,r6,ror#5
726	orr	r2,r2,r12,lsl#24
727	eor	r0,r0,r6,ror#19	@ Sigma1(e)
728#endif
729	ldr	r12,[r14],#4			@ *K256++
730	add	r9,r9,r2			@ h+=X[i]
731	str	r2,[sp,#10*4]
732	eor	r2,r7,r8
733	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
734	and	r2,r2,r6
735	add	r9,r9,r12			@ h+=K256[i]
736	eor	r2,r2,r8			@ Ch(e,f,g)
737	eor	r0,r10,r10,ror#11
738	add	r9,r9,r2			@ h+=Ch(e,f,g)
739#if 10==31
740	and	r12,r12,#0xff
741	cmp	r12,#0xf2			@ done?
742#endif
743#if 10<15
744# if __ARM_ARCH__>=7
745	ldr	r2,[r1],#4			@ prefetch
746# else
747	ldrb	r2,[r1,#3]
748# endif
749	eor	r12,r10,r11			@ a^b, b^c in next round
750#else
751	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
752	eor	r12,r10,r11			@ a^b, b^c in next round
753	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
754#endif
755	eor	r0,r0,r10,ror#20	@ Sigma0(a)
756	and	r3,r3,r12			@ (b^c)&=(a^b)
757	add	r5,r5,r9			@ d+=h
758	eor	r3,r3,r11			@ Maj(a,b,c)
759	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
760	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
761#if __ARM_ARCH__>=7
762	@ ldr	r2,[r1],#4			@ 11
763# if 11==15
764	str	r1,[sp,#17*4]			@ make room for r1
765# endif
766	eor	r0,r5,r5,ror#5
767	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
768	eor	r0,r0,r5,ror#19	@ Sigma1(e)
769# ifndef __ARMEB__
770	rev	r2,r2
771# endif
772#else
773	@ ldrb	r2,[r1,#3]			@ 11
774	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
775	ldrb	r3,[r1,#2]
776	ldrb	r0,[r1,#1]
777	orr	r2,r2,r3,lsl#8
778	ldrb	r3,[r1],#4
779	orr	r2,r2,r0,lsl#16
780# if 11==15
781	str	r1,[sp,#17*4]			@ make room for r1
782# endif
783	eor	r0,r5,r5,ror#5
784	orr	r2,r2,r3,lsl#24
785	eor	r0,r0,r5,ror#19	@ Sigma1(e)
786#endif
787	ldr	r3,[r14],#4			@ *K256++
788	add	r8,r8,r2			@ h+=X[i]
789	str	r2,[sp,#11*4]
790	eor	r2,r6,r7
791	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
792	and	r2,r2,r5
793	add	r8,r8,r3			@ h+=K256[i]
794	eor	r2,r2,r7			@ Ch(e,f,g)
795	eor	r0,r9,r9,ror#11
796	add	r8,r8,r2			@ h+=Ch(e,f,g)
797#if 11==31
798	and	r3,r3,#0xff
799	cmp	r3,#0xf2			@ done?
800#endif
801#if 11<15
802# if __ARM_ARCH__>=7
803	ldr	r2,[r1],#4			@ prefetch
804# else
805	ldrb	r2,[r1,#3]
806# endif
807	eor	r3,r9,r10			@ a^b, b^c in next round
808#else
809	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
810	eor	r3,r9,r10			@ a^b, b^c in next round
811	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
812#endif
813	eor	r0,r0,r9,ror#20	@ Sigma0(a)
814	and	r12,r12,r3			@ (b^c)&=(a^b)
815	add	r4,r4,r8			@ d+=h
816	eor	r12,r12,r10			@ Maj(a,b,c)
817	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
818	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
819#if __ARM_ARCH__>=7
820	@ ldr	r2,[r1],#4			@ 12
821# if 12==15
822	str	r1,[sp,#17*4]			@ make room for r1
823# endif
824	eor	r0,r4,r4,ror#5
825	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
826	eor	r0,r0,r4,ror#19	@ Sigma1(e)
827# ifndef __ARMEB__
828	rev	r2,r2
829# endif
830#else
831	@ ldrb	r2,[r1,#3]			@ 12
832	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
833	ldrb	r12,[r1,#2]
834	ldrb	r0,[r1,#1]
835	orr	r2,r2,r12,lsl#8
836	ldrb	r12,[r1],#4
837	orr	r2,r2,r0,lsl#16
838# if 12==15
839	str	r1,[sp,#17*4]			@ make room for r1
840# endif
841	eor	r0,r4,r4,ror#5
842	orr	r2,r2,r12,lsl#24
843	eor	r0,r0,r4,ror#19	@ Sigma1(e)
844#endif
845	ldr	r12,[r14],#4			@ *K256++
846	add	r7,r7,r2			@ h+=X[i]
847	str	r2,[sp,#12*4]
848	eor	r2,r5,r6
849	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
850	and	r2,r2,r4
851	add	r7,r7,r12			@ h+=K256[i]
852	eor	r2,r2,r6			@ Ch(e,f,g)
853	eor	r0,r8,r8,ror#11
854	add	r7,r7,r2			@ h+=Ch(e,f,g)
855#if 12==31
856	and	r12,r12,#0xff
857	cmp	r12,#0xf2			@ done?
858#endif
859#if 12<15
860# if __ARM_ARCH__>=7
861	ldr	r2,[r1],#4			@ prefetch
862# else
863	ldrb	r2,[r1,#3]
864# endif
865	eor	r12,r8,r9			@ a^b, b^c in next round
866#else
867	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
868	eor	r12,r8,r9			@ a^b, b^c in next round
869	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
870#endif
871	eor	r0,r0,r8,ror#20	@ Sigma0(a)
872	and	r3,r3,r12			@ (b^c)&=(a^b)
873	add	r11,r11,r7			@ d+=h
874	eor	r3,r3,r9			@ Maj(a,b,c)
875	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
876	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
877#if __ARM_ARCH__>=7
878	@ ldr	r2,[r1],#4			@ 13
879# if 13==15
880	str	r1,[sp,#17*4]			@ make room for r1
881# endif
882	eor	r0,r11,r11,ror#5
883	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
884	eor	r0,r0,r11,ror#19	@ Sigma1(e)
885# ifndef __ARMEB__
886	rev	r2,r2
887# endif
888#else
889	@ ldrb	r2,[r1,#3]			@ 13
890	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
891	ldrb	r3,[r1,#2]
892	ldrb	r0,[r1,#1]
893	orr	r2,r2,r3,lsl#8
894	ldrb	r3,[r1],#4
895	orr	r2,r2,r0,lsl#16
896# if 13==15
897	str	r1,[sp,#17*4]			@ make room for r1
898# endif
899	eor	r0,r11,r11,ror#5
900	orr	r2,r2,r3,lsl#24
901	eor	r0,r0,r11,ror#19	@ Sigma1(e)
902#endif
903	ldr	r3,[r14],#4			@ *K256++
904	add	r6,r6,r2			@ h+=X[i]
905	str	r2,[sp,#13*4]
906	eor	r2,r4,r5
907	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
908	and	r2,r2,r11
909	add	r6,r6,r3			@ h+=K256[i]
910	eor	r2,r2,r5			@ Ch(e,f,g)
911	eor	r0,r7,r7,ror#11
912	add	r6,r6,r2			@ h+=Ch(e,f,g)
913#if 13==31
914	and	r3,r3,#0xff
915	cmp	r3,#0xf2			@ done?
916#endif
917#if 13<15
918# if __ARM_ARCH__>=7
919	ldr	r2,[r1],#4			@ prefetch
920# else
921	ldrb	r2,[r1,#3]
922# endif
923	eor	r3,r7,r8			@ a^b, b^c in next round
924#else
925	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
926	eor	r3,r7,r8			@ a^b, b^c in next round
927	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
928#endif
929	eor	r0,r0,r7,ror#20	@ Sigma0(a)
930	and	r12,r12,r3			@ (b^c)&=(a^b)
931	add	r10,r10,r6			@ d+=h
932	eor	r12,r12,r8			@ Maj(a,b,c)
933	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
934	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
935#if __ARM_ARCH__>=7
936	@ ldr	r2,[r1],#4			@ 14
937# if 14==15
938	str	r1,[sp,#17*4]			@ make room for r1
939# endif
940	eor	r0,r10,r10,ror#5
941	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
942	eor	r0,r0,r10,ror#19	@ Sigma1(e)
943# ifndef __ARMEB__
944	rev	r2,r2
945# endif
946#else
947	@ ldrb	r2,[r1,#3]			@ 14
948	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
949	ldrb	r12,[r1,#2]
950	ldrb	r0,[r1,#1]
951	orr	r2,r2,r12,lsl#8
952	ldrb	r12,[r1],#4
953	orr	r2,r2,r0,lsl#16
954# if 14==15
955	str	r1,[sp,#17*4]			@ make room for r1
956# endif
957	eor	r0,r10,r10,ror#5
958	orr	r2,r2,r12,lsl#24
959	eor	r0,r0,r10,ror#19	@ Sigma1(e)
960#endif
961	ldr	r12,[r14],#4			@ *K256++
962	add	r5,r5,r2			@ h+=X[i]
963	str	r2,[sp,#14*4]
964	eor	r2,r11,r4
965	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
966	and	r2,r2,r10
967	add	r5,r5,r12			@ h+=K256[i]
968	eor	r2,r2,r4			@ Ch(e,f,g)
969	eor	r0,r6,r6,ror#11
970	add	r5,r5,r2			@ h+=Ch(e,f,g)
971#if 14==31
972	and	r12,r12,#0xff
973	cmp	r12,#0xf2			@ done?
974#endif
975#if 14<15
976# if __ARM_ARCH__>=7
977	ldr	r2,[r1],#4			@ prefetch
978# else
979	ldrb	r2,[r1,#3]
980# endif
981	eor	r12,r6,r7			@ a^b, b^c in next round
982#else
983	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
984	eor	r12,r6,r7			@ a^b, b^c in next round
985	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
986#endif
987	eor	r0,r0,r6,ror#20	@ Sigma0(a)
988	and	r3,r3,r12			@ (b^c)&=(a^b)
989	add	r9,r9,r5			@ d+=h
990	eor	r3,r3,r7			@ Maj(a,b,c)
991	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
992	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
993#if __ARM_ARCH__>=7
994	@ ldr	r2,[r1],#4			@ 15
995# if 15==15
996	str	r1,[sp,#17*4]			@ make room for r1
997# endif
998	eor	r0,r9,r9,ror#5
999	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1000	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1001# ifndef __ARMEB__
1002	rev	r2,r2
1003# endif
1004#else
1005	@ ldrb	r2,[r1,#3]			@ 15
1006	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1007	ldrb	r3,[r1,#2]
1008	ldrb	r0,[r1,#1]
1009	orr	r2,r2,r3,lsl#8
1010	ldrb	r3,[r1],#4
1011	orr	r2,r2,r0,lsl#16
1012# if 15==15
1013	str	r1,[sp,#17*4]			@ make room for r1
1014# endif
1015	eor	r0,r9,r9,ror#5
1016	orr	r2,r2,r3,lsl#24
1017	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1018#endif
1019	ldr	r3,[r14],#4			@ *K256++
1020	add	r4,r4,r2			@ h+=X[i]
1021	str	r2,[sp,#15*4]
1022	eor	r2,r10,r11
1023	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1024	and	r2,r2,r9
1025	add	r4,r4,r3			@ h+=K256[i]
1026	eor	r2,r2,r11			@ Ch(e,f,g)
1027	eor	r0,r5,r5,ror#11
1028	add	r4,r4,r2			@ h+=Ch(e,f,g)
1029#if 15==31
1030	and	r3,r3,#0xff
1031	cmp	r3,#0xf2			@ done?
1032#endif
1033#if 15<15
1034# if __ARM_ARCH__>=7
1035	ldr	r2,[r1],#4			@ prefetch
1036# else
1037	ldrb	r2,[r1,#3]
1038# endif
1039	eor	r3,r5,r6			@ a^b, b^c in next round
1040#else
1041	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1042	eor	r3,r5,r6			@ a^b, b^c in next round
1043	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1044#endif
1045	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1046	and	r12,r12,r3			@ (b^c)&=(a^b)
1047	add	r8,r8,r4			@ d+=h
1048	eor	r12,r12,r6			@ Maj(a,b,c)
1049	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1050	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1051.Lrounds_16_xx:
1052	@ ldr	r2,[sp,#1*4]		@ 16
1053	@ ldr	r1,[sp,#14*4]
1054	mov	r0,r2,ror#7
1055	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1056	mov	r12,r1,ror#17
1057	eor	r0,r0,r2,ror#18
1058	eor	r12,r12,r1,ror#19
1059	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1060	ldr	r2,[sp,#0*4]
1061	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1062	ldr	r1,[sp,#9*4]
1063
1064	add	r12,r12,r0
1065	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1066	add	r2,r2,r12
1067	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1068	add	r2,r2,r1			@ X[i]
1069	ldr	r12,[r14],#4			@ *K256++
1070	add	r11,r11,r2			@ h+=X[i]
1071	str	r2,[sp,#0*4]
1072	eor	r2,r9,r10
1073	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1074	and	r2,r2,r8
1075	add	r11,r11,r12			@ h+=K256[i]
1076	eor	r2,r2,r10			@ Ch(e,f,g)
1077	eor	r0,r4,r4,ror#11
1078	add	r11,r11,r2			@ h+=Ch(e,f,g)
1079#if 16==31
1080	and	r12,r12,#0xff
1081	cmp	r12,#0xf2			@ done?
1082#endif
1083#if 16<15
1084# if __ARM_ARCH__>=7
1085	ldr	r2,[r1],#4			@ prefetch
1086# else
1087	ldrb	r2,[r1,#3]
1088# endif
1089	eor	r12,r4,r5			@ a^b, b^c in next round
1090#else
1091	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1092	eor	r12,r4,r5			@ a^b, b^c in next round
1093	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1094#endif
1095	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1096	and	r3,r3,r12			@ (b^c)&=(a^b)
1097	add	r7,r7,r11			@ d+=h
1098	eor	r3,r3,r5			@ Maj(a,b,c)
1099	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1100	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1101	@ ldr	r2,[sp,#2*4]		@ 17
1102	@ ldr	r1,[sp,#15*4]
1103	mov	r0,r2,ror#7
1104	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1105	mov	r3,r1,ror#17
1106	eor	r0,r0,r2,ror#18
1107	eor	r3,r3,r1,ror#19
1108	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1109	ldr	r2,[sp,#1*4]
1110	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1111	ldr	r1,[sp,#10*4]
1112
1113	add	r3,r3,r0
1114	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1115	add	r2,r2,r3
1116	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1117	add	r2,r2,r1			@ X[i]
1118	ldr	r3,[r14],#4			@ *K256++
1119	add	r10,r10,r2			@ h+=X[i]
1120	str	r2,[sp,#1*4]
1121	eor	r2,r8,r9
1122	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1123	and	r2,r2,r7
1124	add	r10,r10,r3			@ h+=K256[i]
1125	eor	r2,r2,r9			@ Ch(e,f,g)
1126	eor	r0,r11,r11,ror#11
1127	add	r10,r10,r2			@ h+=Ch(e,f,g)
1128#if 17==31
1129	and	r3,r3,#0xff
1130	cmp	r3,#0xf2			@ done?
1131#endif
1132#if 17<15
1133# if __ARM_ARCH__>=7
1134	ldr	r2,[r1],#4			@ prefetch
1135# else
1136	ldrb	r2,[r1,#3]
1137# endif
1138	eor	r3,r11,r4			@ a^b, b^c in next round
1139#else
1140	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1141	eor	r3,r11,r4			@ a^b, b^c in next round
1142	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1143#endif
1144	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1145	and	r12,r12,r3			@ (b^c)&=(a^b)
1146	add	r6,r6,r10			@ d+=h
1147	eor	r12,r12,r4			@ Maj(a,b,c)
1148	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1149	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1150	@ ldr	r2,[sp,#3*4]		@ 18
1151	@ ldr	r1,[sp,#0*4]
1152	mov	r0,r2,ror#7
1153	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1154	mov	r12,r1,ror#17
1155	eor	r0,r0,r2,ror#18
1156	eor	r12,r12,r1,ror#19
1157	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1158	ldr	r2,[sp,#2*4]
1159	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1160	ldr	r1,[sp,#11*4]
1161
1162	add	r12,r12,r0
1163	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1164	add	r2,r2,r12
1165	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1166	add	r2,r2,r1			@ X[i]
1167	ldr	r12,[r14],#4			@ *K256++
1168	add	r9,r9,r2			@ h+=X[i]
1169	str	r2,[sp,#2*4]
1170	eor	r2,r7,r8
1171	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1172	and	r2,r2,r6
1173	add	r9,r9,r12			@ h+=K256[i]
1174	eor	r2,r2,r8			@ Ch(e,f,g)
1175	eor	r0,r10,r10,ror#11
1176	add	r9,r9,r2			@ h+=Ch(e,f,g)
1177#if 18==31
1178	and	r12,r12,#0xff
1179	cmp	r12,#0xf2			@ done?
1180#endif
1181#if 18<15
1182# if __ARM_ARCH__>=7
1183	ldr	r2,[r1],#4			@ prefetch
1184# else
1185	ldrb	r2,[r1,#3]
1186# endif
1187	eor	r12,r10,r11			@ a^b, b^c in next round
1188#else
1189	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1190	eor	r12,r10,r11			@ a^b, b^c in next round
1191	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1192#endif
1193	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1194	and	r3,r3,r12			@ (b^c)&=(a^b)
1195	add	r5,r5,r9			@ d+=h
1196	eor	r3,r3,r11			@ Maj(a,b,c)
1197	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1198	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1199	@ ldr	r2,[sp,#4*4]		@ 19
1200	@ ldr	r1,[sp,#1*4]
1201	mov	r0,r2,ror#7
1202	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1203	mov	r3,r1,ror#17
1204	eor	r0,r0,r2,ror#18
1205	eor	r3,r3,r1,ror#19
1206	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1207	ldr	r2,[sp,#3*4]
1208	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1209	ldr	r1,[sp,#12*4]
1210
1211	add	r3,r3,r0
1212	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1213	add	r2,r2,r3
1214	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1215	add	r2,r2,r1			@ X[i]
1216	ldr	r3,[r14],#4			@ *K256++
1217	add	r8,r8,r2			@ h+=X[i]
1218	str	r2,[sp,#3*4]
1219	eor	r2,r6,r7
1220	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1221	and	r2,r2,r5
1222	add	r8,r8,r3			@ h+=K256[i]
1223	eor	r2,r2,r7			@ Ch(e,f,g)
1224	eor	r0,r9,r9,ror#11
1225	add	r8,r8,r2			@ h+=Ch(e,f,g)
1226#if 19==31
1227	and	r3,r3,#0xff
1228	cmp	r3,#0xf2			@ done?
1229#endif
1230#if 19<15
1231# if __ARM_ARCH__>=7
1232	ldr	r2,[r1],#4			@ prefetch
1233# else
1234	ldrb	r2,[r1,#3]
1235# endif
1236	eor	r3,r9,r10			@ a^b, b^c in next round
1237#else
1238	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1239	eor	r3,r9,r10			@ a^b, b^c in next round
1240	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1241#endif
1242	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1243	and	r12,r12,r3			@ (b^c)&=(a^b)
1244	add	r4,r4,r8			@ d+=h
1245	eor	r12,r12,r10			@ Maj(a,b,c)
1246	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1247	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1248	@ ldr	r2,[sp,#5*4]		@ 20
1249	@ ldr	r1,[sp,#2*4]
1250	mov	r0,r2,ror#7
1251	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1252	mov	r12,r1,ror#17
1253	eor	r0,r0,r2,ror#18
1254	eor	r12,r12,r1,ror#19
1255	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1256	ldr	r2,[sp,#4*4]
1257	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1258	ldr	r1,[sp,#13*4]
1259
1260	add	r12,r12,r0
1261	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1262	add	r2,r2,r12
1263	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1264	add	r2,r2,r1			@ X[i]
1265	ldr	r12,[r14],#4			@ *K256++
1266	add	r7,r7,r2			@ h+=X[i]
1267	str	r2,[sp,#4*4]
1268	eor	r2,r5,r6
1269	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1270	and	r2,r2,r4
1271	add	r7,r7,r12			@ h+=K256[i]
1272	eor	r2,r2,r6			@ Ch(e,f,g)
1273	eor	r0,r8,r8,ror#11
1274	add	r7,r7,r2			@ h+=Ch(e,f,g)
1275#if 20==31
1276	and	r12,r12,#0xff
1277	cmp	r12,#0xf2			@ done?
1278#endif
1279#if 20<15
1280# if __ARM_ARCH__>=7
1281	ldr	r2,[r1],#4			@ prefetch
1282# else
1283	ldrb	r2,[r1,#3]
1284# endif
1285	eor	r12,r8,r9			@ a^b, b^c in next round
1286#else
1287	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1288	eor	r12,r8,r9			@ a^b, b^c in next round
1289	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1290#endif
1291	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1292	and	r3,r3,r12			@ (b^c)&=(a^b)
1293	add	r11,r11,r7			@ d+=h
1294	eor	r3,r3,r9			@ Maj(a,b,c)
1295	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1296	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1297	@ ldr	r2,[sp,#6*4]		@ 21
1298	@ ldr	r1,[sp,#3*4]
1299	mov	r0,r2,ror#7
1300	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1301	mov	r3,r1,ror#17
1302	eor	r0,r0,r2,ror#18
1303	eor	r3,r3,r1,ror#19
1304	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1305	ldr	r2,[sp,#5*4]
1306	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1307	ldr	r1,[sp,#14*4]
1308
1309	add	r3,r3,r0
1310	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1311	add	r2,r2,r3
1312	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1313	add	r2,r2,r1			@ X[i]
1314	ldr	r3,[r14],#4			@ *K256++
1315	add	r6,r6,r2			@ h+=X[i]
1316	str	r2,[sp,#5*4]
1317	eor	r2,r4,r5
1318	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1319	and	r2,r2,r11
1320	add	r6,r6,r3			@ h+=K256[i]
1321	eor	r2,r2,r5			@ Ch(e,f,g)
1322	eor	r0,r7,r7,ror#11
1323	add	r6,r6,r2			@ h+=Ch(e,f,g)
1324#if 21==31
1325	and	r3,r3,#0xff
1326	cmp	r3,#0xf2			@ done?
1327#endif
1328#if 21<15
1329# if __ARM_ARCH__>=7
1330	ldr	r2,[r1],#4			@ prefetch
1331# else
1332	ldrb	r2,[r1,#3]
1333# endif
1334	eor	r3,r7,r8			@ a^b, b^c in next round
1335#else
1336	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1337	eor	r3,r7,r8			@ a^b, b^c in next round
1338	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1339#endif
1340	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1341	and	r12,r12,r3			@ (b^c)&=(a^b)
1342	add	r10,r10,r6			@ d+=h
1343	eor	r12,r12,r8			@ Maj(a,b,c)
1344	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1345	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1346	@ ldr	r2,[sp,#7*4]		@ 22
1347	@ ldr	r1,[sp,#4*4]
1348	mov	r0,r2,ror#7
1349	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1350	mov	r12,r1,ror#17
1351	eor	r0,r0,r2,ror#18
1352	eor	r12,r12,r1,ror#19
1353	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1354	ldr	r2,[sp,#6*4]
1355	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1356	ldr	r1,[sp,#15*4]
1357
1358	add	r12,r12,r0
1359	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1360	add	r2,r2,r12
1361	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1362	add	r2,r2,r1			@ X[i]
1363	ldr	r12,[r14],#4			@ *K256++
1364	add	r5,r5,r2			@ h+=X[i]
1365	str	r2,[sp,#6*4]
1366	eor	r2,r11,r4
1367	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1368	and	r2,r2,r10
1369	add	r5,r5,r12			@ h+=K256[i]
1370	eor	r2,r2,r4			@ Ch(e,f,g)
1371	eor	r0,r6,r6,ror#11
1372	add	r5,r5,r2			@ h+=Ch(e,f,g)
1373#if 22==31
1374	and	r12,r12,#0xff
1375	cmp	r12,#0xf2			@ done?
1376#endif
1377#if 22<15
1378# if __ARM_ARCH__>=7
1379	ldr	r2,[r1],#4			@ prefetch
1380# else
1381	ldrb	r2,[r1,#3]
1382# endif
1383	eor	r12,r6,r7			@ a^b, b^c in next round
1384#else
1385	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1386	eor	r12,r6,r7			@ a^b, b^c in next round
1387	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1388#endif
1389	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1390	and	r3,r3,r12			@ (b^c)&=(a^b)
1391	add	r9,r9,r5			@ d+=h
1392	eor	r3,r3,r7			@ Maj(a,b,c)
1393	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1394	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1395	@ ldr	r2,[sp,#8*4]		@ 23
1396	@ ldr	r1,[sp,#5*4]
1397	mov	r0,r2,ror#7
1398	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1399	mov	r3,r1,ror#17
1400	eor	r0,r0,r2,ror#18
1401	eor	r3,r3,r1,ror#19
1402	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1403	ldr	r2,[sp,#7*4]
1404	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1405	ldr	r1,[sp,#0*4]
1406
1407	add	r3,r3,r0
1408	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1409	add	r2,r2,r3
1410	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1411	add	r2,r2,r1			@ X[i]
1412	ldr	r3,[r14],#4			@ *K256++
1413	add	r4,r4,r2			@ h+=X[i]
1414	str	r2,[sp,#7*4]
1415	eor	r2,r10,r11
1416	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1417	and	r2,r2,r9
1418	add	r4,r4,r3			@ h+=K256[i]
1419	eor	r2,r2,r11			@ Ch(e,f,g)
1420	eor	r0,r5,r5,ror#11
1421	add	r4,r4,r2			@ h+=Ch(e,f,g)
1422#if 23==31
1423	and	r3,r3,#0xff
1424	cmp	r3,#0xf2			@ done?
1425#endif
1426#if 23<15
1427# if __ARM_ARCH__>=7
1428	ldr	r2,[r1],#4			@ prefetch
1429# else
1430	ldrb	r2,[r1,#3]
1431# endif
1432	eor	r3,r5,r6			@ a^b, b^c in next round
1433#else
1434	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1435	eor	r3,r5,r6			@ a^b, b^c in next round
1436	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1437#endif
1438	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1439	and	r12,r12,r3			@ (b^c)&=(a^b)
1440	add	r8,r8,r4			@ d+=h
1441	eor	r12,r12,r6			@ Maj(a,b,c)
1442	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1443	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1444	@ ldr	r2,[sp,#9*4]		@ 24
1445	@ ldr	r1,[sp,#6*4]
1446	mov	r0,r2,ror#7
1447	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1448	mov	r12,r1,ror#17
1449	eor	r0,r0,r2,ror#18
1450	eor	r12,r12,r1,ror#19
1451	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1452	ldr	r2,[sp,#8*4]
1453	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1454	ldr	r1,[sp,#1*4]
1455
1456	add	r12,r12,r0
1457	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1458	add	r2,r2,r12
1459	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1460	add	r2,r2,r1			@ X[i]
1461	ldr	r12,[r14],#4			@ *K256++
1462	add	r11,r11,r2			@ h+=X[i]
1463	str	r2,[sp,#8*4]
1464	eor	r2,r9,r10
1465	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1466	and	r2,r2,r8
1467	add	r11,r11,r12			@ h+=K256[i]
1468	eor	r2,r2,r10			@ Ch(e,f,g)
1469	eor	r0,r4,r4,ror#11
1470	add	r11,r11,r2			@ h+=Ch(e,f,g)
1471#if 24==31
1472	and	r12,r12,#0xff
1473	cmp	r12,#0xf2			@ done?
1474#endif
1475#if 24<15
1476# if __ARM_ARCH__>=7
1477	ldr	r2,[r1],#4			@ prefetch
1478# else
1479	ldrb	r2,[r1,#3]
1480# endif
1481	eor	r12,r4,r5			@ a^b, b^c in next round
1482#else
1483	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1484	eor	r12,r4,r5			@ a^b, b^c in next round
1485	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1486#endif
1487	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1488	and	r3,r3,r12			@ (b^c)&=(a^b)
1489	add	r7,r7,r11			@ d+=h
1490	eor	r3,r3,r5			@ Maj(a,b,c)
1491	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1492	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1493	@ ldr	r2,[sp,#10*4]		@ 25
1494	@ ldr	r1,[sp,#7*4]
1495	mov	r0,r2,ror#7
1496	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1497	mov	r3,r1,ror#17
1498	eor	r0,r0,r2,ror#18
1499	eor	r3,r3,r1,ror#19
1500	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1501	ldr	r2,[sp,#9*4]
1502	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1503	ldr	r1,[sp,#2*4]
1504
1505	add	r3,r3,r0
1506	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1507	add	r2,r2,r3
1508	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1509	add	r2,r2,r1			@ X[i]
1510	ldr	r3,[r14],#4			@ *K256++
1511	add	r10,r10,r2			@ h+=X[i]
1512	str	r2,[sp,#9*4]
1513	eor	r2,r8,r9
1514	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1515	and	r2,r2,r7
1516	add	r10,r10,r3			@ h+=K256[i]
1517	eor	r2,r2,r9			@ Ch(e,f,g)
1518	eor	r0,r11,r11,ror#11
1519	add	r10,r10,r2			@ h+=Ch(e,f,g)
1520#if 25==31
1521	and	r3,r3,#0xff
1522	cmp	r3,#0xf2			@ done?
1523#endif
1524#if 25<15
1525# if __ARM_ARCH__>=7
1526	ldr	r2,[r1],#4			@ prefetch
1527# else
1528	ldrb	r2,[r1,#3]
1529# endif
1530	eor	r3,r11,r4			@ a^b, b^c in next round
1531#else
1532	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1533	eor	r3,r11,r4			@ a^b, b^c in next round
1534	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1535#endif
1536	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1537	and	r12,r12,r3			@ (b^c)&=(a^b)
1538	add	r6,r6,r10			@ d+=h
1539	eor	r12,r12,r4			@ Maj(a,b,c)
1540	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1541	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1542	@ ldr	r2,[sp,#11*4]		@ 26
1543	@ ldr	r1,[sp,#8*4]
1544	mov	r0,r2,ror#7
1545	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1546	mov	r12,r1,ror#17
1547	eor	r0,r0,r2,ror#18
1548	eor	r12,r12,r1,ror#19
1549	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1550	ldr	r2,[sp,#10*4]
1551	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1552	ldr	r1,[sp,#3*4]
1553
1554	add	r12,r12,r0
1555	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1556	add	r2,r2,r12
1557	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1558	add	r2,r2,r1			@ X[i]
1559	ldr	r12,[r14],#4			@ *K256++
1560	add	r9,r9,r2			@ h+=X[i]
1561	str	r2,[sp,#10*4]
1562	eor	r2,r7,r8
1563	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1564	and	r2,r2,r6
1565	add	r9,r9,r12			@ h+=K256[i]
1566	eor	r2,r2,r8			@ Ch(e,f,g)
1567	eor	r0,r10,r10,ror#11
1568	add	r9,r9,r2			@ h+=Ch(e,f,g)
1569#if 26==31
1570	and	r12,r12,#0xff
1571	cmp	r12,#0xf2			@ done?
1572#endif
1573#if 26<15
1574# if __ARM_ARCH__>=7
1575	ldr	r2,[r1],#4			@ prefetch
1576# else
1577	ldrb	r2,[r1,#3]
1578# endif
1579	eor	r12,r10,r11			@ a^b, b^c in next round
1580#else
1581	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1582	eor	r12,r10,r11			@ a^b, b^c in next round
1583	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1584#endif
1585	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1586	and	r3,r3,r12			@ (b^c)&=(a^b)
1587	add	r5,r5,r9			@ d+=h
1588	eor	r3,r3,r11			@ Maj(a,b,c)
1589	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1590	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1591	@ ldr	r2,[sp,#12*4]		@ 27
1592	@ ldr	r1,[sp,#9*4]
1593	mov	r0,r2,ror#7
1594	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1595	mov	r3,r1,ror#17
1596	eor	r0,r0,r2,ror#18
1597	eor	r3,r3,r1,ror#19
1598	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1599	ldr	r2,[sp,#11*4]
1600	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1601	ldr	r1,[sp,#4*4]
1602
1603	add	r3,r3,r0
1604	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1605	add	r2,r2,r3
1606	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1607	add	r2,r2,r1			@ X[i]
1608	ldr	r3,[r14],#4			@ *K256++
1609	add	r8,r8,r2			@ h+=X[i]
1610	str	r2,[sp,#11*4]
1611	eor	r2,r6,r7
1612	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1613	and	r2,r2,r5
1614	add	r8,r8,r3			@ h+=K256[i]
1615	eor	r2,r2,r7			@ Ch(e,f,g)
1616	eor	r0,r9,r9,ror#11
1617	add	r8,r8,r2			@ h+=Ch(e,f,g)
1618#if 27==31
1619	and	r3,r3,#0xff
1620	cmp	r3,#0xf2			@ done?
1621#endif
1622#if 27<15
1623# if __ARM_ARCH__>=7
1624	ldr	r2,[r1],#4			@ prefetch
1625# else
1626	ldrb	r2,[r1,#3]
1627# endif
1628	eor	r3,r9,r10			@ a^b, b^c in next round
1629#else
1630	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1631	eor	r3,r9,r10			@ a^b, b^c in next round
1632	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1633#endif
1634	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1635	and	r12,r12,r3			@ (b^c)&=(a^b)
1636	add	r4,r4,r8			@ d+=h
1637	eor	r12,r12,r10			@ Maj(a,b,c)
1638	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1639	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1640	@ ldr	r2,[sp,#13*4]		@ 28
1641	@ ldr	r1,[sp,#10*4]
1642	mov	r0,r2,ror#7
1643	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1644	mov	r12,r1,ror#17
1645	eor	r0,r0,r2,ror#18
1646	eor	r12,r12,r1,ror#19
1647	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1648	ldr	r2,[sp,#12*4]
1649	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1650	ldr	r1,[sp,#5*4]
1651
1652	add	r12,r12,r0
1653	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1654	add	r2,r2,r12
1655	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1656	add	r2,r2,r1			@ X[i]
1657	ldr	r12,[r14],#4			@ *K256++
1658	add	r7,r7,r2			@ h+=X[i]
1659	str	r2,[sp,#12*4]
1660	eor	r2,r5,r6
1661	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1662	and	r2,r2,r4
1663	add	r7,r7,r12			@ h+=K256[i]
1664	eor	r2,r2,r6			@ Ch(e,f,g)
1665	eor	r0,r8,r8,ror#11
1666	add	r7,r7,r2			@ h+=Ch(e,f,g)
1667#if 28==31
1668	and	r12,r12,#0xff
1669	cmp	r12,#0xf2			@ done?
1670#endif
1671#if 28<15
1672# if __ARM_ARCH__>=7
1673	ldr	r2,[r1],#4			@ prefetch
1674# else
1675	ldrb	r2,[r1,#3]
1676# endif
1677	eor	r12,r8,r9			@ a^b, b^c in next round
1678#else
1679	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1680	eor	r12,r8,r9			@ a^b, b^c in next round
1681	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1682#endif
1683	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1684	and	r3,r3,r12			@ (b^c)&=(a^b)
1685	add	r11,r11,r7			@ d+=h
1686	eor	r3,r3,r9			@ Maj(a,b,c)
1687	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1688	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1689	@ ldr	r2,[sp,#14*4]		@ 29
1690	@ ldr	r1,[sp,#11*4]
1691	mov	r0,r2,ror#7
1692	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1693	mov	r3,r1,ror#17
1694	eor	r0,r0,r2,ror#18
1695	eor	r3,r3,r1,ror#19
1696	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1697	ldr	r2,[sp,#13*4]
1698	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1699	ldr	r1,[sp,#6*4]
1700
1701	add	r3,r3,r0
1702	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1703	add	r2,r2,r3
1704	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1705	add	r2,r2,r1			@ X[i]
1706	ldr	r3,[r14],#4			@ *K256++
1707	add	r6,r6,r2			@ h+=X[i]
1708	str	r2,[sp,#13*4]
1709	eor	r2,r4,r5
1710	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1711	and	r2,r2,r11
1712	add	r6,r6,r3			@ h+=K256[i]
1713	eor	r2,r2,r5			@ Ch(e,f,g)
1714	eor	r0,r7,r7,ror#11
1715	add	r6,r6,r2			@ h+=Ch(e,f,g)
1716#if 29==31
1717	and	r3,r3,#0xff
1718	cmp	r3,#0xf2			@ done?
1719#endif
1720#if 29<15
1721# if __ARM_ARCH__>=7
1722	ldr	r2,[r1],#4			@ prefetch
1723# else
1724	ldrb	r2,[r1,#3]
1725# endif
1726	eor	r3,r7,r8			@ a^b, b^c in next round
1727#else
1728	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1729	eor	r3,r7,r8			@ a^b, b^c in next round
1730	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1731#endif
1732	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1733	and	r12,r12,r3			@ (b^c)&=(a^b)
1734	add	r10,r10,r6			@ d+=h
1735	eor	r12,r12,r8			@ Maj(a,b,c)
1736	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1737	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1738	@ ldr	r2,[sp,#15*4]		@ 30
1739	@ ldr	r1,[sp,#12*4]
1740	mov	r0,r2,ror#7
1741	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1742	mov	r12,r1,ror#17
1743	eor	r0,r0,r2,ror#18
1744	eor	r12,r12,r1,ror#19
1745	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1746	ldr	r2,[sp,#14*4]
1747	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1748	ldr	r1,[sp,#7*4]
1749
1750	add	r12,r12,r0
1751	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1752	add	r2,r2,r12
1753	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1754	add	r2,r2,r1			@ X[i]
1755	ldr	r12,[r14],#4			@ *K256++
1756	add	r5,r5,r2			@ h+=X[i]
1757	str	r2,[sp,#14*4]
1758	eor	r2,r11,r4
1759	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1760	and	r2,r2,r10
1761	add	r5,r5,r12			@ h+=K256[i]
1762	eor	r2,r2,r4			@ Ch(e,f,g)
1763	eor	r0,r6,r6,ror#11
1764	add	r5,r5,r2			@ h+=Ch(e,f,g)
1765#if 30==31
1766	and	r12,r12,#0xff
1767	cmp	r12,#0xf2			@ done?
1768#endif
1769#if 30<15
1770# if __ARM_ARCH__>=7
1771	ldr	r2,[r1],#4			@ prefetch
1772# else
1773	ldrb	r2,[r1,#3]
1774# endif
1775	eor	r12,r6,r7			@ a^b, b^c in next round
1776#else
1777	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1778	eor	r12,r6,r7			@ a^b, b^c in next round
1779	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1780#endif
1781	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1782	and	r3,r3,r12			@ (b^c)&=(a^b)
1783	add	r9,r9,r5			@ d+=h
1784	eor	r3,r3,r7			@ Maj(a,b,c)
1785	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1786	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1787	@ ldr	r2,[sp,#0*4]		@ 31
1788	@ ldr	r1,[sp,#13*4]
1789	mov	r0,r2,ror#7
1790	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1791	mov	r3,r1,ror#17
1792	eor	r0,r0,r2,ror#18
1793	eor	r3,r3,r1,ror#19
1794	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1795	ldr	r2,[sp,#15*4]
1796	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1797	ldr	r1,[sp,#8*4]
1798
1799	add	r3,r3,r0
1800	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1801	add	r2,r2,r3
1802	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1803	add	r2,r2,r1			@ X[i]
1804	ldr	r3,[r14],#4			@ *K256++
1805	add	r4,r4,r2			@ h+=X[i]
1806	str	r2,[sp,#15*4]
1807	eor	r2,r10,r11
1808	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1809	and	r2,r2,r9
1810	add	r4,r4,r3			@ h+=K256[i]
1811	eor	r2,r2,r11			@ Ch(e,f,g)
1812	eor	r0,r5,r5,ror#11
1813	add	r4,r4,r2			@ h+=Ch(e,f,g)
1814#if 31==31
1815	and	r3,r3,#0xff
1816	cmp	r3,#0xf2			@ done?
1817#endif
1818#if 31<15
1819# if __ARM_ARCH__>=7
1820	ldr	r2,[r1],#4			@ prefetch
1821# else
1822	ldrb	r2,[r1,#3]
1823# endif
1824	eor	r3,r5,r6			@ a^b, b^c in next round
1825#else
1826	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1827	eor	r3,r5,r6			@ a^b, b^c in next round
1828	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1829#endif
1830	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1831	and	r12,r12,r3			@ (b^c)&=(a^b)
1832	add	r8,r8,r4			@ d+=h
1833	eor	r12,r12,r6			@ Maj(a,b,c)
1834	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1835	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1836#ifdef	__thumb2__
1837	ite	eq			@ Thumb2 thing, sanity check in ARM
1838#endif
1839	ldreq	r3,[sp,#16*4]		@ pull ctx
1840	bne	.Lrounds_16_xx
1841
@ End of one 64-byte block (reached when the "done?" compare in round 31
@ was equal): fold the eight working variables r4-r11 back into the hash
@ state, then either start the next block or tear down the frame and return.
@ r3 is the state pointer here; it was loaded from [sp,#16*4] by the
@ conditional ldreq just above.
@ Frame slots referenced by this function's visible code: words 0-15 hold
@ X[], word 16 = ctx, word 17 = inp, word 18 = inp+len (19 words total).
1842	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
@ Load the eight stored state words (offsets 0..28) and add them to r4-r11.
@ Loads and adds are interleaved through three scratch registers (r0, r2, r12).
1843	ldr	r0,[r3,#0]
1844	ldr	r2,[r3,#4]
1845	ldr	r12,[r3,#8]
1846	add	r4,r4,r0
1847	ldr	r0,[r3,#12]
1848	add	r5,r5,r2
1849	ldr	r2,[r3,#16]
1850	add	r6,r6,r12
1851	ldr	r12,[r3,#20]
1852	add	r7,r7,r0
1853	ldr	r0,[r3,#24]
1854	add	r8,r8,r2
1855	ldr	r2,[r3,#28]
1856	add	r9,r9,r12
1857	ldr	r1,[sp,#17*4]		@ pull inp
1858	ldr	r12,[sp,#18*4]		@ pull inp+len
1859	add	r10,r10,r0
1860	add	r11,r11,r2
@ Write the updated state back in a single store-multiple.
1861	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
@ Flags from this cmp are consumed by the bne two instructions later; the
@ sub in between has no S suffix and therefore leaves them untouched.
1862	cmp	r1,r12
1863	sub	r14,r14,#256	@ rewind Ktbl
1864	bne	.Loop
1865
1866	add	sp,sp,#19*4	@ destroy frame
@ Return. With __ARM_ARCH__>=5 the saved registers are popped and pc is
@ loaded directly from the stack.
1867#if __ARM_ARCH__>=5
1868	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1869#else
@ Older targets: restore lr instead of pc and test bit 0 of the return
@ address. If it is clear, return with a plain mov to pc; otherwise fall
@ through to the hand-encoded word below (0xe12fff1e is the encoding of
@ "bx lr"), presumably emitted as data so that an ARMv4-only assembler
@ still accepts the file.
1870	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1871	tst	lr,#1
1872	moveq	pc,lr			@ be binary compatible with V4, yet
1873.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1874#endif
1875.size	sha256_block_data_order,.-sha256_block_data_order
1876#if __ARM_MAX_ARCH__>=7
1877.arch	armv7-a
1878.fpu	neon
1879
@ sha256_block_data_order_neon -- NEON-assisted entry point.
@ Inputs as used by the visible code:
@   r0  pointer to eight 32-bit state words (loaded into r4-r11 below)
@   r1  input pointer
@   r2  scaled by 64 and added to r1 to form the end-of-input pointer
@       (presumably a count of 64-byte blocks -- confirm against the caller)
@ This prologue builds an aligned stack frame, loads and byte-swaps the
@ first 64 input bytes, pre-computes X[i]+K256[i] for i=0..15 onto the
@ stack, loads the state and branches to the round loop.
@ Frame layout established here (offsets from the new sp):
@   0..63  sixteen X[i]+K256[i] words     64  r0 as passed in
@   68     input pointer after this block  72  end-of-input pointer
@   76     sp value after the register push below
1880.globl	sha256_block_data_order_neon
1881.type	sha256_block_data_order_neon,%function
1882.align	5
1883.skip	16
1884sha256_block_data_order_neon:
1885.LNEON:
1886	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1887
@ Reserve 16*4+16 bytes below sp and round the result down to a 16-byte
@ boundary; r12 keeps the pre-alloca sp so it can be saved in the frame.
1888	sub	r11,sp,#16*4+16
1889	adr	r14,K256
1890	bic	r11,r11,#15		@ align for 128-bit stores
1891	mov	r12,sp
1892	mov	sp,r11			@ alloca
1893	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1894
@ q0-q3 <- 64 bytes of input (r1 post-incremented past them);
@ q8-q11 <- first sixteen K256 words (r14 post-incremented past them).
1895	vld1.8	{q0},[r1]!
1896	vld1.8	{q1},[r1]!
1897	vld1.8	{q2},[r1]!
1898	vld1.8	{q3},[r1]!
1899	vld1.32	{q8},[r14,:128]!
1900	vld1.32	{q9},[r14,:128]!
1901	vld1.32	{q10},[r14,:128]!
1902	vld1.32	{q11},[r14,:128]!
@ Reverse the bytes inside every 32-bit lane of the input, interleaved
@ with the integer stores that fill frame slots 64..76. Note r1 is saved
@ (slot 68) before it is repurposed as the store cursor.
1903	vrev32.8	q0,q0		@ yes, even on
1904	str	r0,[sp,#64]
1905	vrev32.8	q1,q1		@ big-endian
1906	str	r1,[sp,#68]
1907	mov	r1,sp
1908	vrev32.8	q2,q2
1909	str	r2,[sp,#72]
1910	vrev32.8	q3,q3
1911	str	r12,[sp,#76]		@ save original sp
@ X[i]+K256[i] for the first 16 words, stored to sp+0..63 through r1.
1912	vadd.i32	q8,q8,q0
1913	vadd.i32	q9,q9,q1
1914	vst1.32	{q8},[r1,:128]!
1915	vadd.i32	q10,q10,q2
1916	vst1.32	{q9},[r1,:128]!
1917	vadd.i32	q11,q11,q3
1918	vst1.32	{q10},[r1,:128]!
1919	vst1.32	{q11},[r1,:128]!
1920
@ Loop entry state: r4-r11 = the eight state words, r1 rewound to sp,
@ r2 = first X+K word, r12 = 0 (the loop adds r12 to r4 in its first
@ round, so zero means "nothing pending"), r3 = r5^r6.
1921	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1922	sub	r1,r1,#64
1923	ldr	r2,[sp,#0]
1924	eor	r12,r12,r12
1925	eor	r3,r5,r6
1926	b	.L_00_48
1927
@ .L_00_48 -- one pass executes 16 rounds on the integer pipeline while
@ NEON computes the following 16 message-schedule words, adds K256 to
@ them and stores the sums to the stack. The pass repeats until the word
@ at [r14] reads as zero (see the teq at the bottom).
@ Register roles visible in this loop:
@   r4-r11   working variables; the register playing each role shifts by
@            one every round (first round: a=r4, e=r8, h=r11)
@   r2       X+K word for the round (ldr from [sp,#n]); also reused as
@            scratch for Ch(e,f,g)
@   r0       scratch for the rotate/xor chains; they resolve to
@            (e ^ e ror 5 ^ e ror 19) ror 6 and (a ^ a ror 11 ^ a ror 20) ror 2
@   r3, r12  take turns: one holds a^b for the current round, the other
@            the Maj(a,b,c) term, which is added one round late
@   r1       store cursor into the 64-byte X+K area at sp
@   r14      read cursor into K256
@   q0-q3    rolling window of the 16 most recent schedule words
@   q8-q11, d24, d25   NEON scratch
1928.align	4
1929.L_00_48:
@ ---- rounds 0-3 of the pass; NEON replaces q0 ----
@ NEON recipe (repeated for q1, q2, q3 in the next three quarters):
@   q8 = window words 1..4, q9 = window words 9..12 (vext by 4 bytes)
@   q0 += q9
@   q0 += (q8 ror 7) ^ (q8 ror 18) ^ (q8 >> 3)      each ror is vshr+vsli
@   d0 += (d7 ror 17) ^ (d7 ror 19) ^ (d7 >> 10)    d7 = last two window words
@   d1 += same function of the freshly updated d0
1930	vext.8	q8,q0,q1,#4
1931	add	r11,r11,r2
1932	eor	r2,r9,r10
1933	eor	r0,r8,r8,ror#5
1934	vext.8	q9,q2,q3,#4
1935	add	r4,r4,r12
1936	and	r2,r2,r8
1937	eor	r12,r0,r8,ror#19
1938	vshr.u32	q10,q8,#7
1939	eor	r0,r4,r4,ror#11
1940	eor	r2,r2,r10
1941	vadd.i32	q0,q0,q9
1942	add	r11,r11,r12,ror#6
1943	eor	r12,r4,r5
1944	vshr.u32	q9,q8,#3
1945	eor	r0,r0,r4,ror#20
1946	add	r11,r11,r2
1947	vsli.32	q10,q8,#25
1948	ldr	r2,[sp,#4]
1949	and	r3,r3,r12
1950	vshr.u32	q11,q8,#18
1951	add	r7,r7,r11
1952	add	r11,r11,r0,ror#2
1953	eor	r3,r3,r5
1954	veor	q9,q9,q10
1955	add	r10,r10,r2
1956	vsli.32	q11,q8,#14
1957	eor	r2,r8,r9
1958	eor	r0,r7,r7,ror#5
1959	vshr.u32	d24,d7,#17
1960	add	r11,r11,r3
1961	and	r2,r2,r7
1962	veor	q9,q9,q11
1963	eor	r3,r0,r7,ror#19
1964	eor	r0,r11,r11,ror#11
1965	vsli.32	d24,d7,#15
1966	eor	r2,r2,r9
1967	add	r10,r10,r3,ror#6
1968	vshr.u32	d25,d7,#10
1969	eor	r3,r11,r4
1970	eor	r0,r0,r11,ror#20
1971	vadd.i32	q0,q0,q9
1972	add	r10,r10,r2
1973	ldr	r2,[sp,#8]
1974	veor	d25,d25,d24
1975	and	r12,r12,r3
1976	add	r6,r6,r10
1977	vshr.u32	d24,d7,#19
1978	add	r10,r10,r0,ror#2
1979	eor	r12,r12,r4
1980	vsli.32	d24,d7,#13
1981	add	r9,r9,r2
1982	eor	r2,r7,r8
1983	veor	d25,d25,d24
1984	eor	r0,r6,r6,ror#5
1985	add	r10,r10,r12
1986	vadd.i32	d0,d0,d25
1987	and	r2,r2,r6
1988	eor	r12,r0,r6,ror#19
1989	vshr.u32	d24,d0,#17
1990	eor	r0,r10,r10,ror#11
1991	eor	r2,r2,r8
1992	vsli.32	d24,d0,#15
1993	add	r9,r9,r12,ror#6
1994	eor	r12,r10,r11
1995	vshr.u32	d25,d0,#10
1996	eor	r0,r0,r10,ror#20
1997	add	r9,r9,r2
1998	veor	d25,d25,d24
1999	ldr	r2,[sp,#12]
2000	and	r3,r3,r12
2001	vshr.u32	d24,d0,#19
2002	add	r5,r5,r9
2003	add	r9,r9,r0,ror#2
2004	eor	r3,r3,r11
@ Fetch the next four K256 words; once q0 is complete they are summed with
@ it and stored through r1. The store below comes after the ldr of
@ [sp,#16], i.e. after the four words it overwrites have been read.
2005	vld1.32	{q8},[r14,:128]!
2006	add	r8,r8,r2
2007	vsli.32	d24,d0,#13
2008	eor	r2,r6,r7
2009	eor	r0,r5,r5,ror#5
2010	veor	d25,d25,d24
2011	add	r9,r9,r3
2012	and	r2,r2,r5
2013	vadd.i32	d1,d1,d25
2014	eor	r3,r0,r5,ror#19
2015	eor	r0,r9,r9,ror#11
2016	vadd.i32	q8,q8,q0
2017	eor	r2,r2,r7
2018	add	r8,r8,r3,ror#6
2019	eor	r3,r9,r10
2020	eor	r0,r0,r9,ror#20
2021	add	r8,r8,r2
2022	ldr	r2,[sp,#16]
2023	and	r12,r12,r3
2024	add	r4,r4,r8
2025	vst1.32	{q8},[r1,:128]!
2026	add	r8,r8,r0,ror#2
2027	eor	r12,r12,r10
@ ---- rounds 4-7 of the pass; NEON replaces q1 (same recipe, window
@      shifted by four words: vext on q1:q2 and q3:q0, d1 then d2) ----
2028	vext.8	q8,q1,q2,#4
2029	add	r7,r7,r2
2030	eor	r2,r5,r6
2031	eor	r0,r4,r4,ror#5
2032	vext.8	q9,q3,q0,#4
2033	add	r8,r8,r12
2034	and	r2,r2,r4
2035	eor	r12,r0,r4,ror#19
2036	vshr.u32	q10,q8,#7
2037	eor	r0,r8,r8,ror#11
2038	eor	r2,r2,r6
2039	vadd.i32	q1,q1,q9
2040	add	r7,r7,r12,ror#6
2041	eor	r12,r8,r9
2042	vshr.u32	q9,q8,#3
2043	eor	r0,r0,r8,ror#20
2044	add	r7,r7,r2
2045	vsli.32	q10,q8,#25
2046	ldr	r2,[sp,#20]
2047	and	r3,r3,r12
2048	vshr.u32	q11,q8,#18
2049	add	r11,r11,r7
2050	add	r7,r7,r0,ror#2
2051	eor	r3,r3,r9
2052	veor	q9,q9,q10
2053	add	r6,r6,r2
2054	vsli.32	q11,q8,#14
2055	eor	r2,r4,r5
2056	eor	r0,r11,r11,ror#5
2057	vshr.u32	d24,d1,#17
2058	add	r7,r7,r3
2059	and	r2,r2,r11
2060	veor	q9,q9,q11
2061	eor	r3,r0,r11,ror#19
2062	eor	r0,r7,r7,ror#11
2063	vsli.32	d24,d1,#15
2064	eor	r2,r2,r5
2065	add	r6,r6,r3,ror#6
2066	vshr.u32	d25,d1,#10
2067	eor	r3,r7,r8
2068	eor	r0,r0,r7,ror#20
2069	vadd.i32	q1,q1,q9
2070	add	r6,r6,r2
2071	ldr	r2,[sp,#24]
2072	veor	d25,d25,d24
2073	and	r12,r12,r3
2074	add	r10,r10,r6
2075	vshr.u32	d24,d1,#19
2076	add	r6,r6,r0,ror#2
2077	eor	r12,r12,r8
2078	vsli.32	d24,d1,#13
2079	add	r5,r5,r2
2080	eor	r2,r11,r4
2081	veor	d25,d25,d24
2082	eor	r0,r10,r10,ror#5
2083	add	r6,r6,r12
2084	vadd.i32	d2,d2,d25
2085	and	r2,r2,r10
2086	eor	r12,r0,r10,ror#19
2087	vshr.u32	d24,d2,#17
2088	eor	r0,r6,r6,ror#11
2089	eor	r2,r2,r4
2090	vsli.32	d24,d2,#15
2091	add	r5,r5,r12,ror#6
2092	eor	r12,r6,r7
2093	vshr.u32	d25,d2,#10
2094	eor	r0,r0,r6,ror#20
2095	add	r5,r5,r2
2096	veor	d25,d25,d24
2097	ldr	r2,[sp,#28]
2098	and	r3,r3,r12
2099	vshr.u32	d24,d2,#19
2100	add	r9,r9,r5
2101	add	r5,r5,r0,ror#2
2102	eor	r3,r3,r7
2103	vld1.32	{q8},[r14,:128]!
2104	add	r4,r4,r2
2105	vsli.32	d24,d2,#13
2106	eor	r2,r10,r11
2107	eor	r0,r9,r9,ror#5
2108	veor	d25,d25,d24
2109	add	r5,r5,r3
2110	and	r2,r2,r9
2111	vadd.i32	d3,d3,d25
2112	eor	r3,r0,r9,ror#19
2113	eor	r0,r5,r5,ror#11
2114	vadd.i32	q8,q8,q1
2115	eor	r2,r2,r11
2116	add	r4,r4,r3,ror#6
2117	eor	r3,r5,r6
2118	eor	r0,r0,r5,ror#20
2119	add	r4,r4,r2
2120	ldr	r2,[sp,#32]
2121	and	r12,r12,r3
2122	add	r8,r8,r4
2123	vst1.32	{q8},[r1,:128]!
2124	add	r4,r4,r0,ror#2
2125	eor	r12,r12,r6
@ ---- rounds 8-11 of the pass; NEON replaces q2 (vext on q2:q3 and
@      q0:q1, d3 then d4). The working variables are back in their
@      round-0 registers here (h=r11, e=r8, a=r4). ----
2126	vext.8	q8,q2,q3,#4
2127	add	r11,r11,r2
2128	eor	r2,r9,r10
2129	eor	r0,r8,r8,ror#5
2130	vext.8	q9,q0,q1,#4
2131	add	r4,r4,r12
2132	and	r2,r2,r8
2133	eor	r12,r0,r8,ror#19
2134	vshr.u32	q10,q8,#7
2135	eor	r0,r4,r4,ror#11
2136	eor	r2,r2,r10
2137	vadd.i32	q2,q2,q9
2138	add	r11,r11,r12,ror#6
2139	eor	r12,r4,r5
2140	vshr.u32	q9,q8,#3
2141	eor	r0,r0,r4,ror#20
2142	add	r11,r11,r2
2143	vsli.32	q10,q8,#25
2144	ldr	r2,[sp,#36]
2145	and	r3,r3,r12
2146	vshr.u32	q11,q8,#18
2147	add	r7,r7,r11
2148	add	r11,r11,r0,ror#2
2149	eor	r3,r3,r5
2150	veor	q9,q9,q10
2151	add	r10,r10,r2
2152	vsli.32	q11,q8,#14
2153	eor	r2,r8,r9
2154	eor	r0,r7,r7,ror#5
2155	vshr.u32	d24,d3,#17
2156	add	r11,r11,r3
2157	and	r2,r2,r7
2158	veor	q9,q9,q11
2159	eor	r3,r0,r7,ror#19
2160	eor	r0,r11,r11,ror#11
2161	vsli.32	d24,d3,#15
2162	eor	r2,r2,r9
2163	add	r10,r10,r3,ror#6
2164	vshr.u32	d25,d3,#10
2165	eor	r3,r11,r4
2166	eor	r0,r0,r11,ror#20
2167	vadd.i32	q2,q2,q9
2168	add	r10,r10,r2
2169	ldr	r2,[sp,#40]
2170	veor	d25,d25,d24
2171	and	r12,r12,r3
2172	add	r6,r6,r10
2173	vshr.u32	d24,d3,#19
2174	add	r10,r10,r0,ror#2
2175	eor	r12,r12,r4
2176	vsli.32	d24,d3,#13
2177	add	r9,r9,r2
2178	eor	r2,r7,r8
2179	veor	d25,d25,d24
2180	eor	r0,r6,r6,ror#5
2181	add	r10,r10,r12
2182	vadd.i32	d4,d4,d25
2183	and	r2,r2,r6
2184	eor	r12,r0,r6,ror#19
2185	vshr.u32	d24,d4,#17
2186	eor	r0,r10,r10,ror#11
2187	eor	r2,r2,r8
2188	vsli.32	d24,d4,#15
2189	add	r9,r9,r12,ror#6
2190	eor	r12,r10,r11
2191	vshr.u32	d25,d4,#10
2192	eor	r0,r0,r10,ror#20
2193	add	r9,r9,r2
2194	veor	d25,d25,d24
2195	ldr	r2,[sp,#44]
2196	and	r3,r3,r12
2197	vshr.u32	d24,d4,#19
2198	add	r5,r5,r9
2199	add	r9,r9,r0,ror#2
2200	eor	r3,r3,r11
2201	vld1.32	{q8},[r14,:128]!
2202	add	r8,r8,r2
2203	vsli.32	d24,d4,#13
2204	eor	r2,r6,r7
2205	eor	r0,r5,r5,ror#5
2206	veor	d25,d25,d24
2207	add	r9,r9,r3
2208	and	r2,r2,r5
2209	vadd.i32	d5,d5,d25
2210	eor	r3,r0,r5,ror#19
2211	eor	r0,r9,r9,ror#11
2212	vadd.i32	q8,q8,q2
2213	eor	r2,r2,r7
2214	add	r8,r8,r3,ror#6
2215	eor	r3,r9,r10
2216	eor	r0,r0,r9,ror#20
2217	add	r8,r8,r2
2218	ldr	r2,[sp,#48]
2219	and	r12,r12,r3
2220	add	r4,r4,r8
2221	vst1.32	{q8},[r1,:128]!
2222	add	r8,r8,r0,ror#2
2223	eor	r12,r12,r10
@ ---- rounds 12-15 of the pass; NEON replaces q3 (vext on q3:q0 and
@      q1:q2, d5 then d6) ----
2224	vext.8	q8,q3,q0,#4
2225	add	r7,r7,r2
2226	eor	r2,r5,r6
2227	eor	r0,r4,r4,ror#5
2228	vext.8	q9,q1,q2,#4
2229	add	r8,r8,r12
2230	and	r2,r2,r4
2231	eor	r12,r0,r4,ror#19
2232	vshr.u32	q10,q8,#7
2233	eor	r0,r8,r8,ror#11
2234	eor	r2,r2,r6
2235	vadd.i32	q3,q3,q9
2236	add	r7,r7,r12,ror#6
2237	eor	r12,r8,r9
2238	vshr.u32	q9,q8,#3
2239	eor	r0,r0,r8,ror#20
2240	add	r7,r7,r2
2241	vsli.32	q10,q8,#25
2242	ldr	r2,[sp,#52]
2243	and	r3,r3,r12
2244	vshr.u32	q11,q8,#18
2245	add	r11,r11,r7
2246	add	r7,r7,r0,ror#2
2247	eor	r3,r3,r9
2248	veor	q9,q9,q10
2249	add	r6,r6,r2
2250	vsli.32	q11,q8,#14
2251	eor	r2,r4,r5
2252	eor	r0,r11,r11,ror#5
2253	vshr.u32	d24,d5,#17
2254	add	r7,r7,r3
2255	and	r2,r2,r11
2256	veor	q9,q9,q11
2257	eor	r3,r0,r11,ror#19
2258	eor	r0,r7,r7,ror#11
2259	vsli.32	d24,d5,#15
2260	eor	r2,r2,r5
2261	add	r6,r6,r3,ror#6
2262	vshr.u32	d25,d5,#10
2263	eor	r3,r7,r8
2264	eor	r0,r0,r7,ror#20
2265	vadd.i32	q3,q3,q9
2266	add	r6,r6,r2
2267	ldr	r2,[sp,#56]
2268	veor	d25,d25,d24
2269	and	r12,r12,r3
2270	add	r10,r10,r6
2271	vshr.u32	d24,d5,#19
2272	add	r6,r6,r0,ror#2
2273	eor	r12,r12,r8
2274	vsli.32	d24,d5,#13
2275	add	r5,r5,r2
2276	eor	r2,r11,r4
2277	veor	d25,d25,d24
2278	eor	r0,r10,r10,ror#5
2279	add	r6,r6,r12
2280	vadd.i32	d6,d6,d25
2281	and	r2,r2,r10
2282	eor	r12,r0,r10,ror#19
2283	vshr.u32	d24,d6,#17
2284	eor	r0,r6,r6,ror#11
2285	eor	r2,r2,r4
2286	vsli.32	d24,d6,#15
2287	add	r5,r5,r12,ror#6
2288	eor	r12,r6,r7
2289	vshr.u32	d25,d6,#10
2290	eor	r0,r0,r6,ror#20
2291	add	r5,r5,r2
2292	veor	d25,d25,d24
2293	ldr	r2,[sp,#60]
2294	and	r3,r3,r12
2295	vshr.u32	d24,d6,#19
2296	add	r9,r9,r5
2297	add	r5,r5,r0,ror#2
2298	eor	r3,r3,r7
2299	vld1.32	{q8},[r14,:128]!
2300	add	r4,r4,r2
2301	vsli.32	d24,d6,#13
2302	eor	r2,r10,r11
2303	eor	r0,r9,r9,ror#5
2304	veor	d25,d25,d24
2305	add	r5,r5,r3
2306	and	r2,r2,r9
2307	vadd.i32	d7,d7,d25
2308	eor	r3,r0,r9,ror#19
2309	eor	r0,r5,r5,ror#11
2310	vadd.i32	q8,q8,q3
2311	eor	r2,r2,r11
2312	add	r4,r4,r3,ror#6
2313	eor	r3,r5,r6
2314	eor	r0,r0,r5,ror#20
2315	add	r4,r4,r2
@ Loop control, folded into the tail of the last round: peek at the next
@ K256 word without advancing r14, test it for zero, then reload r2 with
@ the first stored X+K word and rewind r1 by 64 bytes. Neither the ldr nor
@ the flag-less sub disturbs the Z flag set by teq, so the bne still sees it.
2316	ldr	r2,[r14]
2317	and	r12,r12,r3
2318	add	r8,r8,r4
2319	vst1.32	{q8},[r1,:128]!
2320	add	r4,r4,r0,ror#2
2321	eor	r12,r12,r6
2322	teq	r2,#0				@ check for K256 terminator
2323	ldr	r2,[sp,#0]
2324	sub	r1,r1,#64
2325	bne	.L_00_48
2326
2327	ldr	r1,[sp,#68]
2328	ldr	r0,[sp,#72]
2329	sub	r14,r14,#256	@ rewind r14
2330	teq	r1,r0
2331	it	eq
2332	subeq	r1,r1,#64		@ avoid SEGV
2333	vld1.8	{q0},[r1]!		@ load next input block
2334	vld1.8	{q1},[r1]!
2335	vld1.8	{q2},[r1]!
2336	vld1.8	{q3},[r1]!
2337	it	ne
2338	strne	r1,[sp,#68]
2339	mov	r1,sp
2340	add	r11,r11,r2
2341	eor	r2,r9,r10
2342	eor	r0,r8,r8,ror#5
2343	add	r4,r4,r12
2344	vld1.32	{q8},[r14,:128]!
2345	and	r2,r2,r8
2346	eor	r12,r0,r8,ror#19
2347	eor	r0,r4,r4,ror#11
2348	eor	r2,r2,r10
2349	vrev32.8	q0,q0
2350	add	r11,r11,r12,ror#6
2351	eor	r12,r4,r5
2352	eor	r0,r0,r4,ror#20
2353	add	r11,r11,r2
2354	vadd.i32	q8,q8,q0
2355	ldr	r2,[sp,#4]
2356	and	r3,r3,r12
2357	add	r7,r7,r11
2358	add	r11,r11,r0,ror#2
2359	eor	r3,r3,r5
2360	add	r10,r10,r2
2361	eor	r2,r8,r9
2362	eor	r0,r7,r7,ror#5
2363	add	r11,r11,r3
2364	and	r2,r2,r7
2365	eor	r3,r0,r7,ror#19
2366	eor	r0,r11,r11,ror#11
2367	eor	r2,r2,r9
2368	add	r10,r10,r3,ror#6
2369	eor	r3,r11,r4
2370	eor	r0,r0,r11,ror#20
2371	add	r10,r10,r2
2372	ldr	r2,[sp,#8]
2373	and	r12,r12,r3
2374	add	r6,r6,r10
2375	add	r10,r10,r0,ror#2
2376	eor	r12,r12,r4
2377	add	r9,r9,r2
2378	eor	r2,r7,r8
2379	eor	r0,r6,r6,ror#5
2380	add	r10,r10,r12
2381	and	r2,r2,r6
2382	eor	r12,r0,r6,ror#19
2383	eor	r0,r10,r10,ror#11
2384	eor	r2,r2,r8
2385	add	r9,r9,r12,ror#6
2386	eor	r12,r10,r11
2387	eor	r0,r0,r10,ror#20
2388	add	r9,r9,r2
2389	ldr	r2,[sp,#12]
2390	and	r3,r3,r12
2391	add	r5,r5,r9
2392	add	r9,r9,r0,ror#2
2393	eor	r3,r3,r11
2394	add	r8,r8,r2
2395	eor	r2,r6,r7
2396	eor	r0,r5,r5,ror#5
2397	add	r9,r9,r3
2398	and	r2,r2,r5
2399	eor	r3,r0,r5,ror#19
2400	eor	r0,r9,r9,ror#11
2401	eor	r2,r2,r7
2402	add	r8,r8,r3,ror#6
2403	eor	r3,r9,r10
2404	eor	r0,r0,r9,ror#20
2405	add	r8,r8,r2
2406	ldr	r2,[sp,#16]
2407	and	r12,r12,r3
2408	add	r4,r4,r8
2409	add	r8,r8,r0,ror#2
2410	eor	r12,r12,r10
2411	vst1.32	{q8},[r1,:128]!
2412	add	r7,r7,r2
2413	eor	r2,r5,r6
2414	eor	r0,r4,r4,ror#5
2415	add	r8,r8,r12
2416	vld1.32	{q8},[r14,:128]!
2417	and	r2,r2,r4
2418	eor	r12,r0,r4,ror#19
2419	eor	r0,r8,r8,ror#11
2420	eor	r2,r2,r6
2421	vrev32.8	q1,q1
2422	add	r7,r7,r12,ror#6
2423	eor	r12,r8,r9
2424	eor	r0,r0,r8,ror#20
2425	add	r7,r7,r2
2426	vadd.i32	q8,q8,q1
2427	ldr	r2,[sp,#20]
2428	and	r3,r3,r12
2429	add	r11,r11,r7
2430	add	r7,r7,r0,ror#2
2431	eor	r3,r3,r9
2432	add	r6,r6,r2
2433	eor	r2,r4,r5
2434	eor	r0,r11,r11,ror#5
2435	add	r7,r7,r3
2436	and	r2,r2,r11
2437	eor	r3,r0,r11,ror#19
2438	eor	r0,r7,r7,ror#11
2439	eor	r2,r2,r5
2440	add	r6,r6,r3,ror#6
2441	eor	r3,r7,r8
2442	eor	r0,r0,r7,ror#20
2443	add	r6,r6,r2
2444	ldr	r2,[sp,#24]
2445	and	r12,r12,r3
2446	add	r10,r10,r6
2447	add	r6,r6,r0,ror#2
2448	eor	r12,r12,r8
2449	add	r5,r5,r2
2450	eor	r2,r11,r4
2451	eor	r0,r10,r10,ror#5
2452	add	r6,r6,r12
2453	and	r2,r2,r10
2454	eor	r12,r0,r10,ror#19
2455	eor	r0,r6,r6,ror#11
2456	eor	r2,r2,r4
2457	add	r5,r5,r12,ror#6
2458	eor	r12,r6,r7
2459	eor	r0,r0,r6,ror#20
2460	add	r5,r5,r2
2461	ldr	r2,[sp,#28]
2462	and	r3,r3,r12
2463	add	r9,r9,r5
2464	add	r5,r5,r0,ror#2
2465	eor	r3,r3,r7
2466	add	r4,r4,r2
2467	eor	r2,r10,r11
2468	eor	r0,r9,r9,ror#5
2469	add	r5,r5,r3
2470	and	r2,r2,r9
2471	eor	r3,r0,r9,ror#19
2472	eor	r0,r5,r5,ror#11
2473	eor	r2,r2,r11
2474	add	r4,r4,r3,ror#6
2475	eor	r3,r5,r6
2476	eor	r0,r0,r5,ror#20
2477	add	r4,r4,r2
2478	ldr	r2,[sp,#32]
2479	and	r12,r12,r3
2480	add	r8,r8,r4
2481	add	r4,r4,r0,ror#2
2482	eor	r12,r12,r6
2483	vst1.32	{q8},[r1,:128]!
2484	add	r11,r11,r2
2485	eor	r2,r9,r10
2486	eor	r0,r8,r8,ror#5
2487	add	r4,r4,r12
2488	vld1.32	{q8},[r14,:128]!
2489	and	r2,r2,r8
2490	eor	r12,r0,r8,ror#19
2491	eor	r0,r4,r4,ror#11
2492	eor	r2,r2,r10
2493	vrev32.8	q2,q2
2494	add	r11,r11,r12,ror#6
2495	eor	r12,r4,r5
2496	eor	r0,r0,r4,ror#20
2497	add	r11,r11,r2
2498	vadd.i32	q8,q8,q2
2499	ldr	r2,[sp,#36]
2500	and	r3,r3,r12
2501	add	r7,r7,r11
2502	add	r11,r11,r0,ror#2
2503	eor	r3,r3,r5
2504	add	r10,r10,r2
2505	eor	r2,r8,r9
2506	eor	r0,r7,r7,ror#5
2507	add	r11,r11,r3
2508	and	r2,r2,r7
2509	eor	r3,r0,r7,ror#19
2510	eor	r0,r11,r11,ror#11
2511	eor	r2,r2,r9
2512	add	r10,r10,r3,ror#6
2513	eor	r3,r11,r4
2514	eor	r0,r0,r11,ror#20
2515	add	r10,r10,r2
2516	ldr	r2,[sp,#40]
2517	and	r12,r12,r3
2518	add	r6,r6,r10
2519	add	r10,r10,r0,ror#2
2520	eor	r12,r12,r4
2521	add	r9,r9,r2
2522	eor	r2,r7,r8
2523	eor	r0,r6,r6,ror#5
2524	add	r10,r10,r12
2525	and	r2,r2,r6
2526	eor	r12,r0,r6,ror#19
2527	eor	r0,r10,r10,ror#11
2528	eor	r2,r2,r8
2529	add	r9,r9,r12,ror#6
2530	eor	r12,r10,r11
2531	eor	r0,r0,r10,ror#20
2532	add	r9,r9,r2
2533	ldr	r2,[sp,#44]
2534	and	r3,r3,r12
2535	add	r5,r5,r9
2536	add	r9,r9,r0,ror#2
2537	eor	r3,r3,r11
2538	add	r8,r8,r2
2539	eor	r2,r6,r7
2540	eor	r0,r5,r5,ror#5
2541	add	r9,r9,r3
2542	and	r2,r2,r5
2543	eor	r3,r0,r5,ror#19
2544	eor	r0,r9,r9,ror#11
2545	eor	r2,r2,r7
2546	add	r8,r8,r3,ror#6
2547	eor	r3,r9,r10
2548	eor	r0,r0,r9,ror#20
2549	add	r8,r8,r2
2550	ldr	r2,[sp,#48]
2551	and	r12,r12,r3
2552	add	r4,r4,r8
2553	add	r8,r8,r0,ror#2
2554	eor	r12,r12,r10
2555	vst1.32	{q8},[r1,:128]!
2556	add	r7,r7,r2
2557	eor	r2,r5,r6
2558	eor	r0,r4,r4,ror#5
2559	add	r8,r8,r12
2560	vld1.32	{q8},[r14,:128]!
2561	and	r2,r2,r4
2562	eor	r12,r0,r4,ror#19
2563	eor	r0,r8,r8,ror#11
2564	eor	r2,r2,r6
2565	vrev32.8	q3,q3
2566	add	r7,r7,r12,ror#6
2567	eor	r12,r8,r9
2568	eor	r0,r0,r8,ror#20
2569	add	r7,r7,r2
2570	vadd.i32	q8,q8,q3
2571	ldr	r2,[sp,#52]
2572	and	r3,r3,r12
2573	add	r11,r11,r7
2574	add	r7,r7,r0,ror#2
2575	eor	r3,r3,r9
2576	add	r6,r6,r2
2577	eor	r2,r4,r5
2578	eor	r0,r11,r11,ror#5
2579	add	r7,r7,r3
2580	and	r2,r2,r11
2581	eor	r3,r0,r11,ror#19
2582	eor	r0,r7,r7,ror#11
2583	eor	r2,r2,r5
2584	add	r6,r6,r3,ror#6
2585	eor	r3,r7,r8
2586	eor	r0,r0,r7,ror#20
2587	add	r6,r6,r2
2588	ldr	r2,[sp,#56]
2589	and	r12,r12,r3
2590	add	r10,r10,r6
2591	add	r6,r6,r0,ror#2
2592	eor	r12,r12,r8
2593	add	r5,r5,r2
2594	eor	r2,r11,r4
2595	eor	r0,r10,r10,ror#5
2596	add	r6,r6,r12
2597	and	r2,r2,r10
2598	eor	r12,r0,r10,ror#19
2599	eor	r0,r6,r6,ror#11
2600	eor	r2,r2,r4
2601	add	r5,r5,r12,ror#6
2602	eor	r12,r6,r7
2603	eor	r0,r0,r6,ror#20
2604	add	r5,r5,r2
2605	ldr	r2,[sp,#60]
2606	and	r3,r3,r12
2607	add	r9,r9,r5
2608	add	r5,r5,r0,ror#2
2609	eor	r3,r3,r7
2610	add	r4,r4,r2
2611	eor	r2,r10,r11
2612	eor	r0,r9,r9,ror#5
2613	add	r5,r5,r3
2614	and	r2,r2,r9
2615	eor	r3,r0,r9,ror#19
2616	eor	r0,r5,r5,ror#11
2617	eor	r2,r2,r11
2618	add	r4,r4,r3,ror#6
2619	eor	r3,r5,r6
2620	eor	r0,r0,r5,ror#20
2621	add	r4,r4,r2
2622	ldr	r2,[sp,#64]
2623	and	r12,r12,r3
2624	add	r8,r8,r4
2625	add	r4,r4,r0,ror#2
2626	eor	r12,r12,r6
2627	vst1.32	{q8},[r1,:128]!
2628	ldr	r0,[r2,#0]
2629	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2630	ldr	r12,[r2,#4]
2631	ldr	r3,[r2,#8]
2632	ldr	r1,[r2,#12]
2633	add	r4,r4,r0			@ accumulate
2634	ldr	r0,[r2,#16]
2635	add	r5,r5,r12
2636	ldr	r12,[r2,#20]
2637	add	r6,r6,r3
2638	ldr	r3,[r2,#24]
2639	add	r7,r7,r1
2640	ldr	r1,[r2,#28]
2641	add	r8,r8,r0
2642	str	r4,[r2],#4
2643	add	r9,r9,r12
2644	str	r5,[r2],#4
2645	add	r10,r10,r3
2646	str	r6,[r2],#4
2647	add	r11,r11,r1
2648	str	r7,[r2],#4
2649	stmia	r2,{r8,r9,r10,r11}
2650
2651	ittte	ne
2652	movne	r1,sp
2653	ldrne	r2,[sp,#0]
2654	eorne	r12,r12,r12
2655	ldreq	sp,[sp,#76]			@ restore original sp
2656	itt	ne
2657	eorne	r3,r5,r6
2658	bne	.L_00_48
2659
2660	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2661.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2662#endif
2663#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2664
@ Helper macro for the raw-encoded instructions in the routine below: it
@ emits its four arguments as four consecutive bytes via .byte.
@ In the non-Thumb-2 variant the bytes are emitted in argument order.
@ In the Thumb-2 variant the third and fourth arguments are emitted first
@ and 0xc is OR-ed into the fourth argument.
@ NOTE(review): presumably this turns the ARM-mode encoding into the matching
@ Thumb-2 encoding, so that the file assembles with toolchains lacking the
@ mnemonics named in the trailing comments - confirm against the
@ architecture manual.
2665# if defined(__thumb2__)
2666#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2667# else
2668#  define INST(a,b,c,d)	.byte	a,b,c,d
2669# endif
2670
@ sha256_block_data_order_armv8
@
@ Variant of the block procedure built on the sha256h, sha256h2, sha256su0
@ and sha256su1 instructions. Those are emitted as raw bytes; the trailing
@ comment on each such line names the intended instruction.
@
@ Register use, as established by the code below:
@   r0      - address of 32 bytes that are loaded into q0,q1 on entry and
@             stored back from q0,q1 on exit (presumably the eight 32-bit
@             hash state words).
@   r1      - input pointer, advanced by 64 bytes per loop iteration.
@   r2      - on entry a count of 64-byte blocks; converted at once into
@             the end-of-input address r1 + r2*64.
@   r3      - on entry must hold an address 256+32 bytes past a 256-byte
@             table that is read 16 bytes at a time, one vector for each
@             sha256h/sha256h2 pair (presumably the round constants, with
@             r3 prepared by the dispatching code outside this view -
@             TODO confirm).
@   q0,q1   - working state; q2 holds a copy of q0 taken before each
@             sha256h so that sha256h2 can still read the old value.
@   q8-q11  - the 64 input bytes, then the evolving message schedule.
@   q12,q13 - alternating table vector plus schedule vector.
@   q14,q15 - copy of the state taken at the top of each block.
@
@ NOTE(review): the termination test is only evaluated after a block has
@ been processed, so a zero block count is not handled here - presumably
@ callers never pass one.
2671.type	sha256_block_data_order_armv8,%function
2672.align	5
2673sha256_block_data_order_armv8:
2674.LARMv8:
@ Load the 32-byte state and derive the table start and the input end.
2675	vld1.32	{q0,q1},[r0]
2676	sub	r3,r3,#256+32
2677	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
@ Jump to the aligned loop head, skipping any padding the .align inserts.
2678	b	.Loop_v8
2679
2680.align	4
2681.Loop_v8:
@ Fetch one 64-byte block (r1 advances by 64) and the first table vector,
@ then reverse the bytes inside every 32-bit word of the block.
2682	vld1.8	{q8,q9},[r1]!
2683	vld1.8	{q10,q11},[r1]!
2684	vld1.32	{q12},[r3]!
2685	vrev32.8	q8,q8
2686	vrev32.8	q9,q9
2687	vrev32.8	q10,q10
2688	vrev32.8	q11,q11
2689	vmov	q14,q0	@ offload
2690	vmov	q15,q1
@ r1 already points past the block just loaded; the Z flag produced here
@ is consumed by the bne at the bottom of the loop.
2691	teq	r1,r2
@ Twelve groups follow. Each one adds a table vector to a schedule vector,
@ advances the state with sha256h/sha256h2 and updates one schedule vector
@ with sha256su0/sha256su1, while the next table vector is being loaded.
2692	vld1.32	{q13},[r3]!
2693	vadd.i32	q12,q12,q8
2694	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2695	vmov	q2,q0
2696	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2697	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2698	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2699	vld1.32	{q12},[r3]!
2700	vadd.i32	q13,q13,q9
2701	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2702	vmov	q2,q0
2703	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2704	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2705	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2706	vld1.32	{q13},[r3]!
2707	vadd.i32	q12,q12,q10
2708	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2709	vmov	q2,q0
2710	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2711	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2712	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2713	vld1.32	{q12},[r3]!
2714	vadd.i32	q13,q13,q11
2715	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2716	vmov	q2,q0
2717	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2718	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2719	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2720	vld1.32	{q13},[r3]!
2721	vadd.i32	q12,q12,q8
2722	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2723	vmov	q2,q0
2724	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2725	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2726	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2727	vld1.32	{q12},[r3]!
2728	vadd.i32	q13,q13,q9
2729	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2730	vmov	q2,q0
2731	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2732	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2733	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2734	vld1.32	{q13},[r3]!
2735	vadd.i32	q12,q12,q10
2736	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2737	vmov	q2,q0
2738	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2739	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2740	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2741	vld1.32	{q12},[r3]!
2742	vadd.i32	q13,q13,q11
2743	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2744	vmov	q2,q0
2745	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2746	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2747	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2748	vld1.32	{q13},[r3]!
2749	vadd.i32	q12,q12,q8
2750	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2751	vmov	q2,q0
2752	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2753	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2754	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2755	vld1.32	{q12},[r3]!
2756	vadd.i32	q13,q13,q9
2757	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2758	vmov	q2,q0
2759	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2760	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2761	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2762	vld1.32	{q13},[r3]!
2763	vadd.i32	q12,q12,q10
2764	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2765	vmov	q2,q0
2766	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2767	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2768	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2769	vld1.32	{q12},[r3]!
2770	vadd.i32	q13,q13,q11
2771	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2772	vmov	q2,q0
2773	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2774	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2775	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Last four groups: the schedule is no longer updated, only the state is
@ advanced with the remaining table vectors.
2776	vld1.32	{q13},[r3]!
2777	vadd.i32	q12,q12,q8
2778	vmov	q2,q0
2779	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2780	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2781
2782	vld1.32	{q12},[r3]!
2783	vadd.i32	q13,q13,q9
2784	vmov	q2,q0
2785	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2786	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2787
@ Sixteenth and last table vector, loaded without writeback. The fifteen
@ post-incremented 16-byte loads moved r3 forward by 240 = 256-16 bytes,
@ which the sub below undoes so r3 is ready for the next block.
2788	vld1.32	{q13},[r3]
2789	vadd.i32	q12,q12,q10
2790	sub	r3,r3,#256-16	@ rewind
2791	vmov	q2,q0
2792	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2793	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2794
2795	vadd.i32	q13,q13,q11
2796	vmov	q2,q0
2797	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2798	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2799
@ Add the state saved at the top of the block back into the new state,
@ then loop while the input pointer has not reached the end address.
2800	vadd.i32	q0,q0,q14
2801	vadd.i32	q1,q1,q15
2802	it	ne
2803	bne	.Loop_v8
2804
@ All blocks consumed: write the 32-byte state back through r0 and return.
2805	vst1.32	{q0,q1},[r0]
2806
2807	bx	lr		@ bx lr
2808.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2809#endif
@ NUL-terminated ASCII identification string; the bytes spell
@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2810.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ Re-align to a 4-byte boundary after the odd-length string (the directive
@ is emitted twice by the generator; the second one has no further effect).
2811.align	2
2812.align	2
2813#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Common symbol of size 4 with alignment 4, declared only in this
@ configuration. NOTE(review): presumably the CPU capability word consulted
@ by the dispatching code outside this view - confirm there.
2814.comm	OPENSSL_armcap_P,4,4
2815#endif
2816