1@ SPDX-License-Identifier: GPL-2.0
2
3@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
4@ has relicensed it under the GPLv2. Therefore this program is free software;
5@ you can redistribute it and/or modify it under the terms of the GNU General
6@ Public License version 2 as published by the Free Software Foundation.
7@
8@ The original headers, including the original license headers, are
9@ included below for completeness.
10
11@ ====================================================================
12@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
13@ project. The module is, however, dual licensed under OpenSSL and
14@ CRYPTOGAMS licenses depending on where you obtain it. For further
15@ details see https://www.openssl.org/~appro/cryptogams/.
16@ ====================================================================
17
18@ SHA256 block procedure for ARMv4. May 2007.
19
20@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22@ byte [on single-issue Xscale PXA250 core].
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27@ Cortex A8 core and ~20 cycles per processed byte.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34@ September 2013.
35@
36@ Add NEON implementation. On Cortex A8 it was measured to process one
37@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39@ code (meaning that latter performs sub-optimally, nothing was done
40@ about it).
41
42@ May 2014.
43@
44@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
@ Build-environment glue.  In the OpenSSL build, arm_arch.h supplies
@ __ARM_ARCH__ / __ARM_MAX_ARCH__ and the capability flag constants.
@ Inside the Linux kernel that header is unavailable, so derive the
@ architecture level from the kernel's __LINUX_ARM_ARCH__ and cap the
@ maximum considered architecture at 7.
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
@ Instruction-set selection: pre-ARMv7 cores get classic ARM encoding.
@ On ARMv7+ switch to unified syntax so the same source assembles as
@ either Thumb-2 (when built with __thumb2__) or ARM code.
#if __ARM_ARCH__<7
.code	32
#else
.syntax unified
# ifdef __thumb2__
.thumb
# else
.code   32
# endif
#endif
64
@ K256: the 64 SHA-256 round constants K[0..63] (FIPS 180-4).
@ 32-byte aligned (.align 5); the round code locates this table
@ PC-relatively (see "sub r14,r3,#256+32" in the function below) and
@ walks it with "ldr rX,[r14],#4".
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
@ Zero word past the table end; presumably consumed as an end marker by
@ a later (not visible here) code path -- confirm against the NEON code.
.word	0				@ terminator
@ Userspace-only: PC-relative offset from sha256_block_data_order to the
@ OpenSSL capability word.  The function entry adds its own address to
@ this offset, loads OPENSSL_armcap_P, and tests ARMV8_SHA256 /
@ ARMV7_NEON to dispatch to the .LARMv8 / .LNEON implementations.
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5
91
92.global	sha256_block_data_order
93.type	sha256_block_data_order,%function
94sha256_block_data_order:
95.Lsha256_block_data_order:
96#if __ARM_ARCH__<7
97	sub	r3,pc,#8		@ sha256_block_data_order
98#else
99	adr	r3,.Lsha256_block_data_order
100#endif
101#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
102	ldr	r12,.LOPENSSL_armcap
103	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
104	tst	r12,#ARMV8_SHA256
105	bne	.LARMv8
106	tst	r12,#ARMV7_NEON
107	bne	.LNEON
108#endif
109	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
110	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
111	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
112	sub	r14,r3,#256+32	@ K256
113	sub	sp,sp,#16*4		@ alloca(X[16])
114.Loop:
115# if __ARM_ARCH__>=7
116	ldr	r2,[r1],#4
117# else
118	ldrb	r2,[r1,#3]
119# endif
120	eor	r3,r5,r6		@ magic
121	eor	r12,r12,r12
122#if __ARM_ARCH__>=7
123	@ ldr	r2,[r1],#4			@ 0
124# if 0==15
125	str	r1,[sp,#17*4]			@ make room for r1
126# endif
127	eor	r0,r8,r8,ror#5
128	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
129	eor	r0,r0,r8,ror#19	@ Sigma1(e)
130# ifndef __ARMEB__
131	rev	r2,r2
132# endif
133#else
134	@ ldrb	r2,[r1,#3]			@ 0
135	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
136	ldrb	r12,[r1,#2]
137	ldrb	r0,[r1,#1]
138	orr	r2,r2,r12,lsl#8
139	ldrb	r12,[r1],#4
140	orr	r2,r2,r0,lsl#16
141# if 0==15
142	str	r1,[sp,#17*4]			@ make room for r1
143# endif
144	eor	r0,r8,r8,ror#5
145	orr	r2,r2,r12,lsl#24
146	eor	r0,r0,r8,ror#19	@ Sigma1(e)
147#endif
148	ldr	r12,[r14],#4			@ *K256++
149	add	r11,r11,r2			@ h+=X[i]
150	str	r2,[sp,#0*4]
151	eor	r2,r9,r10
152	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
153	and	r2,r2,r8
154	add	r11,r11,r12			@ h+=K256[i]
155	eor	r2,r2,r10			@ Ch(e,f,g)
156	eor	r0,r4,r4,ror#11
157	add	r11,r11,r2			@ h+=Ch(e,f,g)
158#if 0==31
159	and	r12,r12,#0xff
160	cmp	r12,#0xf2			@ done?
161#endif
162#if 0<15
163# if __ARM_ARCH__>=7
164	ldr	r2,[r1],#4			@ prefetch
165# else
166	ldrb	r2,[r1,#3]
167# endif
168	eor	r12,r4,r5			@ a^b, b^c in next round
169#else
170	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
171	eor	r12,r4,r5			@ a^b, b^c in next round
172	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
173#endif
174	eor	r0,r0,r4,ror#20	@ Sigma0(a)
175	and	r3,r3,r12			@ (b^c)&=(a^b)
176	add	r7,r7,r11			@ d+=h
177	eor	r3,r3,r5			@ Maj(a,b,c)
178	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
179	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
180#if __ARM_ARCH__>=7
181	@ ldr	r2,[r1],#4			@ 1
182# if 1==15
183	str	r1,[sp,#17*4]			@ make room for r1
184# endif
185	eor	r0,r7,r7,ror#5
186	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
187	eor	r0,r0,r7,ror#19	@ Sigma1(e)
188# ifndef __ARMEB__
189	rev	r2,r2
190# endif
191#else
192	@ ldrb	r2,[r1,#3]			@ 1
193	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
194	ldrb	r3,[r1,#2]
195	ldrb	r0,[r1,#1]
196	orr	r2,r2,r3,lsl#8
197	ldrb	r3,[r1],#4
198	orr	r2,r2,r0,lsl#16
199# if 1==15
200	str	r1,[sp,#17*4]			@ make room for r1
201# endif
202	eor	r0,r7,r7,ror#5
203	orr	r2,r2,r3,lsl#24
204	eor	r0,r0,r7,ror#19	@ Sigma1(e)
205#endif
206	ldr	r3,[r14],#4			@ *K256++
207	add	r10,r10,r2			@ h+=X[i]
208	str	r2,[sp,#1*4]
209	eor	r2,r8,r9
210	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
211	and	r2,r2,r7
212	add	r10,r10,r3			@ h+=K256[i]
213	eor	r2,r2,r9			@ Ch(e,f,g)
214	eor	r0,r11,r11,ror#11
215	add	r10,r10,r2			@ h+=Ch(e,f,g)
216#if 1==31
217	and	r3,r3,#0xff
218	cmp	r3,#0xf2			@ done?
219#endif
220#if 1<15
221# if __ARM_ARCH__>=7
222	ldr	r2,[r1],#4			@ prefetch
223# else
224	ldrb	r2,[r1,#3]
225# endif
226	eor	r3,r11,r4			@ a^b, b^c in next round
227#else
228	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
229	eor	r3,r11,r4			@ a^b, b^c in next round
230	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
231#endif
232	eor	r0,r0,r11,ror#20	@ Sigma0(a)
233	and	r12,r12,r3			@ (b^c)&=(a^b)
234	add	r6,r6,r10			@ d+=h
235	eor	r12,r12,r4			@ Maj(a,b,c)
236	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
237	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
238#if __ARM_ARCH__>=7
239	@ ldr	r2,[r1],#4			@ 2
240# if 2==15
241	str	r1,[sp,#17*4]			@ make room for r1
242# endif
243	eor	r0,r6,r6,ror#5
244	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
245	eor	r0,r0,r6,ror#19	@ Sigma1(e)
246# ifndef __ARMEB__
247	rev	r2,r2
248# endif
249#else
250	@ ldrb	r2,[r1,#3]			@ 2
251	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
252	ldrb	r12,[r1,#2]
253	ldrb	r0,[r1,#1]
254	orr	r2,r2,r12,lsl#8
255	ldrb	r12,[r1],#4
256	orr	r2,r2,r0,lsl#16
257# if 2==15
258	str	r1,[sp,#17*4]			@ make room for r1
259# endif
260	eor	r0,r6,r6,ror#5
261	orr	r2,r2,r12,lsl#24
262	eor	r0,r0,r6,ror#19	@ Sigma1(e)
263#endif
264	ldr	r12,[r14],#4			@ *K256++
265	add	r9,r9,r2			@ h+=X[i]
266	str	r2,[sp,#2*4]
267	eor	r2,r7,r8
268	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
269	and	r2,r2,r6
270	add	r9,r9,r12			@ h+=K256[i]
271	eor	r2,r2,r8			@ Ch(e,f,g)
272	eor	r0,r10,r10,ror#11
273	add	r9,r9,r2			@ h+=Ch(e,f,g)
274#if 2==31
275	and	r12,r12,#0xff
276	cmp	r12,#0xf2			@ done?
277#endif
278#if 2<15
279# if __ARM_ARCH__>=7
280	ldr	r2,[r1],#4			@ prefetch
281# else
282	ldrb	r2,[r1,#3]
283# endif
284	eor	r12,r10,r11			@ a^b, b^c in next round
285#else
286	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
287	eor	r12,r10,r11			@ a^b, b^c in next round
288	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
289#endif
290	eor	r0,r0,r10,ror#20	@ Sigma0(a)
291	and	r3,r3,r12			@ (b^c)&=(a^b)
292	add	r5,r5,r9			@ d+=h
293	eor	r3,r3,r11			@ Maj(a,b,c)
294	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
295	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
296#if __ARM_ARCH__>=7
297	@ ldr	r2,[r1],#4			@ 3
298# if 3==15
299	str	r1,[sp,#17*4]			@ make room for r1
300# endif
301	eor	r0,r5,r5,ror#5
302	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
303	eor	r0,r0,r5,ror#19	@ Sigma1(e)
304# ifndef __ARMEB__
305	rev	r2,r2
306# endif
307#else
308	@ ldrb	r2,[r1,#3]			@ 3
309	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
310	ldrb	r3,[r1,#2]
311	ldrb	r0,[r1,#1]
312	orr	r2,r2,r3,lsl#8
313	ldrb	r3,[r1],#4
314	orr	r2,r2,r0,lsl#16
315# if 3==15
316	str	r1,[sp,#17*4]			@ make room for r1
317# endif
318	eor	r0,r5,r5,ror#5
319	orr	r2,r2,r3,lsl#24
320	eor	r0,r0,r5,ror#19	@ Sigma1(e)
321#endif
322	ldr	r3,[r14],#4			@ *K256++
323	add	r8,r8,r2			@ h+=X[i]
324	str	r2,[sp,#3*4]
325	eor	r2,r6,r7
326	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
327	and	r2,r2,r5
328	add	r8,r8,r3			@ h+=K256[i]
329	eor	r2,r2,r7			@ Ch(e,f,g)
330	eor	r0,r9,r9,ror#11
331	add	r8,r8,r2			@ h+=Ch(e,f,g)
332#if 3==31
333	and	r3,r3,#0xff
334	cmp	r3,#0xf2			@ done?
335#endif
336#if 3<15
337# if __ARM_ARCH__>=7
338	ldr	r2,[r1],#4			@ prefetch
339# else
340	ldrb	r2,[r1,#3]
341# endif
342	eor	r3,r9,r10			@ a^b, b^c in next round
343#else
344	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
345	eor	r3,r9,r10			@ a^b, b^c in next round
346	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
347#endif
348	eor	r0,r0,r9,ror#20	@ Sigma0(a)
349	and	r12,r12,r3			@ (b^c)&=(a^b)
350	add	r4,r4,r8			@ d+=h
351	eor	r12,r12,r10			@ Maj(a,b,c)
352	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
353	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
354#if __ARM_ARCH__>=7
355	@ ldr	r2,[r1],#4			@ 4
356# if 4==15
357	str	r1,[sp,#17*4]			@ make room for r1
358# endif
359	eor	r0,r4,r4,ror#5
360	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
361	eor	r0,r0,r4,ror#19	@ Sigma1(e)
362# ifndef __ARMEB__
363	rev	r2,r2
364# endif
365#else
366	@ ldrb	r2,[r1,#3]			@ 4
367	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
368	ldrb	r12,[r1,#2]
369	ldrb	r0,[r1,#1]
370	orr	r2,r2,r12,lsl#8
371	ldrb	r12,[r1],#4
372	orr	r2,r2,r0,lsl#16
373# if 4==15
374	str	r1,[sp,#17*4]			@ make room for r1
375# endif
376	eor	r0,r4,r4,ror#5
377	orr	r2,r2,r12,lsl#24
378	eor	r0,r0,r4,ror#19	@ Sigma1(e)
379#endif
380	ldr	r12,[r14],#4			@ *K256++
381	add	r7,r7,r2			@ h+=X[i]
382	str	r2,[sp,#4*4]
383	eor	r2,r5,r6
384	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
385	and	r2,r2,r4
386	add	r7,r7,r12			@ h+=K256[i]
387	eor	r2,r2,r6			@ Ch(e,f,g)
388	eor	r0,r8,r8,ror#11
389	add	r7,r7,r2			@ h+=Ch(e,f,g)
390#if 4==31
391	and	r12,r12,#0xff
392	cmp	r12,#0xf2			@ done?
393#endif
394#if 4<15
395# if __ARM_ARCH__>=7
396	ldr	r2,[r1],#4			@ prefetch
397# else
398	ldrb	r2,[r1,#3]
399# endif
400	eor	r12,r8,r9			@ a^b, b^c in next round
401#else
402	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
403	eor	r12,r8,r9			@ a^b, b^c in next round
404	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
405#endif
406	eor	r0,r0,r8,ror#20	@ Sigma0(a)
407	and	r3,r3,r12			@ (b^c)&=(a^b)
408	add	r11,r11,r7			@ d+=h
409	eor	r3,r3,r9			@ Maj(a,b,c)
410	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
411	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
412#if __ARM_ARCH__>=7
413	@ ldr	r2,[r1],#4			@ 5
414# if 5==15
415	str	r1,[sp,#17*4]			@ make room for r1
416# endif
417	eor	r0,r11,r11,ror#5
418	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
419	eor	r0,r0,r11,ror#19	@ Sigma1(e)
420# ifndef __ARMEB__
421	rev	r2,r2
422# endif
423#else
424	@ ldrb	r2,[r1,#3]			@ 5
425	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
426	ldrb	r3,[r1,#2]
427	ldrb	r0,[r1,#1]
428	orr	r2,r2,r3,lsl#8
429	ldrb	r3,[r1],#4
430	orr	r2,r2,r0,lsl#16
431# if 5==15
432	str	r1,[sp,#17*4]			@ make room for r1
433# endif
434	eor	r0,r11,r11,ror#5
435	orr	r2,r2,r3,lsl#24
436	eor	r0,r0,r11,ror#19	@ Sigma1(e)
437#endif
438	ldr	r3,[r14],#4			@ *K256++
439	add	r6,r6,r2			@ h+=X[i]
440	str	r2,[sp,#5*4]
441	eor	r2,r4,r5
442	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
443	and	r2,r2,r11
444	add	r6,r6,r3			@ h+=K256[i]
445	eor	r2,r2,r5			@ Ch(e,f,g)
446	eor	r0,r7,r7,ror#11
447	add	r6,r6,r2			@ h+=Ch(e,f,g)
448#if 5==31
449	and	r3,r3,#0xff
450	cmp	r3,#0xf2			@ done?
451#endif
452#if 5<15
453# if __ARM_ARCH__>=7
454	ldr	r2,[r1],#4			@ prefetch
455# else
456	ldrb	r2,[r1,#3]
457# endif
458	eor	r3,r7,r8			@ a^b, b^c in next round
459#else
460	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
461	eor	r3,r7,r8			@ a^b, b^c in next round
462	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
463#endif
464	eor	r0,r0,r7,ror#20	@ Sigma0(a)
465	and	r12,r12,r3			@ (b^c)&=(a^b)
466	add	r10,r10,r6			@ d+=h
467	eor	r12,r12,r8			@ Maj(a,b,c)
468	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
469	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
470#if __ARM_ARCH__>=7
471	@ ldr	r2,[r1],#4			@ 6
472# if 6==15
473	str	r1,[sp,#17*4]			@ make room for r1
474# endif
475	eor	r0,r10,r10,ror#5
476	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
477	eor	r0,r0,r10,ror#19	@ Sigma1(e)
478# ifndef __ARMEB__
479	rev	r2,r2
480# endif
481#else
482	@ ldrb	r2,[r1,#3]			@ 6
483	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
484	ldrb	r12,[r1,#2]
485	ldrb	r0,[r1,#1]
486	orr	r2,r2,r12,lsl#8
487	ldrb	r12,[r1],#4
488	orr	r2,r2,r0,lsl#16
489# if 6==15
490	str	r1,[sp,#17*4]			@ make room for r1
491# endif
492	eor	r0,r10,r10,ror#5
493	orr	r2,r2,r12,lsl#24
494	eor	r0,r0,r10,ror#19	@ Sigma1(e)
495#endif
496	ldr	r12,[r14],#4			@ *K256++
497	add	r5,r5,r2			@ h+=X[i]
498	str	r2,[sp,#6*4]
499	eor	r2,r11,r4
500	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
501	and	r2,r2,r10
502	add	r5,r5,r12			@ h+=K256[i]
503	eor	r2,r2,r4			@ Ch(e,f,g)
504	eor	r0,r6,r6,ror#11
505	add	r5,r5,r2			@ h+=Ch(e,f,g)
506#if 6==31
507	and	r12,r12,#0xff
508	cmp	r12,#0xf2			@ done?
509#endif
510#if 6<15
511# if __ARM_ARCH__>=7
512	ldr	r2,[r1],#4			@ prefetch
513# else
514	ldrb	r2,[r1,#3]
515# endif
516	eor	r12,r6,r7			@ a^b, b^c in next round
517#else
518	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
519	eor	r12,r6,r7			@ a^b, b^c in next round
520	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
521#endif
522	eor	r0,r0,r6,ror#20	@ Sigma0(a)
523	and	r3,r3,r12			@ (b^c)&=(a^b)
524	add	r9,r9,r5			@ d+=h
525	eor	r3,r3,r7			@ Maj(a,b,c)
526	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
527	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
528#if __ARM_ARCH__>=7
529	@ ldr	r2,[r1],#4			@ 7
530# if 7==15
531	str	r1,[sp,#17*4]			@ make room for r1
532# endif
533	eor	r0,r9,r9,ror#5
534	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
535	eor	r0,r0,r9,ror#19	@ Sigma1(e)
536# ifndef __ARMEB__
537	rev	r2,r2
538# endif
539#else
540	@ ldrb	r2,[r1,#3]			@ 7
541	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
542	ldrb	r3,[r1,#2]
543	ldrb	r0,[r1,#1]
544	orr	r2,r2,r3,lsl#8
545	ldrb	r3,[r1],#4
546	orr	r2,r2,r0,lsl#16
547# if 7==15
548	str	r1,[sp,#17*4]			@ make room for r1
549# endif
550	eor	r0,r9,r9,ror#5
551	orr	r2,r2,r3,lsl#24
552	eor	r0,r0,r9,ror#19	@ Sigma1(e)
553#endif
554	ldr	r3,[r14],#4			@ *K256++
555	add	r4,r4,r2			@ h+=X[i]
556	str	r2,[sp,#7*4]
557	eor	r2,r10,r11
558	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
559	and	r2,r2,r9
560	add	r4,r4,r3			@ h+=K256[i]
561	eor	r2,r2,r11			@ Ch(e,f,g)
562	eor	r0,r5,r5,ror#11
563	add	r4,r4,r2			@ h+=Ch(e,f,g)
564#if 7==31
565	and	r3,r3,#0xff
566	cmp	r3,#0xf2			@ done?
567#endif
568#if 7<15
569# if __ARM_ARCH__>=7
570	ldr	r2,[r1],#4			@ prefetch
571# else
572	ldrb	r2,[r1,#3]
573# endif
574	eor	r3,r5,r6			@ a^b, b^c in next round
575#else
576	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
577	eor	r3,r5,r6			@ a^b, b^c in next round
578	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
579#endif
580	eor	r0,r0,r5,ror#20	@ Sigma0(a)
581	and	r12,r12,r3			@ (b^c)&=(a^b)
582	add	r8,r8,r4			@ d+=h
583	eor	r12,r12,r6			@ Maj(a,b,c)
584	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
585	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
586#if __ARM_ARCH__>=7
587	@ ldr	r2,[r1],#4			@ 8
588# if 8==15
589	str	r1,[sp,#17*4]			@ make room for r1
590# endif
591	eor	r0,r8,r8,ror#5
592	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
593	eor	r0,r0,r8,ror#19	@ Sigma1(e)
594# ifndef __ARMEB__
595	rev	r2,r2
596# endif
597#else
598	@ ldrb	r2,[r1,#3]			@ 8
599	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
600	ldrb	r12,[r1,#2]
601	ldrb	r0,[r1,#1]
602	orr	r2,r2,r12,lsl#8
603	ldrb	r12,[r1],#4
604	orr	r2,r2,r0,lsl#16
605# if 8==15
606	str	r1,[sp,#17*4]			@ make room for r1
607# endif
608	eor	r0,r8,r8,ror#5
609	orr	r2,r2,r12,lsl#24
610	eor	r0,r0,r8,ror#19	@ Sigma1(e)
611#endif
612	ldr	r12,[r14],#4			@ *K256++
613	add	r11,r11,r2			@ h+=X[i]
614	str	r2,[sp,#8*4]
615	eor	r2,r9,r10
616	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
617	and	r2,r2,r8
618	add	r11,r11,r12			@ h+=K256[i]
619	eor	r2,r2,r10			@ Ch(e,f,g)
620	eor	r0,r4,r4,ror#11
621	add	r11,r11,r2			@ h+=Ch(e,f,g)
622#if 8==31
623	and	r12,r12,#0xff
624	cmp	r12,#0xf2			@ done?
625#endif
626#if 8<15
627# if __ARM_ARCH__>=7
628	ldr	r2,[r1],#4			@ prefetch
629# else
630	ldrb	r2,[r1,#3]
631# endif
632	eor	r12,r4,r5			@ a^b, b^c in next round
633#else
634	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
635	eor	r12,r4,r5			@ a^b, b^c in next round
636	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
637#endif
638	eor	r0,r0,r4,ror#20	@ Sigma0(a)
639	and	r3,r3,r12			@ (b^c)&=(a^b)
640	add	r7,r7,r11			@ d+=h
641	eor	r3,r3,r5			@ Maj(a,b,c)
642	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
643	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
644#if __ARM_ARCH__>=7
645	@ ldr	r2,[r1],#4			@ 9
646# if 9==15
647	str	r1,[sp,#17*4]			@ make room for r1
648# endif
649	eor	r0,r7,r7,ror#5
650	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
651	eor	r0,r0,r7,ror#19	@ Sigma1(e)
652# ifndef __ARMEB__
653	rev	r2,r2
654# endif
655#else
656	@ ldrb	r2,[r1,#3]			@ 9
657	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
658	ldrb	r3,[r1,#2]
659	ldrb	r0,[r1,#1]
660	orr	r2,r2,r3,lsl#8
661	ldrb	r3,[r1],#4
662	orr	r2,r2,r0,lsl#16
663# if 9==15
664	str	r1,[sp,#17*4]			@ make room for r1
665# endif
666	eor	r0,r7,r7,ror#5
667	orr	r2,r2,r3,lsl#24
668	eor	r0,r0,r7,ror#19	@ Sigma1(e)
669#endif
670	ldr	r3,[r14],#4			@ *K256++
671	add	r10,r10,r2			@ h+=X[i]
672	str	r2,[sp,#9*4]
673	eor	r2,r8,r9
674	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
675	and	r2,r2,r7
676	add	r10,r10,r3			@ h+=K256[i]
677	eor	r2,r2,r9			@ Ch(e,f,g)
678	eor	r0,r11,r11,ror#11
679	add	r10,r10,r2			@ h+=Ch(e,f,g)
680#if 9==31
681	and	r3,r3,#0xff
682	cmp	r3,#0xf2			@ done?
683#endif
684#if 9<15
685# if __ARM_ARCH__>=7
686	ldr	r2,[r1],#4			@ prefetch
687# else
688	ldrb	r2,[r1,#3]
689# endif
690	eor	r3,r11,r4			@ a^b, b^c in next round
691#else
692	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
693	eor	r3,r11,r4			@ a^b, b^c in next round
694	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
695#endif
696	eor	r0,r0,r11,ror#20	@ Sigma0(a)
697	and	r12,r12,r3			@ (b^c)&=(a^b)
698	add	r6,r6,r10			@ d+=h
699	eor	r12,r12,r4			@ Maj(a,b,c)
700	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
701	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
702#if __ARM_ARCH__>=7
703	@ ldr	r2,[r1],#4			@ 10
704# if 10==15
705	str	r1,[sp,#17*4]			@ make room for r1
706# endif
707	eor	r0,r6,r6,ror#5
708	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
709	eor	r0,r0,r6,ror#19	@ Sigma1(e)
710# ifndef __ARMEB__
711	rev	r2,r2
712# endif
713#else
714	@ ldrb	r2,[r1,#3]			@ 10
715	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
716	ldrb	r12,[r1,#2]
717	ldrb	r0,[r1,#1]
718	orr	r2,r2,r12,lsl#8
719	ldrb	r12,[r1],#4
720	orr	r2,r2,r0,lsl#16
721# if 10==15
722	str	r1,[sp,#17*4]			@ make room for r1
723# endif
724	eor	r0,r6,r6,ror#5
725	orr	r2,r2,r12,lsl#24
726	eor	r0,r0,r6,ror#19	@ Sigma1(e)
727#endif
728	ldr	r12,[r14],#4			@ *K256++
729	add	r9,r9,r2			@ h+=X[i]
730	str	r2,[sp,#10*4]
731	eor	r2,r7,r8
732	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
733	and	r2,r2,r6
734	add	r9,r9,r12			@ h+=K256[i]
735	eor	r2,r2,r8			@ Ch(e,f,g)
736	eor	r0,r10,r10,ror#11
737	add	r9,r9,r2			@ h+=Ch(e,f,g)
738#if 10==31
739	and	r12,r12,#0xff
740	cmp	r12,#0xf2			@ done?
741#endif
742#if 10<15
743# if __ARM_ARCH__>=7
744	ldr	r2,[r1],#4			@ prefetch
745# else
746	ldrb	r2,[r1,#3]
747# endif
748	eor	r12,r10,r11			@ a^b, b^c in next round
749#else
750	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
751	eor	r12,r10,r11			@ a^b, b^c in next round
752	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
753#endif
754	eor	r0,r0,r10,ror#20	@ Sigma0(a)
755	and	r3,r3,r12			@ (b^c)&=(a^b)
756	add	r5,r5,r9			@ d+=h
757	eor	r3,r3,r11			@ Maj(a,b,c)
758	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
759	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
760#if __ARM_ARCH__>=7
761	@ ldr	r2,[r1],#4			@ 11
762# if 11==15
763	str	r1,[sp,#17*4]			@ make room for r1
764# endif
765	eor	r0,r5,r5,ror#5
766	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
767	eor	r0,r0,r5,ror#19	@ Sigma1(e)
768# ifndef __ARMEB__
769	rev	r2,r2
770# endif
771#else
772	@ ldrb	r2,[r1,#3]			@ 11
773	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
774	ldrb	r3,[r1,#2]
775	ldrb	r0,[r1,#1]
776	orr	r2,r2,r3,lsl#8
777	ldrb	r3,[r1],#4
778	orr	r2,r2,r0,lsl#16
779# if 11==15
780	str	r1,[sp,#17*4]			@ make room for r1
781# endif
782	eor	r0,r5,r5,ror#5
783	orr	r2,r2,r3,lsl#24
784	eor	r0,r0,r5,ror#19	@ Sigma1(e)
785#endif
786	ldr	r3,[r14],#4			@ *K256++
787	add	r8,r8,r2			@ h+=X[i]
788	str	r2,[sp,#11*4]
789	eor	r2,r6,r7
790	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
791	and	r2,r2,r5
792	add	r8,r8,r3			@ h+=K256[i]
793	eor	r2,r2,r7			@ Ch(e,f,g)
794	eor	r0,r9,r9,ror#11
795	add	r8,r8,r2			@ h+=Ch(e,f,g)
796#if 11==31
797	and	r3,r3,#0xff
798	cmp	r3,#0xf2			@ done?
799#endif
800#if 11<15
801# if __ARM_ARCH__>=7
802	ldr	r2,[r1],#4			@ prefetch
803# else
804	ldrb	r2,[r1,#3]
805# endif
806	eor	r3,r9,r10			@ a^b, b^c in next round
807#else
808	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
809	eor	r3,r9,r10			@ a^b, b^c in next round
810	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
811#endif
812	eor	r0,r0,r9,ror#20	@ Sigma0(a)
813	and	r12,r12,r3			@ (b^c)&=(a^b)
814	add	r4,r4,r8			@ d+=h
815	eor	r12,r12,r10			@ Maj(a,b,c)
816	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
817	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
818#if __ARM_ARCH__>=7
819	@ ldr	r2,[r1],#4			@ 12
820# if 12==15
821	str	r1,[sp,#17*4]			@ make room for r1
822# endif
823	eor	r0,r4,r4,ror#5
824	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
825	eor	r0,r0,r4,ror#19	@ Sigma1(e)
826# ifndef __ARMEB__
827	rev	r2,r2
828# endif
829#else
830	@ ldrb	r2,[r1,#3]			@ 12
831	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
832	ldrb	r12,[r1,#2]
833	ldrb	r0,[r1,#1]
834	orr	r2,r2,r12,lsl#8
835	ldrb	r12,[r1],#4
836	orr	r2,r2,r0,lsl#16
837# if 12==15
838	str	r1,[sp,#17*4]			@ make room for r1
839# endif
840	eor	r0,r4,r4,ror#5
841	orr	r2,r2,r12,lsl#24
842	eor	r0,r0,r4,ror#19	@ Sigma1(e)
843#endif
844	ldr	r12,[r14],#4			@ *K256++
845	add	r7,r7,r2			@ h+=X[i]
846	str	r2,[sp,#12*4]
847	eor	r2,r5,r6
848	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
849	and	r2,r2,r4
850	add	r7,r7,r12			@ h+=K256[i]
851	eor	r2,r2,r6			@ Ch(e,f,g)
852	eor	r0,r8,r8,ror#11
853	add	r7,r7,r2			@ h+=Ch(e,f,g)
854#if 12==31
855	and	r12,r12,#0xff
856	cmp	r12,#0xf2			@ done?
857#endif
858#if 12<15
859# if __ARM_ARCH__>=7
860	ldr	r2,[r1],#4			@ prefetch
861# else
862	ldrb	r2,[r1,#3]
863# endif
864	eor	r12,r8,r9			@ a^b, b^c in next round
865#else
866	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
867	eor	r12,r8,r9			@ a^b, b^c in next round
868	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
869#endif
870	eor	r0,r0,r8,ror#20	@ Sigma0(a)
871	and	r3,r3,r12			@ (b^c)&=(a^b)
872	add	r11,r11,r7			@ d+=h
873	eor	r3,r3,r9			@ Maj(a,b,c)
874	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
875	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
876#if __ARM_ARCH__>=7
877	@ ldr	r2,[r1],#4			@ 13
878# if 13==15
879	str	r1,[sp,#17*4]			@ make room for r1
880# endif
881	eor	r0,r11,r11,ror#5
882	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
883	eor	r0,r0,r11,ror#19	@ Sigma1(e)
884# ifndef __ARMEB__
885	rev	r2,r2
886# endif
887#else
888	@ ldrb	r2,[r1,#3]			@ 13
889	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
890	ldrb	r3,[r1,#2]
891	ldrb	r0,[r1,#1]
892	orr	r2,r2,r3,lsl#8
893	ldrb	r3,[r1],#4
894	orr	r2,r2,r0,lsl#16
895# if 13==15
896	str	r1,[sp,#17*4]			@ make room for r1
897# endif
898	eor	r0,r11,r11,ror#5
899	orr	r2,r2,r3,lsl#24
900	eor	r0,r0,r11,ror#19	@ Sigma1(e)
901#endif
902	ldr	r3,[r14],#4			@ *K256++
903	add	r6,r6,r2			@ h+=X[i]
904	str	r2,[sp,#13*4]
905	eor	r2,r4,r5
906	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
907	and	r2,r2,r11
908	add	r6,r6,r3			@ h+=K256[i]
909	eor	r2,r2,r5			@ Ch(e,f,g)
910	eor	r0,r7,r7,ror#11
911	add	r6,r6,r2			@ h+=Ch(e,f,g)
912#if 13==31
913	and	r3,r3,#0xff
914	cmp	r3,#0xf2			@ done?
915#endif
916#if 13<15
917# if __ARM_ARCH__>=7
918	ldr	r2,[r1],#4			@ prefetch
919# else
920	ldrb	r2,[r1,#3]
921# endif
922	eor	r3,r7,r8			@ a^b, b^c in next round
923#else
924	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
925	eor	r3,r7,r8			@ a^b, b^c in next round
926	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
927#endif
928	eor	r0,r0,r7,ror#20	@ Sigma0(a)
929	and	r12,r12,r3			@ (b^c)&=(a^b)
930	add	r10,r10,r6			@ d+=h
931	eor	r12,r12,r8			@ Maj(a,b,c)
932	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
933	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
934#if __ARM_ARCH__>=7
935	@ ldr	r2,[r1],#4			@ 14
936# if 14==15
937	str	r1,[sp,#17*4]			@ make room for r1
938# endif
939	eor	r0,r10,r10,ror#5
940	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
941	eor	r0,r0,r10,ror#19	@ Sigma1(e)
942# ifndef __ARMEB__
943	rev	r2,r2
944# endif
945#else
946	@ ldrb	r2,[r1,#3]			@ 14
947	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
948	ldrb	r12,[r1,#2]
949	ldrb	r0,[r1,#1]
950	orr	r2,r2,r12,lsl#8
951	ldrb	r12,[r1],#4
952	orr	r2,r2,r0,lsl#16
953# if 14==15
954	str	r1,[sp,#17*4]			@ make room for r1
955# endif
956	eor	r0,r10,r10,ror#5
957	orr	r2,r2,r12,lsl#24
958	eor	r0,r0,r10,ror#19	@ Sigma1(e)
959#endif
960	ldr	r12,[r14],#4			@ *K256++
961	add	r5,r5,r2			@ h+=X[i]
962	str	r2,[sp,#14*4]
963	eor	r2,r11,r4
964	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
965	and	r2,r2,r10
966	add	r5,r5,r12			@ h+=K256[i]
967	eor	r2,r2,r4			@ Ch(e,f,g)
968	eor	r0,r6,r6,ror#11
969	add	r5,r5,r2			@ h+=Ch(e,f,g)
970#if 14==31
971	and	r12,r12,#0xff
972	cmp	r12,#0xf2			@ done?
973#endif
974#if 14<15
975# if __ARM_ARCH__>=7
976	ldr	r2,[r1],#4			@ prefetch
977# else
978	ldrb	r2,[r1,#3]
979# endif
980	eor	r12,r6,r7			@ a^b, b^c in next round
981#else
982	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
983	eor	r12,r6,r7			@ a^b, b^c in next round
984	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
985#endif
986	eor	r0,r0,r6,ror#20	@ Sigma0(a)
987	and	r3,r3,r12			@ (b^c)&=(a^b)
988	add	r9,r9,r5			@ d+=h
989	eor	r3,r3,r7			@ Maj(a,b,c)
990	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
991	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
992#if __ARM_ARCH__>=7
993	@ ldr	r2,[r1],#4			@ 15
994# if 15==15
995	str	r1,[sp,#17*4]			@ make room for r1
996# endif
997	eor	r0,r9,r9,ror#5
998	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
999	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1000# ifndef __ARMEB__
1001	rev	r2,r2
1002# endif
1003#else
1004	@ ldrb	r2,[r1,#3]			@ 15
1005	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1006	ldrb	r3,[r1,#2]
1007	ldrb	r0,[r1,#1]
1008	orr	r2,r2,r3,lsl#8
1009	ldrb	r3,[r1],#4
1010	orr	r2,r2,r0,lsl#16
1011# if 15==15
1012	str	r1,[sp,#17*4]			@ make room for r1
1013# endif
1014	eor	r0,r9,r9,ror#5
1015	orr	r2,r2,r3,lsl#24
1016	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1017#endif
1018	ldr	r3,[r14],#4			@ *K256++
1019	add	r4,r4,r2			@ h+=X[i]
1020	str	r2,[sp,#15*4]
1021	eor	r2,r10,r11
1022	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1023	and	r2,r2,r9
1024	add	r4,r4,r3			@ h+=K256[i]
1025	eor	r2,r2,r11			@ Ch(e,f,g)
1026	eor	r0,r5,r5,ror#11
1027	add	r4,r4,r2			@ h+=Ch(e,f,g)
1028#if 15==31
1029	and	r3,r3,#0xff
1030	cmp	r3,#0xf2			@ done?
1031#endif
1032#if 15<15
1033# if __ARM_ARCH__>=7
1034	ldr	r2,[r1],#4			@ prefetch
1035# else
1036	ldrb	r2,[r1,#3]
1037# endif
1038	eor	r3,r5,r6			@ a^b, b^c in next round
1039#else
1040	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1041	eor	r3,r5,r6			@ a^b, b^c in next round
1042	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1043#endif
1044	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1045	and	r12,r12,r3			@ (b^c)&=(a^b)
1046	add	r8,r8,r4			@ d+=h
1047	eor	r12,r12,r6			@ Maj(a,b,c)
1048	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1049	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1050.Lrounds_16_xx:
1051	@ ldr	r2,[sp,#1*4]		@ 16
1052	@ ldr	r1,[sp,#14*4]
1053	mov	r0,r2,ror#7
1054	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1055	mov	r12,r1,ror#17
1056	eor	r0,r0,r2,ror#18
1057	eor	r12,r12,r1,ror#19
1058	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1059	ldr	r2,[sp,#0*4]
1060	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1061	ldr	r1,[sp,#9*4]
1062
1063	add	r12,r12,r0
1064	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1065	add	r2,r2,r12
1066	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1067	add	r2,r2,r1			@ X[i]
1068	ldr	r12,[r14],#4			@ *K256++
1069	add	r11,r11,r2			@ h+=X[i]
1070	str	r2,[sp,#0*4]
1071	eor	r2,r9,r10
1072	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1073	and	r2,r2,r8
1074	add	r11,r11,r12			@ h+=K256[i]
1075	eor	r2,r2,r10			@ Ch(e,f,g)
1076	eor	r0,r4,r4,ror#11
1077	add	r11,r11,r2			@ h+=Ch(e,f,g)
1078#if 16==31
1079	and	r12,r12,#0xff
1080	cmp	r12,#0xf2			@ done?
1081#endif
1082#if 16<15
1083# if __ARM_ARCH__>=7
1084	ldr	r2,[r1],#4			@ prefetch
1085# else
1086	ldrb	r2,[r1,#3]
1087# endif
1088	eor	r12,r4,r5			@ a^b, b^c in next round
1089#else
1090	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1091	eor	r12,r4,r5			@ a^b, b^c in next round
1092	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1093#endif
1094	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1095	and	r3,r3,r12			@ (b^c)&=(a^b)
1096	add	r7,r7,r11			@ d+=h
1097	eor	r3,r3,r5			@ Maj(a,b,c)
1098	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1099	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1100	@ ldr	r2,[sp,#2*4]		@ 17
1101	@ ldr	r1,[sp,#15*4]
1102	mov	r0,r2,ror#7
1103	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1104	mov	r3,r1,ror#17
1105	eor	r0,r0,r2,ror#18
1106	eor	r3,r3,r1,ror#19
1107	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1108	ldr	r2,[sp,#1*4]
1109	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1110	ldr	r1,[sp,#10*4]
1111
1112	add	r3,r3,r0
1113	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1114	add	r2,r2,r3
1115	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1116	add	r2,r2,r1			@ X[i]
1117	ldr	r3,[r14],#4			@ *K256++
1118	add	r10,r10,r2			@ h+=X[i]
1119	str	r2,[sp,#1*4]
1120	eor	r2,r8,r9
1121	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1122	and	r2,r2,r7
1123	add	r10,r10,r3			@ h+=K256[i]
1124	eor	r2,r2,r9			@ Ch(e,f,g)
1125	eor	r0,r11,r11,ror#11
1126	add	r10,r10,r2			@ h+=Ch(e,f,g)
1127#if 17==31
1128	and	r3,r3,#0xff
1129	cmp	r3,#0xf2			@ done?
1130#endif
1131#if 17<15
1132# if __ARM_ARCH__>=7
1133	ldr	r2,[r1],#4			@ prefetch
1134# else
1135	ldrb	r2,[r1,#3]
1136# endif
1137	eor	r3,r11,r4			@ a^b, b^c in next round
1138#else
1139	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1140	eor	r3,r11,r4			@ a^b, b^c in next round
1141	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1142#endif
1143	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1144	and	r12,r12,r3			@ (b^c)&=(a^b)
1145	add	r6,r6,r10			@ d+=h
1146	eor	r12,r12,r4			@ Maj(a,b,c)
1147	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1148	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1149	@ ldr	r2,[sp,#3*4]		@ 18
1150	@ ldr	r1,[sp,#0*4]
1151	mov	r0,r2,ror#7
1152	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1153	mov	r12,r1,ror#17
1154	eor	r0,r0,r2,ror#18
1155	eor	r12,r12,r1,ror#19
1156	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1157	ldr	r2,[sp,#2*4]
1158	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1159	ldr	r1,[sp,#11*4]
1160
1161	add	r12,r12,r0
1162	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1163	add	r2,r2,r12
1164	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1165	add	r2,r2,r1			@ X[i]
1166	ldr	r12,[r14],#4			@ *K256++
1167	add	r9,r9,r2			@ h+=X[i]
1168	str	r2,[sp,#2*4]
1169	eor	r2,r7,r8
1170	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1171	and	r2,r2,r6
1172	add	r9,r9,r12			@ h+=K256[i]
1173	eor	r2,r2,r8			@ Ch(e,f,g)
1174	eor	r0,r10,r10,ror#11
1175	add	r9,r9,r2			@ h+=Ch(e,f,g)
1176#if 18==31
1177	and	r12,r12,#0xff
1178	cmp	r12,#0xf2			@ done?
1179#endif
1180#if 18<15
1181# if __ARM_ARCH__>=7
1182	ldr	r2,[r1],#4			@ prefetch
1183# else
1184	ldrb	r2,[r1,#3]
1185# endif
1186	eor	r12,r10,r11			@ a^b, b^c in next round
1187#else
1188	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1189	eor	r12,r10,r11			@ a^b, b^c in next round
1190	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1191#endif
1192	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1193	and	r3,r3,r12			@ (b^c)&=(a^b)
1194	add	r5,r5,r9			@ d+=h
1195	eor	r3,r3,r11			@ Maj(a,b,c)
1196	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1197	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1198	@ ldr	r2,[sp,#4*4]		@ 19
1199	@ ldr	r1,[sp,#1*4]
1200	mov	r0,r2,ror#7
1201	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1202	mov	r3,r1,ror#17
1203	eor	r0,r0,r2,ror#18
1204	eor	r3,r3,r1,ror#19
1205	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1206	ldr	r2,[sp,#3*4]
1207	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1208	ldr	r1,[sp,#12*4]
1209
1210	add	r3,r3,r0
1211	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1212	add	r2,r2,r3
1213	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1214	add	r2,r2,r1			@ X[i]
1215	ldr	r3,[r14],#4			@ *K256++
1216	add	r8,r8,r2			@ h+=X[i]
1217	str	r2,[sp,#3*4]
1218	eor	r2,r6,r7
1219	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1220	and	r2,r2,r5
1221	add	r8,r8,r3			@ h+=K256[i]
1222	eor	r2,r2,r7			@ Ch(e,f,g)
1223	eor	r0,r9,r9,ror#11
1224	add	r8,r8,r2			@ h+=Ch(e,f,g)
1225#if 19==31
1226	and	r3,r3,#0xff
1227	cmp	r3,#0xf2			@ done?
1228#endif
1229#if 19<15
1230# if __ARM_ARCH__>=7
1231	ldr	r2,[r1],#4			@ prefetch
1232# else
1233	ldrb	r2,[r1,#3]
1234# endif
1235	eor	r3,r9,r10			@ a^b, b^c in next round
1236#else
1237	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1238	eor	r3,r9,r10			@ a^b, b^c in next round
1239	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1240#endif
1241	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1242	and	r12,r12,r3			@ (b^c)&=(a^b)
1243	add	r4,r4,r8			@ d+=h
1244	eor	r12,r12,r10			@ Maj(a,b,c)
1245	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1246	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1247	@ ldr	r2,[sp,#5*4]		@ 20
1248	@ ldr	r1,[sp,#2*4]
1249	mov	r0,r2,ror#7
1250	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1251	mov	r12,r1,ror#17
1252	eor	r0,r0,r2,ror#18
1253	eor	r12,r12,r1,ror#19
1254	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1255	ldr	r2,[sp,#4*4]
1256	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1257	ldr	r1,[sp,#13*4]
1258
1259	add	r12,r12,r0
1260	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1261	add	r2,r2,r12
1262	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1263	add	r2,r2,r1			@ X[i]
1264	ldr	r12,[r14],#4			@ *K256++
1265	add	r7,r7,r2			@ h+=X[i]
1266	str	r2,[sp,#4*4]
1267	eor	r2,r5,r6
1268	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1269	and	r2,r2,r4
1270	add	r7,r7,r12			@ h+=K256[i]
1271	eor	r2,r2,r6			@ Ch(e,f,g)
1272	eor	r0,r8,r8,ror#11
1273	add	r7,r7,r2			@ h+=Ch(e,f,g)
1274#if 20==31
1275	and	r12,r12,#0xff
1276	cmp	r12,#0xf2			@ done?
1277#endif
1278#if 20<15
1279# if __ARM_ARCH__>=7
1280	ldr	r2,[r1],#4			@ prefetch
1281# else
1282	ldrb	r2,[r1,#3]
1283# endif
1284	eor	r12,r8,r9			@ a^b, b^c in next round
1285#else
1286	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1287	eor	r12,r8,r9			@ a^b, b^c in next round
1288	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1289#endif
1290	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1291	and	r3,r3,r12			@ (b^c)&=(a^b)
1292	add	r11,r11,r7			@ d+=h
1293	eor	r3,r3,r9			@ Maj(a,b,c)
1294	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1295	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1296	@ ldr	r2,[sp,#6*4]		@ 21
1297	@ ldr	r1,[sp,#3*4]
1298	mov	r0,r2,ror#7
1299	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1300	mov	r3,r1,ror#17
1301	eor	r0,r0,r2,ror#18
1302	eor	r3,r3,r1,ror#19
1303	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1304	ldr	r2,[sp,#5*4]
1305	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1306	ldr	r1,[sp,#14*4]
1307
1308	add	r3,r3,r0
1309	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1310	add	r2,r2,r3
1311	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1312	add	r2,r2,r1			@ X[i]
1313	ldr	r3,[r14],#4			@ *K256++
1314	add	r6,r6,r2			@ h+=X[i]
1315	str	r2,[sp,#5*4]
1316	eor	r2,r4,r5
1317	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1318	and	r2,r2,r11
1319	add	r6,r6,r3			@ h+=K256[i]
1320	eor	r2,r2,r5			@ Ch(e,f,g)
1321	eor	r0,r7,r7,ror#11
1322	add	r6,r6,r2			@ h+=Ch(e,f,g)
1323#if 21==31
1324	and	r3,r3,#0xff
1325	cmp	r3,#0xf2			@ done?
1326#endif
1327#if 21<15
1328# if __ARM_ARCH__>=7
1329	ldr	r2,[r1],#4			@ prefetch
1330# else
1331	ldrb	r2,[r1,#3]
1332# endif
1333	eor	r3,r7,r8			@ a^b, b^c in next round
1334#else
1335	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1336	eor	r3,r7,r8			@ a^b, b^c in next round
1337	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1338#endif
1339	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1340	and	r12,r12,r3			@ (b^c)&=(a^b)
1341	add	r10,r10,r6			@ d+=h
1342	eor	r12,r12,r8			@ Maj(a,b,c)
1343	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1344	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1345	@ ldr	r2,[sp,#7*4]		@ 22
1346	@ ldr	r1,[sp,#4*4]
1347	mov	r0,r2,ror#7
1348	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1349	mov	r12,r1,ror#17
1350	eor	r0,r0,r2,ror#18
1351	eor	r12,r12,r1,ror#19
1352	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1353	ldr	r2,[sp,#6*4]
1354	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1355	ldr	r1,[sp,#15*4]
1356
1357	add	r12,r12,r0
1358	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1359	add	r2,r2,r12
1360	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1361	add	r2,r2,r1			@ X[i]
1362	ldr	r12,[r14],#4			@ *K256++
1363	add	r5,r5,r2			@ h+=X[i]
1364	str	r2,[sp,#6*4]
1365	eor	r2,r11,r4
1366	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1367	and	r2,r2,r10
1368	add	r5,r5,r12			@ h+=K256[i]
1369	eor	r2,r2,r4			@ Ch(e,f,g)
1370	eor	r0,r6,r6,ror#11
1371	add	r5,r5,r2			@ h+=Ch(e,f,g)
1372#if 22==31
1373	and	r12,r12,#0xff
1374	cmp	r12,#0xf2			@ done?
1375#endif
1376#if 22<15
1377# if __ARM_ARCH__>=7
1378	ldr	r2,[r1],#4			@ prefetch
1379# else
1380	ldrb	r2,[r1,#3]
1381# endif
1382	eor	r12,r6,r7			@ a^b, b^c in next round
1383#else
1384	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1385	eor	r12,r6,r7			@ a^b, b^c in next round
1386	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1387#endif
1388	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1389	and	r3,r3,r12			@ (b^c)&=(a^b)
1390	add	r9,r9,r5			@ d+=h
1391	eor	r3,r3,r7			@ Maj(a,b,c)
1392	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1393	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1394	@ ldr	r2,[sp,#8*4]		@ 23
1395	@ ldr	r1,[sp,#5*4]
1396	mov	r0,r2,ror#7
1397	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1398	mov	r3,r1,ror#17
1399	eor	r0,r0,r2,ror#18
1400	eor	r3,r3,r1,ror#19
1401	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1402	ldr	r2,[sp,#7*4]
1403	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1404	ldr	r1,[sp,#0*4]
1405
1406	add	r3,r3,r0
1407	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1408	add	r2,r2,r3
1409	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1410	add	r2,r2,r1			@ X[i]
1411	ldr	r3,[r14],#4			@ *K256++
1412	add	r4,r4,r2			@ h+=X[i]
1413	str	r2,[sp,#7*4]
1414	eor	r2,r10,r11
1415	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1416	and	r2,r2,r9
1417	add	r4,r4,r3			@ h+=K256[i]
1418	eor	r2,r2,r11			@ Ch(e,f,g)
1419	eor	r0,r5,r5,ror#11
1420	add	r4,r4,r2			@ h+=Ch(e,f,g)
1421#if 23==31
1422	and	r3,r3,#0xff
1423	cmp	r3,#0xf2			@ done?
1424#endif
1425#if 23<15
1426# if __ARM_ARCH__>=7
1427	ldr	r2,[r1],#4			@ prefetch
1428# else
1429	ldrb	r2,[r1,#3]
1430# endif
1431	eor	r3,r5,r6			@ a^b, b^c in next round
1432#else
1433	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1434	eor	r3,r5,r6			@ a^b, b^c in next round
1435	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1436#endif
1437	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1438	and	r12,r12,r3			@ (b^c)&=(a^b)
1439	add	r8,r8,r4			@ d+=h
1440	eor	r12,r12,r6			@ Maj(a,b,c)
1441	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1442	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1443	@ ldr	r2,[sp,#9*4]		@ 24
1444	@ ldr	r1,[sp,#6*4]
1445	mov	r0,r2,ror#7
1446	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1447	mov	r12,r1,ror#17
1448	eor	r0,r0,r2,ror#18
1449	eor	r12,r12,r1,ror#19
1450	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1451	ldr	r2,[sp,#8*4]
1452	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1453	ldr	r1,[sp,#1*4]
1454
1455	add	r12,r12,r0
1456	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1457	add	r2,r2,r12
1458	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1459	add	r2,r2,r1			@ X[i]
1460	ldr	r12,[r14],#4			@ *K256++
1461	add	r11,r11,r2			@ h+=X[i]
1462	str	r2,[sp,#8*4]
1463	eor	r2,r9,r10
1464	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1465	and	r2,r2,r8
1466	add	r11,r11,r12			@ h+=K256[i]
1467	eor	r2,r2,r10			@ Ch(e,f,g)
1468	eor	r0,r4,r4,ror#11
1469	add	r11,r11,r2			@ h+=Ch(e,f,g)
1470#if 24==31
1471	and	r12,r12,#0xff
1472	cmp	r12,#0xf2			@ done?
1473#endif
1474#if 24<15
1475# if __ARM_ARCH__>=7
1476	ldr	r2,[r1],#4			@ prefetch
1477# else
1478	ldrb	r2,[r1,#3]
1479# endif
1480	eor	r12,r4,r5			@ a^b, b^c in next round
1481#else
1482	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1483	eor	r12,r4,r5			@ a^b, b^c in next round
1484	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1485#endif
1486	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1487	and	r3,r3,r12			@ (b^c)&=(a^b)
1488	add	r7,r7,r11			@ d+=h
1489	eor	r3,r3,r5			@ Maj(a,b,c)
1490	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1491	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1492	@ ldr	r2,[sp,#10*4]		@ 25
1493	@ ldr	r1,[sp,#7*4]
1494	mov	r0,r2,ror#7
1495	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1496	mov	r3,r1,ror#17
1497	eor	r0,r0,r2,ror#18
1498	eor	r3,r3,r1,ror#19
1499	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1500	ldr	r2,[sp,#9*4]
1501	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1502	ldr	r1,[sp,#2*4]
1503
1504	add	r3,r3,r0
1505	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1506	add	r2,r2,r3
1507	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1508	add	r2,r2,r1			@ X[i]
1509	ldr	r3,[r14],#4			@ *K256++
1510	add	r10,r10,r2			@ h+=X[i]
1511	str	r2,[sp,#9*4]
1512	eor	r2,r8,r9
1513	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1514	and	r2,r2,r7
1515	add	r10,r10,r3			@ h+=K256[i]
1516	eor	r2,r2,r9			@ Ch(e,f,g)
1517	eor	r0,r11,r11,ror#11
1518	add	r10,r10,r2			@ h+=Ch(e,f,g)
1519#if 25==31
1520	and	r3,r3,#0xff
1521	cmp	r3,#0xf2			@ done?
1522#endif
1523#if 25<15
1524# if __ARM_ARCH__>=7
1525	ldr	r2,[r1],#4			@ prefetch
1526# else
1527	ldrb	r2,[r1,#3]
1528# endif
1529	eor	r3,r11,r4			@ a^b, b^c in next round
1530#else
1531	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1532	eor	r3,r11,r4			@ a^b, b^c in next round
1533	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1534#endif
1535	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1536	and	r12,r12,r3			@ (b^c)&=(a^b)
1537	add	r6,r6,r10			@ d+=h
1538	eor	r12,r12,r4			@ Maj(a,b,c)
1539	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1540	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1541	@ ldr	r2,[sp,#11*4]		@ 26
1542	@ ldr	r1,[sp,#8*4]
1543	mov	r0,r2,ror#7
1544	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1545	mov	r12,r1,ror#17
1546	eor	r0,r0,r2,ror#18
1547	eor	r12,r12,r1,ror#19
1548	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1549	ldr	r2,[sp,#10*4]
1550	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1551	ldr	r1,[sp,#3*4]
1552
1553	add	r12,r12,r0
1554	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1555	add	r2,r2,r12
1556	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1557	add	r2,r2,r1			@ X[i]
1558	ldr	r12,[r14],#4			@ *K256++
1559	add	r9,r9,r2			@ h+=X[i]
1560	str	r2,[sp,#10*4]
1561	eor	r2,r7,r8
1562	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1563	and	r2,r2,r6
1564	add	r9,r9,r12			@ h+=K256[i]
1565	eor	r2,r2,r8			@ Ch(e,f,g)
1566	eor	r0,r10,r10,ror#11
1567	add	r9,r9,r2			@ h+=Ch(e,f,g)
1568#if 26==31
1569	and	r12,r12,#0xff
1570	cmp	r12,#0xf2			@ done?
1571#endif
1572#if 26<15
1573# if __ARM_ARCH__>=7
1574	ldr	r2,[r1],#4			@ prefetch
1575# else
1576	ldrb	r2,[r1,#3]
1577# endif
1578	eor	r12,r10,r11			@ a^b, b^c in next round
1579#else
1580	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1581	eor	r12,r10,r11			@ a^b, b^c in next round
1582	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1583#endif
1584	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1585	and	r3,r3,r12			@ (b^c)&=(a^b)
1586	add	r5,r5,r9			@ d+=h
1587	eor	r3,r3,r11			@ Maj(a,b,c)
1588	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1589	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1590	@ ldr	r2,[sp,#12*4]		@ 27
1591	@ ldr	r1,[sp,#9*4]
1592	mov	r0,r2,ror#7
1593	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1594	mov	r3,r1,ror#17
1595	eor	r0,r0,r2,ror#18
1596	eor	r3,r3,r1,ror#19
1597	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1598	ldr	r2,[sp,#11*4]
1599	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1600	ldr	r1,[sp,#4*4]
1601
1602	add	r3,r3,r0
1603	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1604	add	r2,r2,r3
1605	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1606	add	r2,r2,r1			@ X[i]
1607	ldr	r3,[r14],#4			@ *K256++
1608	add	r8,r8,r2			@ h+=X[i]
1609	str	r2,[sp,#11*4]
1610	eor	r2,r6,r7
1611	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1612	and	r2,r2,r5
1613	add	r8,r8,r3			@ h+=K256[i]
1614	eor	r2,r2,r7			@ Ch(e,f,g)
1615	eor	r0,r9,r9,ror#11
1616	add	r8,r8,r2			@ h+=Ch(e,f,g)
1617#if 27==31
1618	and	r3,r3,#0xff
1619	cmp	r3,#0xf2			@ done?
1620#endif
1621#if 27<15
1622# if __ARM_ARCH__>=7
1623	ldr	r2,[r1],#4			@ prefetch
1624# else
1625	ldrb	r2,[r1,#3]
1626# endif
1627	eor	r3,r9,r10			@ a^b, b^c in next round
1628#else
1629	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1630	eor	r3,r9,r10			@ a^b, b^c in next round
1631	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1632#endif
1633	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1634	and	r12,r12,r3			@ (b^c)&=(a^b)
1635	add	r4,r4,r8			@ d+=h
1636	eor	r12,r12,r10			@ Maj(a,b,c)
1637	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1638	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1639	@ ldr	r2,[sp,#13*4]		@ 28
1640	@ ldr	r1,[sp,#10*4]
1641	mov	r0,r2,ror#7
1642	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1643	mov	r12,r1,ror#17
1644	eor	r0,r0,r2,ror#18
1645	eor	r12,r12,r1,ror#19
1646	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1647	ldr	r2,[sp,#12*4]
1648	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1649	ldr	r1,[sp,#5*4]
1650
1651	add	r12,r12,r0
1652	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1653	add	r2,r2,r12
1654	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1655	add	r2,r2,r1			@ X[i]
1656	ldr	r12,[r14],#4			@ *K256++
1657	add	r7,r7,r2			@ h+=X[i]
1658	str	r2,[sp,#12*4]
1659	eor	r2,r5,r6
1660	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1661	and	r2,r2,r4
1662	add	r7,r7,r12			@ h+=K256[i]
1663	eor	r2,r2,r6			@ Ch(e,f,g)
1664	eor	r0,r8,r8,ror#11
1665	add	r7,r7,r2			@ h+=Ch(e,f,g)
1666#if 28==31
1667	and	r12,r12,#0xff
1668	cmp	r12,#0xf2			@ done?
1669#endif
1670#if 28<15
1671# if __ARM_ARCH__>=7
1672	ldr	r2,[r1],#4			@ prefetch
1673# else
1674	ldrb	r2,[r1,#3]
1675# endif
1676	eor	r12,r8,r9			@ a^b, b^c in next round
1677#else
1678	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1679	eor	r12,r8,r9			@ a^b, b^c in next round
1680	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1681#endif
1682	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1683	and	r3,r3,r12			@ (b^c)&=(a^b)
1684	add	r11,r11,r7			@ d+=h
1685	eor	r3,r3,r9			@ Maj(a,b,c)
1686	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1687	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1688	@ ldr	r2,[sp,#14*4]		@ 29
1689	@ ldr	r1,[sp,#11*4]
1690	mov	r0,r2,ror#7
1691	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1692	mov	r3,r1,ror#17
1693	eor	r0,r0,r2,ror#18
1694	eor	r3,r3,r1,ror#19
1695	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1696	ldr	r2,[sp,#13*4]
1697	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1698	ldr	r1,[sp,#6*4]
1699
1700	add	r3,r3,r0
1701	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1702	add	r2,r2,r3
1703	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1704	add	r2,r2,r1			@ X[i]
1705	ldr	r3,[r14],#4			@ *K256++
1706	add	r6,r6,r2			@ h+=X[i]
1707	str	r2,[sp,#13*4]
1708	eor	r2,r4,r5
1709	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1710	and	r2,r2,r11
1711	add	r6,r6,r3			@ h+=K256[i]
1712	eor	r2,r2,r5			@ Ch(e,f,g)
1713	eor	r0,r7,r7,ror#11
1714	add	r6,r6,r2			@ h+=Ch(e,f,g)
1715#if 29==31
1716	and	r3,r3,#0xff
1717	cmp	r3,#0xf2			@ done?
1718#endif
1719#if 29<15
1720# if __ARM_ARCH__>=7
1721	ldr	r2,[r1],#4			@ prefetch
1722# else
1723	ldrb	r2,[r1,#3]
1724# endif
1725	eor	r3,r7,r8			@ a^b, b^c in next round
1726#else
1727	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1728	eor	r3,r7,r8			@ a^b, b^c in next round
1729	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1730#endif
1731	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1732	and	r12,r12,r3			@ (b^c)&=(a^b)
1733	add	r10,r10,r6			@ d+=h
1734	eor	r12,r12,r8			@ Maj(a,b,c)
1735	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1736	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1737	@ ldr	r2,[sp,#15*4]		@ 30
1738	@ ldr	r1,[sp,#12*4]
1739	mov	r0,r2,ror#7
1740	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1741	mov	r12,r1,ror#17
1742	eor	r0,r0,r2,ror#18
1743	eor	r12,r12,r1,ror#19
1744	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1745	ldr	r2,[sp,#14*4]
1746	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1747	ldr	r1,[sp,#7*4]
1748
1749	add	r12,r12,r0
1750	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1751	add	r2,r2,r12
1752	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1753	add	r2,r2,r1			@ X[i]
1754	ldr	r12,[r14],#4			@ *K256++
1755	add	r5,r5,r2			@ h+=X[i]
1756	str	r2,[sp,#14*4]
1757	eor	r2,r11,r4
1758	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1759	and	r2,r2,r10
1760	add	r5,r5,r12			@ h+=K256[i]
1761	eor	r2,r2,r4			@ Ch(e,f,g)
1762	eor	r0,r6,r6,ror#11
1763	add	r5,r5,r2			@ h+=Ch(e,f,g)
1764#if 30==31
1765	and	r12,r12,#0xff
1766	cmp	r12,#0xf2			@ done?
1767#endif
1768#if 30<15
1769# if __ARM_ARCH__>=7
1770	ldr	r2,[r1],#4			@ prefetch
1771# else
1772	ldrb	r2,[r1,#3]
1773# endif
1774	eor	r12,r6,r7			@ a^b, b^c in next round
1775#else
1776	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1777	eor	r12,r6,r7			@ a^b, b^c in next round
1778	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1779#endif
1780	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1781	and	r3,r3,r12			@ (b^c)&=(a^b)
1782	add	r9,r9,r5			@ d+=h
1783	eor	r3,r3,r7			@ Maj(a,b,c)
1784	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1785	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1786	@ ldr	r2,[sp,#0*4]		@ 31
1787	@ ldr	r1,[sp,#13*4]
1788	mov	r0,r2,ror#7
1789	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1790	mov	r3,r1,ror#17
1791	eor	r0,r0,r2,ror#18
1792	eor	r3,r3,r1,ror#19
1793	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1794	ldr	r2,[sp,#15*4]
1795	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1796	ldr	r1,[sp,#8*4]
1797
1798	add	r3,r3,r0
1799	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1800	add	r2,r2,r3
1801	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1802	add	r2,r2,r1			@ X[i]
1803	ldr	r3,[r14],#4			@ *K256++
1804	add	r4,r4,r2			@ h+=X[i]
1805	str	r2,[sp,#15*4]
1806	eor	r2,r10,r11
1807	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1808	and	r2,r2,r9
1809	add	r4,r4,r3			@ h+=K256[i]
1810	eor	r2,r2,r11			@ Ch(e,f,g)
1811	eor	r0,r5,r5,ror#11
1812	add	r4,r4,r2			@ h+=Ch(e,f,g)
1813#if 31==31
1814	and	r3,r3,#0xff
1815	cmp	r3,#0xf2			@ done?
1816#endif
1817#if 31<15
1818# if __ARM_ARCH__>=7
1819	ldr	r2,[r1],#4			@ prefetch
1820# else
1821	ldrb	r2,[r1,#3]
1822# endif
1823	eor	r3,r5,r6			@ a^b, b^c in next round
1824#else
1825	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1826	eor	r3,r5,r6			@ a^b, b^c in next round
1827	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1828#endif
1829	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1830	and	r12,r12,r3			@ (b^c)&=(a^b)
1831	add	r8,r8,r4			@ d+=h
1832	eor	r12,r12,r6			@ Maj(a,b,c)
1833	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1834	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1835#if __ARM_ARCH__>=7
1836	ite	eq			@ Thumb2 thing, sanity check in ARM
1837#endif
1838	ldreq	r3,[sp,#16*4]		@ pull ctx
1839	bne	.Lrounds_16_xx
1840
1841	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1842	ldr	r0,[r3,#0]
1843	ldr	r2,[r3,#4]
1844	ldr	r12,[r3,#8]
1845	add	r4,r4,r0
1846	ldr	r0,[r3,#12]
1847	add	r5,r5,r2
1848	ldr	r2,[r3,#16]
1849	add	r6,r6,r12
1850	ldr	r12,[r3,#20]
1851	add	r7,r7,r0
1852	ldr	r0,[r3,#24]
1853	add	r8,r8,r2
1854	ldr	r2,[r3,#28]
1855	add	r9,r9,r12
1856	ldr	r1,[sp,#17*4]		@ pull inp
1857	ldr	r12,[sp,#18*4]		@ pull inp+len
1858	add	r10,r10,r0
1859	add	r11,r11,r2
1860	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1861	cmp	r1,r12
1862	sub	r14,r14,#256	@ rewind Ktbl
1863	bne	.Loop
1864
1865	add	sp,sp,#19*4	@ destroy frame
1866#if __ARM_ARCH__>=5
1867	ldmia	sp!,{r4-r11,pc}
1868#else
1869	ldmia	sp!,{r4-r11,lr}
1870	tst	lr,#1
1871	moveq	pc,lr			@ be binary compatible with V4, yet
1872	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1873#endif
1874.size	sha256_block_data_order,.-sha256_block_data_order
1875#if __ARM_MAX_ARCH__>=7
1876.arch	armv7-a
1877.fpu	neon
1878
1879.global	sha256_block_data_order_neon
1880.type	sha256_block_data_order_neon,%function
1881.align	4
1882sha256_block_data_order_neon:
1883.LNEON:
1884	stmdb	sp!,{r4-r12,lr}
1885
1886	sub	r11,sp,#16*4+16
1887	adr	r14,.Lsha256_block_data_order
1888	sub	r14,r14,#.Lsha256_block_data_order-K256
1889	bic	r11,r11,#15		@ align for 128-bit stores
1890	mov	r12,sp
1891	mov	sp,r11			@ alloca
1892	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1893
1894	vld1.8		{q0},[r1]!
1895	vld1.8		{q1},[r1]!
1896	vld1.8		{q2},[r1]!
1897	vld1.8		{q3},[r1]!
1898	vld1.32		{q8},[r14,:128]!
1899	vld1.32		{q9},[r14,:128]!
1900	vld1.32		{q10},[r14,:128]!
1901	vld1.32		{q11},[r14,:128]!
1902	vrev32.8	q0,q0		@ yes, even on
1903	str		r0,[sp,#64]
1904	vrev32.8	q1,q1		@ big-endian
1905	str		r1,[sp,#68]
1906	mov		r1,sp
1907	vrev32.8	q2,q2
1908	str		r2,[sp,#72]
1909	vrev32.8	q3,q3
1910	str		r12,[sp,#76]		@ save original sp
1911	vadd.i32	q8,q8,q0
1912	vadd.i32	q9,q9,q1
1913	vst1.32		{q8},[r1,:128]!
1914	vadd.i32	q10,q10,q2
1915	vst1.32		{q9},[r1,:128]!
1916	vadd.i32	q11,q11,q3
1917	vst1.32		{q10},[r1,:128]!
1918	vst1.32		{q11},[r1,:128]!
1919
1920	ldmia		r0,{r4-r11}
1921	sub		r1,r1,#64
1922	ldr		r2,[sp,#0]
1923	eor		r12,r12,r12
1924	eor		r3,r5,r6
1925	b		.L_00_48
1926
1927.align	4
1928.L_00_48:
1929	vext.8	q8,q0,q1,#4
1930	add	r11,r11,r2
1931	eor	r2,r9,r10
1932	eor	r0,r8,r8,ror#5
1933	vext.8	q9,q2,q3,#4
1934	add	r4,r4,r12
1935	and	r2,r2,r8
1936	eor	r12,r0,r8,ror#19
1937	vshr.u32	q10,q8,#7
1938	eor	r0,r4,r4,ror#11
1939	eor	r2,r2,r10
1940	vadd.i32	q0,q0,q9
1941	add	r11,r11,r12,ror#6
1942	eor	r12,r4,r5
1943	vshr.u32	q9,q8,#3
1944	eor	r0,r0,r4,ror#20
1945	add	r11,r11,r2
1946	vsli.32	q10,q8,#25
1947	ldr	r2,[sp,#4]
1948	and	r3,r3,r12
1949	vshr.u32	q11,q8,#18
1950	add	r7,r7,r11
1951	add	r11,r11,r0,ror#2
1952	eor	r3,r3,r5
1953	veor	q9,q9,q10
1954	add	r10,r10,r2
1955	vsli.32	q11,q8,#14
1956	eor	r2,r8,r9
1957	eor	r0,r7,r7,ror#5
1958	vshr.u32	d24,d7,#17
1959	add	r11,r11,r3
1960	and	r2,r2,r7
1961	veor	q9,q9,q11
1962	eor	r3,r0,r7,ror#19
1963	eor	r0,r11,r11,ror#11
1964	vsli.32	d24,d7,#15
1965	eor	r2,r2,r9
1966	add	r10,r10,r3,ror#6
1967	vshr.u32	d25,d7,#10
1968	eor	r3,r11,r4
1969	eor	r0,r0,r11,ror#20
1970	vadd.i32	q0,q0,q9
1971	add	r10,r10,r2
1972	ldr	r2,[sp,#8]
1973	veor	d25,d25,d24
1974	and	r12,r12,r3
1975	add	r6,r6,r10
1976	vshr.u32	d24,d7,#19
1977	add	r10,r10,r0,ror#2
1978	eor	r12,r12,r4
1979	vsli.32	d24,d7,#13
1980	add	r9,r9,r2
1981	eor	r2,r7,r8
1982	veor	d25,d25,d24
1983	eor	r0,r6,r6,ror#5
1984	add	r10,r10,r12
1985	vadd.i32	d0,d0,d25
1986	and	r2,r2,r6
1987	eor	r12,r0,r6,ror#19
1988	vshr.u32	d24,d0,#17
1989	eor	r0,r10,r10,ror#11
1990	eor	r2,r2,r8
1991	vsli.32	d24,d0,#15
1992	add	r9,r9,r12,ror#6
1993	eor	r12,r10,r11
1994	vshr.u32	d25,d0,#10
1995	eor	r0,r0,r10,ror#20
1996	add	r9,r9,r2
1997	veor	d25,d25,d24
1998	ldr	r2,[sp,#12]
1999	and	r3,r3,r12
2000	vshr.u32	d24,d0,#19
2001	add	r5,r5,r9
2002	add	r9,r9,r0,ror#2
2003	eor	r3,r3,r11
2004	vld1.32	{q8},[r14,:128]!
2005	add	r8,r8,r2
2006	vsli.32	d24,d0,#13
2007	eor	r2,r6,r7
2008	eor	r0,r5,r5,ror#5
2009	veor	d25,d25,d24
2010	add	r9,r9,r3
2011	and	r2,r2,r5
2012	vadd.i32	d1,d1,d25
2013	eor	r3,r0,r5,ror#19
2014	eor	r0,r9,r9,ror#11
2015	vadd.i32	q8,q8,q0
2016	eor	r2,r2,r7
2017	add	r8,r8,r3,ror#6
2018	eor	r3,r9,r10
2019	eor	r0,r0,r9,ror#20
2020	add	r8,r8,r2
2021	ldr	r2,[sp,#16]
2022	and	r12,r12,r3
2023	add	r4,r4,r8
2024	vst1.32	{q8},[r1,:128]!
2025	add	r8,r8,r0,ror#2
2026	eor	r12,r12,r10
2027	vext.8	q8,q1,q2,#4
2028	add	r7,r7,r2
2029	eor	r2,r5,r6
2030	eor	r0,r4,r4,ror#5
2031	vext.8	q9,q3,q0,#4
2032	add	r8,r8,r12
2033	and	r2,r2,r4
2034	eor	r12,r0,r4,ror#19
2035	vshr.u32	q10,q8,#7
2036	eor	r0,r8,r8,ror#11
2037	eor	r2,r2,r6
2038	vadd.i32	q1,q1,q9
2039	add	r7,r7,r12,ror#6
2040	eor	r12,r8,r9
2041	vshr.u32	q9,q8,#3
2042	eor	r0,r0,r8,ror#20
2043	add	r7,r7,r2
2044	vsli.32	q10,q8,#25
2045	ldr	r2,[sp,#20]
2046	and	r3,r3,r12
2047	vshr.u32	q11,q8,#18
2048	add	r11,r11,r7
2049	add	r7,r7,r0,ror#2
2050	eor	r3,r3,r9
2051	veor	q9,q9,q10
2052	add	r6,r6,r2
2053	vsli.32	q11,q8,#14
2054	eor	r2,r4,r5
2055	eor	r0,r11,r11,ror#5
2056	vshr.u32	d24,d1,#17
2057	add	r7,r7,r3
2058	and	r2,r2,r11
2059	veor	q9,q9,q11
2060	eor	r3,r0,r11,ror#19
2061	eor	r0,r7,r7,ror#11
2062	vsli.32	d24,d1,#15
2063	eor	r2,r2,r5
2064	add	r6,r6,r3,ror#6
2065	vshr.u32	d25,d1,#10
2066	eor	r3,r7,r8
2067	eor	r0,r0,r7,ror#20
2068	vadd.i32	q1,q1,q9
2069	add	r6,r6,r2
2070	ldr	r2,[sp,#24]
2071	veor	d25,d25,d24
2072	and	r12,r12,r3
2073	add	r10,r10,r6
2074	vshr.u32	d24,d1,#19
2075	add	r6,r6,r0,ror#2
2076	eor	r12,r12,r8
2077	vsli.32	d24,d1,#13
2078	add	r5,r5,r2
2079	eor	r2,r11,r4
2080	veor	d25,d25,d24
2081	eor	r0,r10,r10,ror#5
2082	add	r6,r6,r12
2083	vadd.i32	d2,d2,d25
2084	and	r2,r2,r10
2085	eor	r12,r0,r10,ror#19
2086	vshr.u32	d24,d2,#17
2087	eor	r0,r6,r6,ror#11
2088	eor	r2,r2,r4
2089	vsli.32	d24,d2,#15
2090	add	r5,r5,r12,ror#6
2091	eor	r12,r6,r7
2092	vshr.u32	d25,d2,#10
2093	eor	r0,r0,r6,ror#20
2094	add	r5,r5,r2
2095	veor	d25,d25,d24
2096	ldr	r2,[sp,#28]
2097	and	r3,r3,r12
2098	vshr.u32	d24,d2,#19
2099	add	r9,r9,r5
2100	add	r5,r5,r0,ror#2
2101	eor	r3,r3,r7
2102	vld1.32	{q8},[r14,:128]!
2103	add	r4,r4,r2
2104	vsli.32	d24,d2,#13
2105	eor	r2,r10,r11
2106	eor	r0,r9,r9,ror#5
2107	veor	d25,d25,d24
2108	add	r5,r5,r3
2109	and	r2,r2,r9
2110	vadd.i32	d3,d3,d25
2111	eor	r3,r0,r9,ror#19
2112	eor	r0,r5,r5,ror#11
2113	vadd.i32	q8,q8,q1
2114	eor	r2,r2,r11
2115	add	r4,r4,r3,ror#6
2116	eor	r3,r5,r6
2117	eor	r0,r0,r5,ror#20
2118	add	r4,r4,r2
2119	ldr	r2,[sp,#32]
2120	and	r12,r12,r3
2121	add	r8,r8,r4
2122	vst1.32	{q8},[r1,:128]!
2123	add	r4,r4,r0,ror#2
2124	eor	r12,r12,r6
2125	vext.8	q8,q2,q3,#4
2126	add	r11,r11,r2
2127	eor	r2,r9,r10
2128	eor	r0,r8,r8,ror#5
2129	vext.8	q9,q0,q1,#4
2130	add	r4,r4,r12
2131	and	r2,r2,r8
2132	eor	r12,r0,r8,ror#19
2133	vshr.u32	q10,q8,#7
2134	eor	r0,r4,r4,ror#11
2135	eor	r2,r2,r10
2136	vadd.i32	q2,q2,q9
2137	add	r11,r11,r12,ror#6
2138	eor	r12,r4,r5
2139	vshr.u32	q9,q8,#3
2140	eor	r0,r0,r4,ror#20
2141	add	r11,r11,r2
2142	vsli.32	q10,q8,#25
2143	ldr	r2,[sp,#36]
2144	and	r3,r3,r12
2145	vshr.u32	q11,q8,#18
2146	add	r7,r7,r11
2147	add	r11,r11,r0,ror#2
2148	eor	r3,r3,r5
2149	veor	q9,q9,q10
2150	add	r10,r10,r2
2151	vsli.32	q11,q8,#14
2152	eor	r2,r8,r9
2153	eor	r0,r7,r7,ror#5
2154	vshr.u32	d24,d3,#17
2155	add	r11,r11,r3
2156	and	r2,r2,r7
2157	veor	q9,q9,q11
2158	eor	r3,r0,r7,ror#19
2159	eor	r0,r11,r11,ror#11
2160	vsli.32	d24,d3,#15
2161	eor	r2,r2,r9
2162	add	r10,r10,r3,ror#6
2163	vshr.u32	d25,d3,#10
2164	eor	r3,r11,r4
2165	eor	r0,r0,r11,ror#20
2166	vadd.i32	q2,q2,q9
2167	add	r10,r10,r2
2168	ldr	r2,[sp,#40]
2169	veor	d25,d25,d24
2170	and	r12,r12,r3
2171	add	r6,r6,r10
2172	vshr.u32	d24,d3,#19
2173	add	r10,r10,r0,ror#2
2174	eor	r12,r12,r4
2175	vsli.32	d24,d3,#13
2176	add	r9,r9,r2
2177	eor	r2,r7,r8
2178	veor	d25,d25,d24
2179	eor	r0,r6,r6,ror#5
2180	add	r10,r10,r12
2181	vadd.i32	d4,d4,d25
2182	and	r2,r2,r6
2183	eor	r12,r0,r6,ror#19
2184	vshr.u32	d24,d4,#17
2185	eor	r0,r10,r10,ror#11
2186	eor	r2,r2,r8
2187	vsli.32	d24,d4,#15
2188	add	r9,r9,r12,ror#6
2189	eor	r12,r10,r11
2190	vshr.u32	d25,d4,#10
2191	eor	r0,r0,r10,ror#20
2192	add	r9,r9,r2
2193	veor	d25,d25,d24
2194	ldr	r2,[sp,#44]
2195	and	r3,r3,r12
2196	vshr.u32	d24,d4,#19
2197	add	r5,r5,r9
2198	add	r9,r9,r0,ror#2
2199	eor	r3,r3,r11
2200	vld1.32	{q8},[r14,:128]!
2201	add	r8,r8,r2
2202	vsli.32	d24,d4,#13
2203	eor	r2,r6,r7
2204	eor	r0,r5,r5,ror#5
2205	veor	d25,d25,d24
2206	add	r9,r9,r3
2207	and	r2,r2,r5
2208	vadd.i32	d5,d5,d25
2209	eor	r3,r0,r5,ror#19
2210	eor	r0,r9,r9,ror#11
2211	vadd.i32	q8,q8,q2
2212	eor	r2,r2,r7
2213	add	r8,r8,r3,ror#6
2214	eor	r3,r9,r10
2215	eor	r0,r0,r9,ror#20
2216	add	r8,r8,r2
2217	ldr	r2,[sp,#48]
2218	and	r12,r12,r3
2219	add	r4,r4,r8
2220	vst1.32	{q8},[r1,:128]!
2221	add	r8,r8,r0,ror#2
2222	eor	r12,r12,r10
2223	vext.8	q8,q3,q0,#4
2224	add	r7,r7,r2
2225	eor	r2,r5,r6
2226	eor	r0,r4,r4,ror#5
2227	vext.8	q9,q1,q2,#4
2228	add	r8,r8,r12
2229	and	r2,r2,r4
2230	eor	r12,r0,r4,ror#19
2231	vshr.u32	q10,q8,#7
2232	eor	r0,r8,r8,ror#11
2233	eor	r2,r2,r6
2234	vadd.i32	q3,q3,q9
2235	add	r7,r7,r12,ror#6
2236	eor	r12,r8,r9
2237	vshr.u32	q9,q8,#3
2238	eor	r0,r0,r8,ror#20
2239	add	r7,r7,r2
2240	vsli.32	q10,q8,#25
2241	ldr	r2,[sp,#52]
2242	and	r3,r3,r12
2243	vshr.u32	q11,q8,#18
2244	add	r11,r11,r7
2245	add	r7,r7,r0,ror#2
2246	eor	r3,r3,r9
2247	veor	q9,q9,q10
2248	add	r6,r6,r2
2249	vsli.32	q11,q8,#14
2250	eor	r2,r4,r5
2251	eor	r0,r11,r11,ror#5
2252	vshr.u32	d24,d5,#17
2253	add	r7,r7,r3
2254	and	r2,r2,r11
2255	veor	q9,q9,q11
2256	eor	r3,r0,r11,ror#19
2257	eor	r0,r7,r7,ror#11
2258	vsli.32	d24,d5,#15
2259	eor	r2,r2,r5
2260	add	r6,r6,r3,ror#6
2261	vshr.u32	d25,d5,#10
2262	eor	r3,r7,r8
2263	eor	r0,r0,r7,ror#20
2264	vadd.i32	q3,q3,q9
2265	add	r6,r6,r2
2266	ldr	r2,[sp,#56]
2267	veor	d25,d25,d24
2268	and	r12,r12,r3
2269	add	r10,r10,r6
2270	vshr.u32	d24,d5,#19
2271	add	r6,r6,r0,ror#2
2272	eor	r12,r12,r8
2273	vsli.32	d24,d5,#13
2274	add	r5,r5,r2
2275	eor	r2,r11,r4
2276	veor	d25,d25,d24
2277	eor	r0,r10,r10,ror#5
2278	add	r6,r6,r12
2279	vadd.i32	d6,d6,d25
2280	and	r2,r2,r10
2281	eor	r12,r0,r10,ror#19
2282	vshr.u32	d24,d6,#17
2283	eor	r0,r6,r6,ror#11
2284	eor	r2,r2,r4
2285	vsli.32	d24,d6,#15
2286	add	r5,r5,r12,ror#6
2287	eor	r12,r6,r7
2288	vshr.u32	d25,d6,#10
2289	eor	r0,r0,r6,ror#20
2290	add	r5,r5,r2
2291	veor	d25,d25,d24
2292	ldr	r2,[sp,#60]
2293	and	r3,r3,r12
2294	vshr.u32	d24,d6,#19
2295	add	r9,r9,r5
2296	add	r5,r5,r0,ror#2
2297	eor	r3,r3,r7
2298	vld1.32	{q8},[r14,:128]!
2299	add	r4,r4,r2
2300	vsli.32	d24,d6,#13
2301	eor	r2,r10,r11
2302	eor	r0,r9,r9,ror#5
2303	veor	d25,d25,d24
2304	add	r5,r5,r3
2305	and	r2,r2,r9
2306	vadd.i32	d7,d7,d25
2307	eor	r3,r0,r9,ror#19
2308	eor	r0,r5,r5,ror#11
2309	vadd.i32	q8,q8,q3
2310	eor	r2,r2,r11
2311	add	r4,r4,r3,ror#6
2312	eor	r3,r5,r6
2313	eor	r0,r0,r5,ror#20
2314	add	r4,r4,r2
2315	ldr	r2,[r14]
2316	and	r12,r12,r3
2317	add	r8,r8,r4
2318	vst1.32	{q8},[r1,:128]!
2319	add	r4,r4,r0,ror#2
2320	eor	r12,r12,r6
2321	teq	r2,#0				@ check for K256 terminator
2322	ldr	r2,[sp,#0]
2323	sub	r1,r1,#64
2324	bne	.L_00_48
2325
2326	ldr		r1,[sp,#68]
2327	ldr		r0,[sp,#72]
2328	sub		r14,r14,#256	@ rewind r14
2329	teq		r1,r0
2330	it		eq
2331	subeq		r1,r1,#64		@ avoid SEGV
2332	vld1.8		{q0},[r1]!		@ load next input block
2333	vld1.8		{q1},[r1]!
2334	vld1.8		{q2},[r1]!
2335	vld1.8		{q3},[r1]!
2336	it		ne
2337	strne		r1,[sp,#68]
2338	mov		r1,sp
2339	add	r11,r11,r2
2340	eor	r2,r9,r10
2341	eor	r0,r8,r8,ror#5
2342	add	r4,r4,r12
2343	vld1.32	{q8},[r14,:128]!
2344	and	r2,r2,r8
2345	eor	r12,r0,r8,ror#19
2346	eor	r0,r4,r4,ror#11
2347	eor	r2,r2,r10
2348	vrev32.8	q0,q0
2349	add	r11,r11,r12,ror#6
2350	eor	r12,r4,r5
2351	eor	r0,r0,r4,ror#20
2352	add	r11,r11,r2
2353	vadd.i32	q8,q8,q0
2354	ldr	r2,[sp,#4]
2355	and	r3,r3,r12
2356	add	r7,r7,r11
2357	add	r11,r11,r0,ror#2
2358	eor	r3,r3,r5
2359	add	r10,r10,r2
2360	eor	r2,r8,r9
2361	eor	r0,r7,r7,ror#5
2362	add	r11,r11,r3
2363	and	r2,r2,r7
2364	eor	r3,r0,r7,ror#19
2365	eor	r0,r11,r11,ror#11
2366	eor	r2,r2,r9
2367	add	r10,r10,r3,ror#6
2368	eor	r3,r11,r4
2369	eor	r0,r0,r11,ror#20
2370	add	r10,r10,r2
2371	ldr	r2,[sp,#8]
2372	and	r12,r12,r3
2373	add	r6,r6,r10
2374	add	r10,r10,r0,ror#2
2375	eor	r12,r12,r4
2376	add	r9,r9,r2
2377	eor	r2,r7,r8
2378	eor	r0,r6,r6,ror#5
2379	add	r10,r10,r12
2380	and	r2,r2,r6
2381	eor	r12,r0,r6,ror#19
2382	eor	r0,r10,r10,ror#11
2383	eor	r2,r2,r8
2384	add	r9,r9,r12,ror#6
2385	eor	r12,r10,r11
2386	eor	r0,r0,r10,ror#20
2387	add	r9,r9,r2
2388	ldr	r2,[sp,#12]
2389	and	r3,r3,r12
2390	add	r5,r5,r9
2391	add	r9,r9,r0,ror#2
2392	eor	r3,r3,r11
2393	add	r8,r8,r2
2394	eor	r2,r6,r7
2395	eor	r0,r5,r5,ror#5
2396	add	r9,r9,r3
2397	and	r2,r2,r5
2398	eor	r3,r0,r5,ror#19
2399	eor	r0,r9,r9,ror#11
2400	eor	r2,r2,r7
2401	add	r8,r8,r3,ror#6
2402	eor	r3,r9,r10
2403	eor	r0,r0,r9,ror#20
2404	add	r8,r8,r2
2405	ldr	r2,[sp,#16]
2406	and	r12,r12,r3
2407	add	r4,r4,r8
2408	add	r8,r8,r0,ror#2
2409	eor	r12,r12,r10
2410	vst1.32	{q8},[r1,:128]!
2411	add	r7,r7,r2
2412	eor	r2,r5,r6
2413	eor	r0,r4,r4,ror#5
2414	add	r8,r8,r12
2415	vld1.32	{q8},[r14,:128]!
2416	and	r2,r2,r4
2417	eor	r12,r0,r4,ror#19
2418	eor	r0,r8,r8,ror#11
2419	eor	r2,r2,r6
2420	vrev32.8	q1,q1
2421	add	r7,r7,r12,ror#6
2422	eor	r12,r8,r9
2423	eor	r0,r0,r8,ror#20
2424	add	r7,r7,r2
2425	vadd.i32	q8,q8,q1
2426	ldr	r2,[sp,#20]
2427	and	r3,r3,r12
2428	add	r11,r11,r7
2429	add	r7,r7,r0,ror#2
2430	eor	r3,r3,r9
2431	add	r6,r6,r2
2432	eor	r2,r4,r5
2433	eor	r0,r11,r11,ror#5
2434	add	r7,r7,r3
2435	and	r2,r2,r11
2436	eor	r3,r0,r11,ror#19
2437	eor	r0,r7,r7,ror#11
2438	eor	r2,r2,r5
2439	add	r6,r6,r3,ror#6
2440	eor	r3,r7,r8
2441	eor	r0,r0,r7,ror#20
2442	add	r6,r6,r2
2443	ldr	r2,[sp,#24]
2444	and	r12,r12,r3
2445	add	r10,r10,r6
2446	add	r6,r6,r0,ror#2
2447	eor	r12,r12,r8
2448	add	r5,r5,r2
2449	eor	r2,r11,r4
2450	eor	r0,r10,r10,ror#5
2451	add	r6,r6,r12
2452	and	r2,r2,r10
2453	eor	r12,r0,r10,ror#19
2454	eor	r0,r6,r6,ror#11
2455	eor	r2,r2,r4
2456	add	r5,r5,r12,ror#6
2457	eor	r12,r6,r7
2458	eor	r0,r0,r6,ror#20
2459	add	r5,r5,r2
2460	ldr	r2,[sp,#28]
2461	and	r3,r3,r12
2462	add	r9,r9,r5
2463	add	r5,r5,r0,ror#2
2464	eor	r3,r3,r7
2465	add	r4,r4,r2
2466	eor	r2,r10,r11
2467	eor	r0,r9,r9,ror#5
2468	add	r5,r5,r3
2469	and	r2,r2,r9
2470	eor	r3,r0,r9,ror#19
2471	eor	r0,r5,r5,ror#11
2472	eor	r2,r2,r11
2473	add	r4,r4,r3,ror#6
2474	eor	r3,r5,r6
2475	eor	r0,r0,r5,ror#20
2476	add	r4,r4,r2
2477	ldr	r2,[sp,#32]
2478	and	r12,r12,r3
2479	add	r8,r8,r4
2480	add	r4,r4,r0,ror#2
2481	eor	r12,r12,r6
2482	vst1.32	{q8},[r1,:128]!
2483	add	r11,r11,r2
2484	eor	r2,r9,r10
2485	eor	r0,r8,r8,ror#5
2486	add	r4,r4,r12
2487	vld1.32	{q8},[r14,:128]!
2488	and	r2,r2,r8
2489	eor	r12,r0,r8,ror#19
2490	eor	r0,r4,r4,ror#11
2491	eor	r2,r2,r10
2492	vrev32.8	q2,q2
2493	add	r11,r11,r12,ror#6
2494	eor	r12,r4,r5
2495	eor	r0,r0,r4,ror#20
2496	add	r11,r11,r2
2497	vadd.i32	q8,q8,q2
2498	ldr	r2,[sp,#36]
2499	and	r3,r3,r12
2500	add	r7,r7,r11
2501	add	r11,r11,r0,ror#2
2502	eor	r3,r3,r5
2503	add	r10,r10,r2
2504	eor	r2,r8,r9
2505	eor	r0,r7,r7,ror#5
2506	add	r11,r11,r3
2507	and	r2,r2,r7
2508	eor	r3,r0,r7,ror#19
2509	eor	r0,r11,r11,ror#11
2510	eor	r2,r2,r9
2511	add	r10,r10,r3,ror#6
2512	eor	r3,r11,r4
2513	eor	r0,r0,r11,ror#20
2514	add	r10,r10,r2
2515	ldr	r2,[sp,#40]
2516	and	r12,r12,r3
2517	add	r6,r6,r10
2518	add	r10,r10,r0,ror#2
2519	eor	r12,r12,r4
2520	add	r9,r9,r2
2521	eor	r2,r7,r8
2522	eor	r0,r6,r6,ror#5
2523	add	r10,r10,r12
2524	and	r2,r2,r6
2525	eor	r12,r0,r6,ror#19
2526	eor	r0,r10,r10,ror#11
2527	eor	r2,r2,r8
2528	add	r9,r9,r12,ror#6
2529	eor	r12,r10,r11
2530	eor	r0,r0,r10,ror#20
2531	add	r9,r9,r2
2532	ldr	r2,[sp,#44]
2533	and	r3,r3,r12
2534	add	r5,r5,r9
2535	add	r9,r9,r0,ror#2
2536	eor	r3,r3,r11
2537	add	r8,r8,r2
2538	eor	r2,r6,r7
2539	eor	r0,r5,r5,ror#5
2540	add	r9,r9,r3
2541	and	r2,r2,r5
2542	eor	r3,r0,r5,ror#19
2543	eor	r0,r9,r9,ror#11
2544	eor	r2,r2,r7
2545	add	r8,r8,r3,ror#6
2546	eor	r3,r9,r10
2547	eor	r0,r0,r9,ror#20
2548	add	r8,r8,r2
2549	ldr	r2,[sp,#48]
2550	and	r12,r12,r3
2551	add	r4,r4,r8
2552	add	r8,r8,r0,ror#2
2553	eor	r12,r12,r10
2554	vst1.32	{q8},[r1,:128]!
2555	add	r7,r7,r2
2556	eor	r2,r5,r6
2557	eor	r0,r4,r4,ror#5
2558	add	r8,r8,r12
2559	vld1.32	{q8},[r14,:128]!
2560	and	r2,r2,r4
2561	eor	r12,r0,r4,ror#19
2562	eor	r0,r8,r8,ror#11
2563	eor	r2,r2,r6
2564	vrev32.8	q3,q3
2565	add	r7,r7,r12,ror#6
2566	eor	r12,r8,r9
2567	eor	r0,r0,r8,ror#20
2568	add	r7,r7,r2
2569	vadd.i32	q8,q8,q3
2570	ldr	r2,[sp,#52]
2571	and	r3,r3,r12
2572	add	r11,r11,r7
2573	add	r7,r7,r0,ror#2
2574	eor	r3,r3,r9
2575	add	r6,r6,r2
2576	eor	r2,r4,r5
2577	eor	r0,r11,r11,ror#5
2578	add	r7,r7,r3
2579	and	r2,r2,r11
2580	eor	r3,r0,r11,ror#19
2581	eor	r0,r7,r7,ror#11
2582	eor	r2,r2,r5
2583	add	r6,r6,r3,ror#6
2584	eor	r3,r7,r8
2585	eor	r0,r0,r7,ror#20
2586	add	r6,r6,r2
2587	ldr	r2,[sp,#56]
2588	and	r12,r12,r3
2589	add	r10,r10,r6
2590	add	r6,r6,r0,ror#2
2591	eor	r12,r12,r8
2592	add	r5,r5,r2
2593	eor	r2,r11,r4
2594	eor	r0,r10,r10,ror#5
2595	add	r6,r6,r12
2596	and	r2,r2,r10
2597	eor	r12,r0,r10,ror#19
2598	eor	r0,r6,r6,ror#11
2599	eor	r2,r2,r4
2600	add	r5,r5,r12,ror#6
2601	eor	r12,r6,r7
2602	eor	r0,r0,r6,ror#20
2603	add	r5,r5,r2
2604	ldr	r2,[sp,#60]
2605	and	r3,r3,r12
2606	add	r9,r9,r5
2607	add	r5,r5,r0,ror#2
2608	eor	r3,r3,r7
2609	add	r4,r4,r2
2610	eor	r2,r10,r11
2611	eor	r0,r9,r9,ror#5
2612	add	r5,r5,r3
2613	and	r2,r2,r9
2614	eor	r3,r0,r9,ror#19
2615	eor	r0,r5,r5,ror#11
2616	eor	r2,r2,r11
2617	add	r4,r4,r3,ror#6
2618	eor	r3,r5,r6
2619	eor	r0,r0,r5,ror#20
2620	add	r4,r4,r2
2621	ldr	r2,[sp,#64]
2622	and	r12,r12,r3
2623	add	r8,r8,r4
2624	add	r4,r4,r0,ror#2
2625	eor	r12,r12,r6
2626	vst1.32	{q8},[r1,:128]!
2627	ldr	r0,[r2,#0]
2628	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2629	ldr	r12,[r2,#4]
2630	ldr	r3,[r2,#8]
2631	ldr	r1,[r2,#12]
2632	add	r4,r4,r0			@ accumulate
2633	ldr	r0,[r2,#16]
2634	add	r5,r5,r12
2635	ldr	r12,[r2,#20]
2636	add	r6,r6,r3
2637	ldr	r3,[r2,#24]
2638	add	r7,r7,r1
2639	ldr	r1,[r2,#28]
2640	add	r8,r8,r0
2641	str	r4,[r2],#4
2642	add	r9,r9,r12
2643	str	r5,[r2],#4
2644	add	r10,r10,r3
2645	str	r6,[r2],#4
2646	add	r11,r11,r1
2647	str	r7,[r2],#4
2648	stmia	r2,{r8-r11}
2649
2650	ittte	ne
2651	movne	r1,sp
2652	ldrne	r2,[sp,#0]
2653	eorne	r12,r12,r12
2654	ldreq	sp,[sp,#76]			@ restore original sp
2655	itt	ne
2656	eorne	r3,r5,r6
2657	bne	.L_00_48
2658
2659	ldmia	sp!,{r4-r12,pc}
2660.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2661#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

@ INST() emits an ARMv8 Crypto Extensions instruction as raw bytes so the
@ file still assembles with toolchains that do not know the SHA-256
@ mnemonics.  The argument bytes are the little-endian ARM encoding; for
@ Thumb-2 the two halfwords are emitted in swapped order with 0xc OR-ed
@ into the second byte (presumably to form the equivalent Thumb-2
@ encoding of the same NEON-space instruction — see the ARM ARM).
# ifdef __thumb2__
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
@-----------------------------------------------------------------------
@ sha256_block_data_order_armv8(u32 state[8], const u8 *in, size_t num)
@
@ ARMv8 Crypto Extensions code path.  Compresses 'num' 64-byte input
@ blocks into the SHA-256 state.
@   In:    r0 = state (8 x u32, loaded into q0/q1)
@          r1 = input data
@          r2 = number of 64-byte blocks
@   Uses:  r3 = K256 round-constant table pointer
@ The SHA-256 instructions are emitted through INST() as raw opcodes so
@ that assemblers without crypto-extension support can build this file;
@ the intended mnemonic is given in the comment on each INST() line.
@-----------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load state: q0 = {a..d}, q1 = {e..h}
# ifdef __thumb2__
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256	@ r3 = K256 (adr reach workaround for Thumb-2)
# else
	adrl	r3,K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

.Loop_v8:
	@ Load one 64-byte block into q8-q11 and byte-swap each 32-bit
	@ word from big-endian message order to host order.
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2	@ Z set when input exhausted; consumed by bne below
	@ Rounds 0-47: each 7-instruction group does 4 rounds -- add the
	@ round constants (q12/q13) to a schedule word, run sha256h/
	@ sha256h2 on the state, and extend the message schedule with
	@ sha256su0/sha256su1.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48-63: no further schedule extension needed.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Accumulate compressed block into the state saved in q14/q15.
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne		@ required in Thumb-2 for the conditional branch
	bne		.Loop_v8	@ flags still hold the teq r1,r2 result

	vst1.32		{q0,q1},[r0]	@ store updated state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
@ Identification banner embedded in the object file.
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Runtime CPU-capability word (userspace/OpenSSL builds only; the kernel
@ build uses its own feature detection instead).
.comm   OPENSSL_armcap_P,4,4
#endif
