1/*
2 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
19 * - modified assembly to fit into OpenZFS
20 */
21
22#if defined(__arm__)
23
/*
 * Normalise the target architecture level into __ARM_ARCH__: use the
 * compiler-provided __ARM_ARCH when it exists, otherwise fall back to 7.
 * The round code below tests __ARM_ARCH__>=7 to choose between a single
 * word load of each input word (followed by rev unless __ARMEB__ is
 * defined) and assembling the word from four byte loads.
 * NOTE(review): the fallback of 7 is only right if every toolchain that
 * lacks __ARM_ARCH really targets ARMv7 or newer - confirm for older
 * compilers.
 */
24#ifndef __ARM_ARCH
25# define __ARM_ARCH__	7
26#else
27# define __ARM_ARCH__	__ARM_ARCH
28#endif
29
/*
 * Select the instruction set: unified-syntax Thumb when __thumb2__ is
 * defined, 32-bit ARM encoding otherwise.
 */
30#if defined(__thumb2__)
31.syntax unified
32.thumb
33#else
34.code   32
35#endif
36
/*
 * Both the code and the K256 constant table that follows are emitted into
 * .text; the function locates the table relative to its own address.
 */
37.text
38
/*
 * K256: the 64 32-bit SHA-256 round constants (16 rows of 4 words, 256
 * bytes). The round code reads them sequentially through r14 with
 * "ldr rX,[r14],#4" (*K256++).
 *
 * Layout is load-bearing: zfs_sha256_block_armv7 does not reference this
 * symbol by name but computes its address as (function address - (256+32)).
 * 256 is the size of the table; 32 is the terminator word below plus the
 * padding inserted by the .align 5 in front of the function. Do not add,
 * remove or reorder anything between K256 and the function label without
 * adjusting that offset.
 *
 * The table is 32-byte aligned (.align 5).
 */
39.type	K256,%object
40.align	5
41K256:
42.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
43.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
44.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
45.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
46.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
47.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
48.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
49.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
50.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
51.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
52.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
53.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
54.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
55.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
56.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
57.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
/*
 * .size is taken here, so the symbol covers exactly the 64 constants and
 * excludes the zero word that follows.
 */
58.size	K256,.-K256
/*
 * Zero word placed directly after the table. It still counts towards the
 * 256+32 distance described above.
 * NOTE(review): the integer rounds visible in this file carry a
 * conditionally assembled "cmp rX,#0xf2 @ done?" check, which matches the
 * low byte of the last constant rather than this zero word; the consumer
 * of the terminator is not visible here - confirm before removing it.
 */
59.word	0				@ terminator
60
61.align	5
62.globl	zfs_sha256_block_armv7
63.type	zfs_sha256_block_armv7,%function
64zfs_sha256_block_armv7:
65.Lzfs_sha256_block_armv7:
66
67#if __ARM_ARCH__<7 && !defined(__thumb2__)
68	sub	r3,pc,#8		@ zfs_sha256_block_armv7
69#else
70	adr	r3,.Lzfs_sha256_block_armv7
71#endif
72
73	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
74	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
75	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
76	sub	r14,r3,#256+32	@ K256
77	sub	sp,sp,#16*4		@ alloca(X[16])
78.Loop:
79# if __ARM_ARCH__>=7
80	ldr	r2,[r1],#4
81# else
82	ldrb	r2,[r1,#3]
83# endif
84	eor	r3,r5,r6		@ magic
85	eor	r12,r12,r12
86#if __ARM_ARCH__>=7
87	@ ldr	r2,[r1],#4			@ 0
88# if 0==15
89	str	r1,[sp,#17*4]			@ make room for r1
90# endif
91	eor	r0,r8,r8,ror#5
92	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
93	eor	r0,r0,r8,ror#19	@ Sigma1(e)
94# ifndef __ARMEB__
95	rev	r2,r2
96# endif
97#else
98	@ ldrb	r2,[r1,#3]			@ 0
99	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
100	ldrb	r12,[r1,#2]
101	ldrb	r0,[r1,#1]
102	orr	r2,r2,r12,lsl#8
103	ldrb	r12,[r1],#4
104	orr	r2,r2,r0,lsl#16
105# if 0==15
106	str	r1,[sp,#17*4]			@ make room for r1
107# endif
108	eor	r0,r8,r8,ror#5
109	orr	r2,r2,r12,lsl#24
110	eor	r0,r0,r8,ror#19	@ Sigma1(e)
111#endif
112	ldr	r12,[r14],#4			@ *K256++
113	add	r11,r11,r2			@ h+=X[i]
114	str	r2,[sp,#0*4]
115	eor	r2,r9,r10
116	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
117	and	r2,r2,r8
118	add	r11,r11,r12			@ h+=K256[i]
119	eor	r2,r2,r10			@ Ch(e,f,g)
120	eor	r0,r4,r4,ror#11
121	add	r11,r11,r2			@ h+=Ch(e,f,g)
122#if 0==31
123	and	r12,r12,#0xff
124	cmp	r12,#0xf2			@ done?
125#endif
126#if 0<15
127# if __ARM_ARCH__>=7
128	ldr	r2,[r1],#4			@ prefetch
129# else
130	ldrb	r2,[r1,#3]
131# endif
132	eor	r12,r4,r5			@ a^b, b^c in next round
133#else
134	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
135	eor	r12,r4,r5			@ a^b, b^c in next round
136	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
137#endif
138	eor	r0,r0,r4,ror#20	@ Sigma0(a)
139	and	r3,r3,r12			@ (b^c)&=(a^b)
140	add	r7,r7,r11			@ d+=h
141	eor	r3,r3,r5			@ Maj(a,b,c)
142	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
143	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
144#if __ARM_ARCH__>=7
145	@ ldr	r2,[r1],#4			@ 1
146# if 1==15
147	str	r1,[sp,#17*4]			@ make room for r1
148# endif
149	eor	r0,r7,r7,ror#5
150	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
151	eor	r0,r0,r7,ror#19	@ Sigma1(e)
152# ifndef __ARMEB__
153	rev	r2,r2
154# endif
155#else
156	@ ldrb	r2,[r1,#3]			@ 1
157	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
158	ldrb	r3,[r1,#2]
159	ldrb	r0,[r1,#1]
160	orr	r2,r2,r3,lsl#8
161	ldrb	r3,[r1],#4
162	orr	r2,r2,r0,lsl#16
163# if 1==15
164	str	r1,[sp,#17*4]			@ make room for r1
165# endif
166	eor	r0,r7,r7,ror#5
167	orr	r2,r2,r3,lsl#24
168	eor	r0,r0,r7,ror#19	@ Sigma1(e)
169#endif
170	ldr	r3,[r14],#4			@ *K256++
171	add	r10,r10,r2			@ h+=X[i]
172	str	r2,[sp,#1*4]
173	eor	r2,r8,r9
174	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
175	and	r2,r2,r7
176	add	r10,r10,r3			@ h+=K256[i]
177	eor	r2,r2,r9			@ Ch(e,f,g)
178	eor	r0,r11,r11,ror#11
179	add	r10,r10,r2			@ h+=Ch(e,f,g)
180#if 1==31
181	and	r3,r3,#0xff
182	cmp	r3,#0xf2			@ done?
183#endif
184#if 1<15
185# if __ARM_ARCH__>=7
186	ldr	r2,[r1],#4			@ prefetch
187# else
188	ldrb	r2,[r1,#3]
189# endif
190	eor	r3,r11,r4			@ a^b, b^c in next round
191#else
192	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
193	eor	r3,r11,r4			@ a^b, b^c in next round
194	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
195#endif
196	eor	r0,r0,r11,ror#20	@ Sigma0(a)
197	and	r12,r12,r3			@ (b^c)&=(a^b)
198	add	r6,r6,r10			@ d+=h
199	eor	r12,r12,r4			@ Maj(a,b,c)
200	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
201	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
202#if __ARM_ARCH__>=7
203	@ ldr	r2,[r1],#4			@ 2
204# if 2==15
205	str	r1,[sp,#17*4]			@ make room for r1
206# endif
207	eor	r0,r6,r6,ror#5
208	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
209	eor	r0,r0,r6,ror#19	@ Sigma1(e)
210# ifndef __ARMEB__
211	rev	r2,r2
212# endif
213#else
214	@ ldrb	r2,[r1,#3]			@ 2
215	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
216	ldrb	r12,[r1,#2]
217	ldrb	r0,[r1,#1]
218	orr	r2,r2,r12,lsl#8
219	ldrb	r12,[r1],#4
220	orr	r2,r2,r0,lsl#16
221# if 2==15
222	str	r1,[sp,#17*4]			@ make room for r1
223# endif
224	eor	r0,r6,r6,ror#5
225	orr	r2,r2,r12,lsl#24
226	eor	r0,r0,r6,ror#19	@ Sigma1(e)
227#endif
228	ldr	r12,[r14],#4			@ *K256++
229	add	r9,r9,r2			@ h+=X[i]
230	str	r2,[sp,#2*4]
231	eor	r2,r7,r8
232	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
233	and	r2,r2,r6
234	add	r9,r9,r12			@ h+=K256[i]
235	eor	r2,r2,r8			@ Ch(e,f,g)
236	eor	r0,r10,r10,ror#11
237	add	r9,r9,r2			@ h+=Ch(e,f,g)
238#if 2==31
239	and	r12,r12,#0xff
240	cmp	r12,#0xf2			@ done?
241#endif
242#if 2<15
243# if __ARM_ARCH__>=7
244	ldr	r2,[r1],#4			@ prefetch
245# else
246	ldrb	r2,[r1,#3]
247# endif
248	eor	r12,r10,r11			@ a^b, b^c in next round
249#else
250	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
251	eor	r12,r10,r11			@ a^b, b^c in next round
252	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
253#endif
254	eor	r0,r0,r10,ror#20	@ Sigma0(a)
255	and	r3,r3,r12			@ (b^c)&=(a^b)
256	add	r5,r5,r9			@ d+=h
257	eor	r3,r3,r11			@ Maj(a,b,c)
258	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
259	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
260#if __ARM_ARCH__>=7
261	@ ldr	r2,[r1],#4			@ 3
262# if 3==15
263	str	r1,[sp,#17*4]			@ make room for r1
264# endif
265	eor	r0,r5,r5,ror#5
266	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
267	eor	r0,r0,r5,ror#19	@ Sigma1(e)
268# ifndef __ARMEB__
269	rev	r2,r2
270# endif
271#else
272	@ ldrb	r2,[r1,#3]			@ 3
273	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
274	ldrb	r3,[r1,#2]
275	ldrb	r0,[r1,#1]
276	orr	r2,r2,r3,lsl#8
277	ldrb	r3,[r1],#4
278	orr	r2,r2,r0,lsl#16
279# if 3==15
280	str	r1,[sp,#17*4]			@ make room for r1
281# endif
282	eor	r0,r5,r5,ror#5
283	orr	r2,r2,r3,lsl#24
284	eor	r0,r0,r5,ror#19	@ Sigma1(e)
285#endif
286	ldr	r3,[r14],#4			@ *K256++
287	add	r8,r8,r2			@ h+=X[i]
288	str	r2,[sp,#3*4]
289	eor	r2,r6,r7
290	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
291	and	r2,r2,r5
292	add	r8,r8,r3			@ h+=K256[i]
293	eor	r2,r2,r7			@ Ch(e,f,g)
294	eor	r0,r9,r9,ror#11
295	add	r8,r8,r2			@ h+=Ch(e,f,g)
296#if 3==31
297	and	r3,r3,#0xff
298	cmp	r3,#0xf2			@ done?
299#endif
300#if 3<15
301# if __ARM_ARCH__>=7
302	ldr	r2,[r1],#4			@ prefetch
303# else
304	ldrb	r2,[r1,#3]
305# endif
306	eor	r3,r9,r10			@ a^b, b^c in next round
307#else
308	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
309	eor	r3,r9,r10			@ a^b, b^c in next round
310	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
311#endif
312	eor	r0,r0,r9,ror#20	@ Sigma0(a)
313	and	r12,r12,r3			@ (b^c)&=(a^b)
314	add	r4,r4,r8			@ d+=h
315	eor	r12,r12,r10			@ Maj(a,b,c)
316	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
317	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
318#if __ARM_ARCH__>=7
319	@ ldr	r2,[r1],#4			@ 4
320# if 4==15
321	str	r1,[sp,#17*4]			@ make room for r1
322# endif
323	eor	r0,r4,r4,ror#5
324	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
325	eor	r0,r0,r4,ror#19	@ Sigma1(e)
326# ifndef __ARMEB__
327	rev	r2,r2
328# endif
329#else
330	@ ldrb	r2,[r1,#3]			@ 4
331	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
332	ldrb	r12,[r1,#2]
333	ldrb	r0,[r1,#1]
334	orr	r2,r2,r12,lsl#8
335	ldrb	r12,[r1],#4
336	orr	r2,r2,r0,lsl#16
337# if 4==15
338	str	r1,[sp,#17*4]			@ make room for r1
339# endif
340	eor	r0,r4,r4,ror#5
341	orr	r2,r2,r12,lsl#24
342	eor	r0,r0,r4,ror#19	@ Sigma1(e)
343#endif
344	ldr	r12,[r14],#4			@ *K256++
345	add	r7,r7,r2			@ h+=X[i]
346	str	r2,[sp,#4*4]
347	eor	r2,r5,r6
348	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
349	and	r2,r2,r4
350	add	r7,r7,r12			@ h+=K256[i]
351	eor	r2,r2,r6			@ Ch(e,f,g)
352	eor	r0,r8,r8,ror#11
353	add	r7,r7,r2			@ h+=Ch(e,f,g)
354#if 4==31
355	and	r12,r12,#0xff
356	cmp	r12,#0xf2			@ done?
357#endif
358#if 4<15
359# if __ARM_ARCH__>=7
360	ldr	r2,[r1],#4			@ prefetch
361# else
362	ldrb	r2,[r1,#3]
363# endif
364	eor	r12,r8,r9			@ a^b, b^c in next round
365#else
366	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
367	eor	r12,r8,r9			@ a^b, b^c in next round
368	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
369#endif
370	eor	r0,r0,r8,ror#20	@ Sigma0(a)
371	and	r3,r3,r12			@ (b^c)&=(a^b)
372	add	r11,r11,r7			@ d+=h
373	eor	r3,r3,r9			@ Maj(a,b,c)
374	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
375	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
376#if __ARM_ARCH__>=7
377	@ ldr	r2,[r1],#4			@ 5
378# if 5==15
379	str	r1,[sp,#17*4]			@ make room for r1
380# endif
381	eor	r0,r11,r11,ror#5
382	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
383	eor	r0,r0,r11,ror#19	@ Sigma1(e)
384# ifndef __ARMEB__
385	rev	r2,r2
386# endif
387#else
388	@ ldrb	r2,[r1,#3]			@ 5
389	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
390	ldrb	r3,[r1,#2]
391	ldrb	r0,[r1,#1]
392	orr	r2,r2,r3,lsl#8
393	ldrb	r3,[r1],#4
394	orr	r2,r2,r0,lsl#16
395# if 5==15
396	str	r1,[sp,#17*4]			@ make room for r1
397# endif
398	eor	r0,r11,r11,ror#5
399	orr	r2,r2,r3,lsl#24
400	eor	r0,r0,r11,ror#19	@ Sigma1(e)
401#endif
402	ldr	r3,[r14],#4			@ *K256++
403	add	r6,r6,r2			@ h+=X[i]
404	str	r2,[sp,#5*4]
405	eor	r2,r4,r5
406	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
407	and	r2,r2,r11
408	add	r6,r6,r3			@ h+=K256[i]
409	eor	r2,r2,r5			@ Ch(e,f,g)
410	eor	r0,r7,r7,ror#11
411	add	r6,r6,r2			@ h+=Ch(e,f,g)
412#if 5==31
413	and	r3,r3,#0xff
414	cmp	r3,#0xf2			@ done?
415#endif
416#if 5<15
417# if __ARM_ARCH__>=7
418	ldr	r2,[r1],#4			@ prefetch
419# else
420	ldrb	r2,[r1,#3]
421# endif
422	eor	r3,r7,r8			@ a^b, b^c in next round
423#else
424	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
425	eor	r3,r7,r8			@ a^b, b^c in next round
426	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
427#endif
428	eor	r0,r0,r7,ror#20	@ Sigma0(a)
429	and	r12,r12,r3			@ (b^c)&=(a^b)
430	add	r10,r10,r6			@ d+=h
431	eor	r12,r12,r8			@ Maj(a,b,c)
432	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
433	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
434#if __ARM_ARCH__>=7
435	@ ldr	r2,[r1],#4			@ 6
436# if 6==15
437	str	r1,[sp,#17*4]			@ make room for r1
438# endif
439	eor	r0,r10,r10,ror#5
440	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
441	eor	r0,r0,r10,ror#19	@ Sigma1(e)
442# ifndef __ARMEB__
443	rev	r2,r2
444# endif
445#else
446	@ ldrb	r2,[r1,#3]			@ 6
447	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
448	ldrb	r12,[r1,#2]
449	ldrb	r0,[r1,#1]
450	orr	r2,r2,r12,lsl#8
451	ldrb	r12,[r1],#4
452	orr	r2,r2,r0,lsl#16
453# if 6==15
454	str	r1,[sp,#17*4]			@ make room for r1
455# endif
456	eor	r0,r10,r10,ror#5
457	orr	r2,r2,r12,lsl#24
458	eor	r0,r0,r10,ror#19	@ Sigma1(e)
459#endif
460	ldr	r12,[r14],#4			@ *K256++
461	add	r5,r5,r2			@ h+=X[i]
462	str	r2,[sp,#6*4]
463	eor	r2,r11,r4
464	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
465	and	r2,r2,r10
466	add	r5,r5,r12			@ h+=K256[i]
467	eor	r2,r2,r4			@ Ch(e,f,g)
468	eor	r0,r6,r6,ror#11
469	add	r5,r5,r2			@ h+=Ch(e,f,g)
470#if 6==31
471	and	r12,r12,#0xff
472	cmp	r12,#0xf2			@ done?
473#endif
474#if 6<15
475# if __ARM_ARCH__>=7
476	ldr	r2,[r1],#4			@ prefetch
477# else
478	ldrb	r2,[r1,#3]
479# endif
480	eor	r12,r6,r7			@ a^b, b^c in next round
481#else
482	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
483	eor	r12,r6,r7			@ a^b, b^c in next round
484	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
485#endif
486	eor	r0,r0,r6,ror#20	@ Sigma0(a)
487	and	r3,r3,r12			@ (b^c)&=(a^b)
488	add	r9,r9,r5			@ d+=h
489	eor	r3,r3,r7			@ Maj(a,b,c)
490	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
491	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
492#if __ARM_ARCH__>=7
493	@ ldr	r2,[r1],#4			@ 7
494# if 7==15
495	str	r1,[sp,#17*4]			@ make room for r1
496# endif
497	eor	r0,r9,r9,ror#5
498	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
499	eor	r0,r0,r9,ror#19	@ Sigma1(e)
500# ifndef __ARMEB__
501	rev	r2,r2
502# endif
503#else
504	@ ldrb	r2,[r1,#3]			@ 7
505	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
506	ldrb	r3,[r1,#2]
507	ldrb	r0,[r1,#1]
508	orr	r2,r2,r3,lsl#8
509	ldrb	r3,[r1],#4
510	orr	r2,r2,r0,lsl#16
511# if 7==15
512	str	r1,[sp,#17*4]			@ make room for r1
513# endif
514	eor	r0,r9,r9,ror#5
515	orr	r2,r2,r3,lsl#24
516	eor	r0,r0,r9,ror#19	@ Sigma1(e)
517#endif
518	ldr	r3,[r14],#4			@ *K256++
519	add	r4,r4,r2			@ h+=X[i]
520	str	r2,[sp,#7*4]
521	eor	r2,r10,r11
522	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
523	and	r2,r2,r9
524	add	r4,r4,r3			@ h+=K256[i]
525	eor	r2,r2,r11			@ Ch(e,f,g)
526	eor	r0,r5,r5,ror#11
527	add	r4,r4,r2			@ h+=Ch(e,f,g)
528#if 7==31
529	and	r3,r3,#0xff
530	cmp	r3,#0xf2			@ done?
531#endif
532#if 7<15
533# if __ARM_ARCH__>=7
534	ldr	r2,[r1],#4			@ prefetch
535# else
536	ldrb	r2,[r1,#3]
537# endif
538	eor	r3,r5,r6			@ a^b, b^c in next round
539#else
540	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
541	eor	r3,r5,r6			@ a^b, b^c in next round
542	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
543#endif
544	eor	r0,r0,r5,ror#20	@ Sigma0(a)
545	and	r12,r12,r3			@ (b^c)&=(a^b)
546	add	r8,r8,r4			@ d+=h
547	eor	r12,r12,r6			@ Maj(a,b,c)
548	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
549	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
550#if __ARM_ARCH__>=7
551	@ ldr	r2,[r1],#4			@ 8
552# if 8==15
553	str	r1,[sp,#17*4]			@ make room for r1
554# endif
555	eor	r0,r8,r8,ror#5
556	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
557	eor	r0,r0,r8,ror#19	@ Sigma1(e)
558# ifndef __ARMEB__
559	rev	r2,r2
560# endif
561#else
562	@ ldrb	r2,[r1,#3]			@ 8
563	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
564	ldrb	r12,[r1,#2]
565	ldrb	r0,[r1,#1]
566	orr	r2,r2,r12,lsl#8
567	ldrb	r12,[r1],#4
568	orr	r2,r2,r0,lsl#16
569# if 8==15
570	str	r1,[sp,#17*4]			@ make room for r1
571# endif
572	eor	r0,r8,r8,ror#5
573	orr	r2,r2,r12,lsl#24
574	eor	r0,r0,r8,ror#19	@ Sigma1(e)
575#endif
576	ldr	r12,[r14],#4			@ *K256++
577	add	r11,r11,r2			@ h+=X[i]
578	str	r2,[sp,#8*4]
579	eor	r2,r9,r10
580	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
581	and	r2,r2,r8
582	add	r11,r11,r12			@ h+=K256[i]
583	eor	r2,r2,r10			@ Ch(e,f,g)
584	eor	r0,r4,r4,ror#11
585	add	r11,r11,r2			@ h+=Ch(e,f,g)
586#if 8==31
587	and	r12,r12,#0xff
588	cmp	r12,#0xf2			@ done?
589#endif
590#if 8<15
591# if __ARM_ARCH__>=7
592	ldr	r2,[r1],#4			@ prefetch
593# else
594	ldrb	r2,[r1,#3]
595# endif
596	eor	r12,r4,r5			@ a^b, b^c in next round
597#else
598	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
599	eor	r12,r4,r5			@ a^b, b^c in next round
600	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
601#endif
602	eor	r0,r0,r4,ror#20	@ Sigma0(a)
603	and	r3,r3,r12			@ (b^c)&=(a^b)
604	add	r7,r7,r11			@ d+=h
605	eor	r3,r3,r5			@ Maj(a,b,c)
606	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
607	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
608#if __ARM_ARCH__>=7
609	@ ldr	r2,[r1],#4			@ 9
610# if 9==15
611	str	r1,[sp,#17*4]			@ make room for r1
612# endif
613	eor	r0,r7,r7,ror#5
614	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
615	eor	r0,r0,r7,ror#19	@ Sigma1(e)
616# ifndef __ARMEB__
617	rev	r2,r2
618# endif
619#else
620	@ ldrb	r2,[r1,#3]			@ 9
621	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
622	ldrb	r3,[r1,#2]
623	ldrb	r0,[r1,#1]
624	orr	r2,r2,r3,lsl#8
625	ldrb	r3,[r1],#4
626	orr	r2,r2,r0,lsl#16
627# if 9==15
628	str	r1,[sp,#17*4]			@ make room for r1
629# endif
630	eor	r0,r7,r7,ror#5
631	orr	r2,r2,r3,lsl#24
632	eor	r0,r0,r7,ror#19	@ Sigma1(e)
633#endif
634	ldr	r3,[r14],#4			@ *K256++
635	add	r10,r10,r2			@ h+=X[i]
636	str	r2,[sp,#9*4]
637	eor	r2,r8,r9
638	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
639	and	r2,r2,r7
640	add	r10,r10,r3			@ h+=K256[i]
641	eor	r2,r2,r9			@ Ch(e,f,g)
642	eor	r0,r11,r11,ror#11
643	add	r10,r10,r2			@ h+=Ch(e,f,g)
644#if 9==31
645	and	r3,r3,#0xff
646	cmp	r3,#0xf2			@ done?
647#endif
648#if 9<15
649# if __ARM_ARCH__>=7
650	ldr	r2,[r1],#4			@ prefetch
651# else
652	ldrb	r2,[r1,#3]
653# endif
654	eor	r3,r11,r4			@ a^b, b^c in next round
655#else
656	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
657	eor	r3,r11,r4			@ a^b, b^c in next round
658	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
659#endif
660	eor	r0,r0,r11,ror#20	@ Sigma0(a)
661	and	r12,r12,r3			@ (b^c)&=(a^b)
662	add	r6,r6,r10			@ d+=h
663	eor	r12,r12,r4			@ Maj(a,b,c)
664	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
665	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
666#if __ARM_ARCH__>=7
667	@ ldr	r2,[r1],#4			@ 10
668# if 10==15
669	str	r1,[sp,#17*4]			@ make room for r1
670# endif
671	eor	r0,r6,r6,ror#5
672	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
673	eor	r0,r0,r6,ror#19	@ Sigma1(e)
674# ifndef __ARMEB__
675	rev	r2,r2
676# endif
677#else
678	@ ldrb	r2,[r1,#3]			@ 10
679	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
680	ldrb	r12,[r1,#2]
681	ldrb	r0,[r1,#1]
682	orr	r2,r2,r12,lsl#8
683	ldrb	r12,[r1],#4
684	orr	r2,r2,r0,lsl#16
685# if 10==15
686	str	r1,[sp,#17*4]			@ make room for r1
687# endif
688	eor	r0,r6,r6,ror#5
689	orr	r2,r2,r12,lsl#24
690	eor	r0,r0,r6,ror#19	@ Sigma1(e)
691#endif
692	ldr	r12,[r14],#4			@ *K256++
693	add	r9,r9,r2			@ h+=X[i]
694	str	r2,[sp,#10*4]
695	eor	r2,r7,r8
696	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
697	and	r2,r2,r6
698	add	r9,r9,r12			@ h+=K256[i]
699	eor	r2,r2,r8			@ Ch(e,f,g)
700	eor	r0,r10,r10,ror#11
701	add	r9,r9,r2			@ h+=Ch(e,f,g)
702#if 10==31
703	and	r12,r12,#0xff
704	cmp	r12,#0xf2			@ done?
705#endif
706#if 10<15
707# if __ARM_ARCH__>=7
708	ldr	r2,[r1],#4			@ prefetch
709# else
710	ldrb	r2,[r1,#3]
711# endif
712	eor	r12,r10,r11			@ a^b, b^c in next round
713#else
714	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
715	eor	r12,r10,r11			@ a^b, b^c in next round
716	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
717#endif
718	eor	r0,r0,r10,ror#20	@ Sigma0(a)
719	and	r3,r3,r12			@ (b^c)&=(a^b)
720	add	r5,r5,r9			@ d+=h
721	eor	r3,r3,r11			@ Maj(a,b,c)
722	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
723	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
724#if __ARM_ARCH__>=7
725	@ ldr	r2,[r1],#4			@ 11
726# if 11==15
727	str	r1,[sp,#17*4]			@ make room for r1
728# endif
729	eor	r0,r5,r5,ror#5
730	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
731	eor	r0,r0,r5,ror#19	@ Sigma1(e)
732# ifndef __ARMEB__
733	rev	r2,r2
734# endif
735#else
736	@ ldrb	r2,[r1,#3]			@ 11
737	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
738	ldrb	r3,[r1,#2]
739	ldrb	r0,[r1,#1]
740	orr	r2,r2,r3,lsl#8
741	ldrb	r3,[r1],#4
742	orr	r2,r2,r0,lsl#16
743# if 11==15
744	str	r1,[sp,#17*4]			@ make room for r1
745# endif
746	eor	r0,r5,r5,ror#5
747	orr	r2,r2,r3,lsl#24
748	eor	r0,r0,r5,ror#19	@ Sigma1(e)
749#endif
750	ldr	r3,[r14],#4			@ *K256++
751	add	r8,r8,r2			@ h+=X[i]
752	str	r2,[sp,#11*4]
753	eor	r2,r6,r7
754	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
755	and	r2,r2,r5
756	add	r8,r8,r3			@ h+=K256[i]
757	eor	r2,r2,r7			@ Ch(e,f,g)
758	eor	r0,r9,r9,ror#11
759	add	r8,r8,r2			@ h+=Ch(e,f,g)
760#if 11==31
761	and	r3,r3,#0xff
762	cmp	r3,#0xf2			@ done?
763#endif
764#if 11<15
765# if __ARM_ARCH__>=7
766	ldr	r2,[r1],#4			@ prefetch
767# else
768	ldrb	r2,[r1,#3]
769# endif
770	eor	r3,r9,r10			@ a^b, b^c in next round
771#else
772	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
773	eor	r3,r9,r10			@ a^b, b^c in next round
774	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
775#endif
776	eor	r0,r0,r9,ror#20	@ Sigma0(a)
777	and	r12,r12,r3			@ (b^c)&=(a^b)
778	add	r4,r4,r8			@ d+=h
779	eor	r12,r12,r10			@ Maj(a,b,c)
780	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
781	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
782#if __ARM_ARCH__>=7
783	@ ldr	r2,[r1],#4			@ 12
784# if 12==15
785	str	r1,[sp,#17*4]			@ make room for r1
786# endif
787	eor	r0,r4,r4,ror#5
788	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
789	eor	r0,r0,r4,ror#19	@ Sigma1(e)
790# ifndef __ARMEB__
791	rev	r2,r2
792# endif
793#else
794	@ ldrb	r2,[r1,#3]			@ 12
795	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
796	ldrb	r12,[r1,#2]
797	ldrb	r0,[r1,#1]
798	orr	r2,r2,r12,lsl#8
799	ldrb	r12,[r1],#4
800	orr	r2,r2,r0,lsl#16
801# if 12==15
802	str	r1,[sp,#17*4]			@ make room for r1
803# endif
804	eor	r0,r4,r4,ror#5
805	orr	r2,r2,r12,lsl#24
806	eor	r0,r0,r4,ror#19	@ Sigma1(e)
807#endif
808	ldr	r12,[r14],#4			@ *K256++
809	add	r7,r7,r2			@ h+=X[i]
810	str	r2,[sp,#12*4]
811	eor	r2,r5,r6
812	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
813	and	r2,r2,r4
814	add	r7,r7,r12			@ h+=K256[i]
815	eor	r2,r2,r6			@ Ch(e,f,g)
816	eor	r0,r8,r8,ror#11
817	add	r7,r7,r2			@ h+=Ch(e,f,g)
818#if 12==31
819	and	r12,r12,#0xff
820	cmp	r12,#0xf2			@ done?
821#endif
822#if 12<15
823# if __ARM_ARCH__>=7
824	ldr	r2,[r1],#4			@ prefetch
825# else
826	ldrb	r2,[r1,#3]
827# endif
828	eor	r12,r8,r9			@ a^b, b^c in next round
829#else
830	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
831	eor	r12,r8,r9			@ a^b, b^c in next round
832	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
833#endif
834	eor	r0,r0,r8,ror#20	@ Sigma0(a)
835	and	r3,r3,r12			@ (b^c)&=(a^b)
836	add	r11,r11,r7			@ d+=h
837	eor	r3,r3,r9			@ Maj(a,b,c)
838	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
839	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
840#if __ARM_ARCH__>=7
841	@ ldr	r2,[r1],#4			@ 13
842# if 13==15
843	str	r1,[sp,#17*4]			@ make room for r1
844# endif
845	eor	r0,r11,r11,ror#5
846	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
847	eor	r0,r0,r11,ror#19	@ Sigma1(e)
848# ifndef __ARMEB__
849	rev	r2,r2
850# endif
851#else
852	@ ldrb	r2,[r1,#3]			@ 13
853	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
854	ldrb	r3,[r1,#2]
855	ldrb	r0,[r1,#1]
856	orr	r2,r2,r3,lsl#8
857	ldrb	r3,[r1],#4
858	orr	r2,r2,r0,lsl#16
859# if 13==15
860	str	r1,[sp,#17*4]			@ make room for r1
861# endif
862	eor	r0,r11,r11,ror#5
863	orr	r2,r2,r3,lsl#24
864	eor	r0,r0,r11,ror#19	@ Sigma1(e)
865#endif
866	ldr	r3,[r14],#4			@ *K256++
867	add	r6,r6,r2			@ h+=X[i]
868	str	r2,[sp,#13*4]
869	eor	r2,r4,r5
870	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
871	and	r2,r2,r11
872	add	r6,r6,r3			@ h+=K256[i]
873	eor	r2,r2,r5			@ Ch(e,f,g)
874	eor	r0,r7,r7,ror#11
875	add	r6,r6,r2			@ h+=Ch(e,f,g)
876#if 13==31
877	and	r3,r3,#0xff
878	cmp	r3,#0xf2			@ done?
879#endif
880#if 13<15
881# if __ARM_ARCH__>=7
882	ldr	r2,[r1],#4			@ prefetch
883# else
884	ldrb	r2,[r1,#3]
885# endif
886	eor	r3,r7,r8			@ a^b, b^c in next round
887#else
888	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
889	eor	r3,r7,r8			@ a^b, b^c in next round
890	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
891#endif
892	eor	r0,r0,r7,ror#20	@ Sigma0(a)
893	and	r12,r12,r3			@ (b^c)&=(a^b)
894	add	r10,r10,r6			@ d+=h
895	eor	r12,r12,r8			@ Maj(a,b,c)
896	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
897	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
898#if __ARM_ARCH__>=7
899	@ ldr	r2,[r1],#4			@ 14
900# if 14==15
901	str	r1,[sp,#17*4]			@ make room for r1
902# endif
903	eor	r0,r10,r10,ror#5
904	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
905	eor	r0,r0,r10,ror#19	@ Sigma1(e)
906# ifndef __ARMEB__
907	rev	r2,r2
908# endif
909#else
910	@ ldrb	r2,[r1,#3]			@ 14
911	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
912	ldrb	r12,[r1,#2]
913	ldrb	r0,[r1,#1]
914	orr	r2,r2,r12,lsl#8
915	ldrb	r12,[r1],#4
916	orr	r2,r2,r0,lsl#16
917# if 14==15
918	str	r1,[sp,#17*4]			@ make room for r1
919# endif
920	eor	r0,r10,r10,ror#5
921	orr	r2,r2,r12,lsl#24
922	eor	r0,r0,r10,ror#19	@ Sigma1(e)
923#endif
924	ldr	r12,[r14],#4			@ *K256++
925	add	r5,r5,r2			@ h+=X[i]
926	str	r2,[sp,#14*4]
927	eor	r2,r11,r4
928	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
929	and	r2,r2,r10
930	add	r5,r5,r12			@ h+=K256[i]
931	eor	r2,r2,r4			@ Ch(e,f,g)
932	eor	r0,r6,r6,ror#11
933	add	r5,r5,r2			@ h+=Ch(e,f,g)
934#if 14==31
935	and	r12,r12,#0xff
936	cmp	r12,#0xf2			@ done?
937#endif
938#if 14<15
939# if __ARM_ARCH__>=7
940	ldr	r2,[r1],#4			@ prefetch
941# else
942	ldrb	r2,[r1,#3]
943# endif
944	eor	r12,r6,r7			@ a^b, b^c in next round
945#else
946	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
947	eor	r12,r6,r7			@ a^b, b^c in next round
948	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
949#endif
950	eor	r0,r0,r6,ror#20	@ Sigma0(a)
951	and	r3,r3,r12			@ (b^c)&=(a^b)
952	add	r9,r9,r5			@ d+=h
953	eor	r3,r3,r7			@ Maj(a,b,c)
954	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
955	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
956#if __ARM_ARCH__>=7
957	@ ldr	r2,[r1],#4			@ 15
958# if 15==15
959	str	r1,[sp,#17*4]			@ make room for r1
960# endif
961	eor	r0,r9,r9,ror#5
962	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
963	eor	r0,r0,r9,ror#19	@ Sigma1(e)
964# ifndef __ARMEB__
965	rev	r2,r2
966# endif
967#else
968	@ ldrb	r2,[r1,#3]			@ 15
969	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
970	ldrb	r3,[r1,#2]
971	ldrb	r0,[r1,#1]
972	orr	r2,r2,r3,lsl#8
973	ldrb	r3,[r1],#4
974	orr	r2,r2,r0,lsl#16
975# if 15==15
976	str	r1,[sp,#17*4]			@ make room for r1
977# endif
978	eor	r0,r9,r9,ror#5
979	orr	r2,r2,r3,lsl#24
980	eor	r0,r0,r9,ror#19	@ Sigma1(e)
981#endif
982	ldr	r3,[r14],#4			@ *K256++
983	add	r4,r4,r2			@ h+=X[i]
984	str	r2,[sp,#15*4]
985	eor	r2,r10,r11
986	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
987	and	r2,r2,r9
988	add	r4,r4,r3			@ h+=K256[i]
989	eor	r2,r2,r11			@ Ch(e,f,g)
990	eor	r0,r5,r5,ror#11
991	add	r4,r4,r2			@ h+=Ch(e,f,g)
992#if 15==31
993	and	r3,r3,#0xff
994	cmp	r3,#0xf2			@ done?
995#endif
996#if 15<15
997# if __ARM_ARCH__>=7
998	ldr	r2,[r1],#4			@ prefetch
999# else
1000	ldrb	r2,[r1,#3]
1001# endif
1002	eor	r3,r5,r6			@ a^b, b^c in next round
1003#else
1004	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1005	eor	r3,r5,r6			@ a^b, b^c in next round
1006	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1007#endif
1008	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1009	and	r12,r12,r3			@ (b^c)&=(a^b)
1010	add	r8,r8,r4			@ d+=h
1011	eor	r12,r12,r6			@ Maj(a,b,c)
1012	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1013	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1014.Lrounds_16_xx:
1015	@ ldr	r2,[sp,#1*4]		@ 16
1016	@ ldr	r1,[sp,#14*4]
1017	mov	r0,r2,ror#7
1018	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1019	mov	r12,r1,ror#17
1020	eor	r0,r0,r2,ror#18
1021	eor	r12,r12,r1,ror#19
1022	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1023	ldr	r2,[sp,#0*4]
1024	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1025	ldr	r1,[sp,#9*4]
1026
1027	add	r12,r12,r0
1028	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1029	add	r2,r2,r12
1030	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1031	add	r2,r2,r1			@ X[i]
1032	ldr	r12,[r14],#4			@ *K256++
1033	add	r11,r11,r2			@ h+=X[i]
1034	str	r2,[sp,#0*4]
1035	eor	r2,r9,r10
1036	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1037	and	r2,r2,r8
1038	add	r11,r11,r12			@ h+=K256[i]
1039	eor	r2,r2,r10			@ Ch(e,f,g)
1040	eor	r0,r4,r4,ror#11
1041	add	r11,r11,r2			@ h+=Ch(e,f,g)
1042#if 16==31
1043	and	r12,r12,#0xff
1044	cmp	r12,#0xf2			@ done?
1045#endif
1046#if 16<15
1047# if __ARM_ARCH__>=7
1048	ldr	r2,[r1],#4			@ prefetch
1049# else
1050	ldrb	r2,[r1,#3]
1051# endif
1052	eor	r12,r4,r5			@ a^b, b^c in next round
1053#else
1054	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1055	eor	r12,r4,r5			@ a^b, b^c in next round
1056	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1057#endif
1058	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1059	and	r3,r3,r12			@ (b^c)&=(a^b)
1060	add	r7,r7,r11			@ d+=h
1061	eor	r3,r3,r5			@ Maj(a,b,c)
1062	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1063	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1064	@ ldr	r2,[sp,#2*4]		@ 17
1065	@ ldr	r1,[sp,#15*4]
1066	mov	r0,r2,ror#7
1067	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1068	mov	r3,r1,ror#17
1069	eor	r0,r0,r2,ror#18
1070	eor	r3,r3,r1,ror#19
1071	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1072	ldr	r2,[sp,#1*4]
1073	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1074	ldr	r1,[sp,#10*4]
1075
1076	add	r3,r3,r0
1077	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1078	add	r2,r2,r3
1079	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1080	add	r2,r2,r1			@ X[i]
1081	ldr	r3,[r14],#4			@ *K256++
1082	add	r10,r10,r2			@ h+=X[i]
1083	str	r2,[sp,#1*4]
1084	eor	r2,r8,r9
1085	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1086	and	r2,r2,r7
1087	add	r10,r10,r3			@ h+=K256[i]
1088	eor	r2,r2,r9			@ Ch(e,f,g)
1089	eor	r0,r11,r11,ror#11
1090	add	r10,r10,r2			@ h+=Ch(e,f,g)
1091#if 17==31
1092	and	r3,r3,#0xff
1093	cmp	r3,#0xf2			@ done?
1094#endif
1095#if 17<15
1096# if __ARM_ARCH__>=7
1097	ldr	r2,[r1],#4			@ prefetch
1098# else
1099	ldrb	r2,[r1,#3]
1100# endif
1101	eor	r3,r11,r4			@ a^b, b^c in next round
1102#else
1103	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1104	eor	r3,r11,r4			@ a^b, b^c in next round
1105	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1106#endif
1107	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1108	and	r12,r12,r3			@ (b^c)&=(a^b)
1109	add	r6,r6,r10			@ d+=h
1110	eor	r12,r12,r4			@ Maj(a,b,c)
1111	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1112	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1113	@ ldr	r2,[sp,#3*4]		@ 18
1114	@ ldr	r1,[sp,#0*4]
1115	mov	r0,r2,ror#7
1116	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1117	mov	r12,r1,ror#17
1118	eor	r0,r0,r2,ror#18
1119	eor	r12,r12,r1,ror#19
1120	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1121	ldr	r2,[sp,#2*4]
1122	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1123	ldr	r1,[sp,#11*4]
1124
1125	add	r12,r12,r0
1126	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1127	add	r2,r2,r12
1128	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1129	add	r2,r2,r1			@ X[i]
1130	ldr	r12,[r14],#4			@ *K256++
1131	add	r9,r9,r2			@ h+=X[i]
1132	str	r2,[sp,#2*4]
1133	eor	r2,r7,r8
1134	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1135	and	r2,r2,r6
1136	add	r9,r9,r12			@ h+=K256[i]
1137	eor	r2,r2,r8			@ Ch(e,f,g)
1138	eor	r0,r10,r10,ror#11
1139	add	r9,r9,r2			@ h+=Ch(e,f,g)
1140#if 18==31
1141	and	r12,r12,#0xff
1142	cmp	r12,#0xf2			@ done?
1143#endif
1144#if 18<15
1145# if __ARM_ARCH__>=7
1146	ldr	r2,[r1],#4			@ prefetch
1147# else
1148	ldrb	r2,[r1,#3]
1149# endif
1150	eor	r12,r10,r11			@ a^b, b^c in next round
1151#else
1152	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1153	eor	r12,r10,r11			@ a^b, b^c in next round
1154	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1155#endif
1156	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1157	and	r3,r3,r12			@ (b^c)&=(a^b)
1158	add	r5,r5,r9			@ d+=h
1159	eor	r3,r3,r11			@ Maj(a,b,c)
1160	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1161	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1162	@ ldr	r2,[sp,#4*4]		@ 19
1163	@ ldr	r1,[sp,#1*4]
1164	mov	r0,r2,ror#7
1165	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1166	mov	r3,r1,ror#17
1167	eor	r0,r0,r2,ror#18
1168	eor	r3,r3,r1,ror#19
1169	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1170	ldr	r2,[sp,#3*4]
1171	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1172	ldr	r1,[sp,#12*4]
1173
1174	add	r3,r3,r0
1175	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1176	add	r2,r2,r3
1177	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1178	add	r2,r2,r1			@ X[i]
1179	ldr	r3,[r14],#4			@ *K256++
1180	add	r8,r8,r2			@ h+=X[i]
1181	str	r2,[sp,#3*4]
1182	eor	r2,r6,r7
1183	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1184	and	r2,r2,r5
1185	add	r8,r8,r3			@ h+=K256[i]
1186	eor	r2,r2,r7			@ Ch(e,f,g)
1187	eor	r0,r9,r9,ror#11
1188	add	r8,r8,r2			@ h+=Ch(e,f,g)
1189#if 19==31
1190	and	r3,r3,#0xff
1191	cmp	r3,#0xf2			@ done?
1192#endif
1193#if 19<15
1194# if __ARM_ARCH__>=7
1195	ldr	r2,[r1],#4			@ prefetch
1196# else
1197	ldrb	r2,[r1,#3]
1198# endif
1199	eor	r3,r9,r10			@ a^b, b^c in next round
1200#else
1201	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1202	eor	r3,r9,r10			@ a^b, b^c in next round
1203	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1204#endif
1205	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1206	and	r12,r12,r3			@ (b^c)&=(a^b)
1207	add	r4,r4,r8			@ d+=h
1208	eor	r12,r12,r10			@ Maj(a,b,c)
1209	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1210	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1211	@ ldr	r2,[sp,#5*4]		@ 20
1212	@ ldr	r1,[sp,#2*4]
1213	mov	r0,r2,ror#7
1214	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1215	mov	r12,r1,ror#17
1216	eor	r0,r0,r2,ror#18
1217	eor	r12,r12,r1,ror#19
1218	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1219	ldr	r2,[sp,#4*4]
1220	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1221	ldr	r1,[sp,#13*4]
1222
1223	add	r12,r12,r0
1224	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1225	add	r2,r2,r12
1226	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1227	add	r2,r2,r1			@ X[i]
1228	ldr	r12,[r14],#4			@ *K256++
1229	add	r7,r7,r2			@ h+=X[i]
1230	str	r2,[sp,#4*4]
1231	eor	r2,r5,r6
1232	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1233	and	r2,r2,r4
1234	add	r7,r7,r12			@ h+=K256[i]
1235	eor	r2,r2,r6			@ Ch(e,f,g)
1236	eor	r0,r8,r8,ror#11
1237	add	r7,r7,r2			@ h+=Ch(e,f,g)
1238#if 20==31
1239	and	r12,r12,#0xff
1240	cmp	r12,#0xf2			@ done?
1241#endif
1242#if 20<15
1243# if __ARM_ARCH__>=7
1244	ldr	r2,[r1],#4			@ prefetch
1245# else
1246	ldrb	r2,[r1,#3]
1247# endif
1248	eor	r12,r8,r9			@ a^b, b^c in next round
1249#else
1250	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1251	eor	r12,r8,r9			@ a^b, b^c in next round
1252	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1253#endif
1254	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1255	and	r3,r3,r12			@ (b^c)&=(a^b)
1256	add	r11,r11,r7			@ d+=h
1257	eor	r3,r3,r9			@ Maj(a,b,c)
1258	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1259	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1260	@ ldr	r2,[sp,#6*4]		@ 21
1261	@ ldr	r1,[sp,#3*4]
1262	mov	r0,r2,ror#7
1263	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1264	mov	r3,r1,ror#17
1265	eor	r0,r0,r2,ror#18
1266	eor	r3,r3,r1,ror#19
1267	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1268	ldr	r2,[sp,#5*4]
1269	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1270	ldr	r1,[sp,#14*4]
1271
1272	add	r3,r3,r0
1273	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1274	add	r2,r2,r3
1275	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1276	add	r2,r2,r1			@ X[i]
1277	ldr	r3,[r14],#4			@ *K256++
1278	add	r6,r6,r2			@ h+=X[i]
1279	str	r2,[sp,#5*4]
1280	eor	r2,r4,r5
1281	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1282	and	r2,r2,r11
1283	add	r6,r6,r3			@ h+=K256[i]
1284	eor	r2,r2,r5			@ Ch(e,f,g)
1285	eor	r0,r7,r7,ror#11
1286	add	r6,r6,r2			@ h+=Ch(e,f,g)
1287#if 21==31
1288	and	r3,r3,#0xff
1289	cmp	r3,#0xf2			@ done?
1290#endif
1291#if 21<15
1292# if __ARM_ARCH__>=7
1293	ldr	r2,[r1],#4			@ prefetch
1294# else
1295	ldrb	r2,[r1,#3]
1296# endif
1297	eor	r3,r7,r8			@ a^b, b^c in next round
1298#else
1299	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1300	eor	r3,r7,r8			@ a^b, b^c in next round
1301	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1302#endif
1303	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1304	and	r12,r12,r3			@ (b^c)&=(a^b)
1305	add	r10,r10,r6			@ d+=h
1306	eor	r12,r12,r8			@ Maj(a,b,c)
1307	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1308	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1309	@ ldr	r2,[sp,#7*4]		@ 22
1310	@ ldr	r1,[sp,#4*4]
1311	mov	r0,r2,ror#7
1312	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1313	mov	r12,r1,ror#17
1314	eor	r0,r0,r2,ror#18
1315	eor	r12,r12,r1,ror#19
1316	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1317	ldr	r2,[sp,#6*4]
1318	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1319	ldr	r1,[sp,#15*4]
1320
1321	add	r12,r12,r0
1322	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1323	add	r2,r2,r12
1324	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1325	add	r2,r2,r1			@ X[i]
1326	ldr	r12,[r14],#4			@ *K256++
1327	add	r5,r5,r2			@ h+=X[i]
1328	str	r2,[sp,#6*4]
1329	eor	r2,r11,r4
1330	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1331	and	r2,r2,r10
1332	add	r5,r5,r12			@ h+=K256[i]
1333	eor	r2,r2,r4			@ Ch(e,f,g)
1334	eor	r0,r6,r6,ror#11
1335	add	r5,r5,r2			@ h+=Ch(e,f,g)
1336#if 22==31
1337	and	r12,r12,#0xff
1338	cmp	r12,#0xf2			@ done?
1339#endif
1340#if 22<15
1341# if __ARM_ARCH__>=7
1342	ldr	r2,[r1],#4			@ prefetch
1343# else
1344	ldrb	r2,[r1,#3]
1345# endif
1346	eor	r12,r6,r7			@ a^b, b^c in next round
1347#else
1348	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1349	eor	r12,r6,r7			@ a^b, b^c in next round
1350	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1351#endif
1352	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1353	and	r3,r3,r12			@ (b^c)&=(a^b)
1354	add	r9,r9,r5			@ d+=h
1355	eor	r3,r3,r7			@ Maj(a,b,c)
1356	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1357	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1358	@ ldr	r2,[sp,#8*4]		@ 23
1359	@ ldr	r1,[sp,#5*4]
1360	mov	r0,r2,ror#7
1361	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1362	mov	r3,r1,ror#17
1363	eor	r0,r0,r2,ror#18
1364	eor	r3,r3,r1,ror#19
1365	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1366	ldr	r2,[sp,#7*4]
1367	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1368	ldr	r1,[sp,#0*4]
1369
1370	add	r3,r3,r0
1371	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1372	add	r2,r2,r3
1373	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1374	add	r2,r2,r1			@ X[i]
1375	ldr	r3,[r14],#4			@ *K256++
1376	add	r4,r4,r2			@ h+=X[i]
1377	str	r2,[sp,#7*4]
1378	eor	r2,r10,r11
1379	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1380	and	r2,r2,r9
1381	add	r4,r4,r3			@ h+=K256[i]
1382	eor	r2,r2,r11			@ Ch(e,f,g)
1383	eor	r0,r5,r5,ror#11
1384	add	r4,r4,r2			@ h+=Ch(e,f,g)
1385#if 23==31
1386	and	r3,r3,#0xff
1387	cmp	r3,#0xf2			@ done?
1388#endif
1389#if 23<15
1390# if __ARM_ARCH__>=7
1391	ldr	r2,[r1],#4			@ prefetch
1392# else
1393	ldrb	r2,[r1,#3]
1394# endif
1395	eor	r3,r5,r6			@ a^b, b^c in next round
1396#else
1397	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1398	eor	r3,r5,r6			@ a^b, b^c in next round
1399	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1400#endif
1401	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1402	and	r12,r12,r3			@ (b^c)&=(a^b)
1403	add	r8,r8,r4			@ d+=h
1404	eor	r12,r12,r6			@ Maj(a,b,c)
1405	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1406	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1407	@ ldr	r2,[sp,#9*4]		@ 24
1408	@ ldr	r1,[sp,#6*4]
1409	mov	r0,r2,ror#7
1410	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1411	mov	r12,r1,ror#17
1412	eor	r0,r0,r2,ror#18
1413	eor	r12,r12,r1,ror#19
1414	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1415	ldr	r2,[sp,#8*4]
1416	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1417	ldr	r1,[sp,#1*4]
1418
1419	add	r12,r12,r0
1420	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1421	add	r2,r2,r12
1422	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1423	add	r2,r2,r1			@ X[i]
1424	ldr	r12,[r14],#4			@ *K256++
1425	add	r11,r11,r2			@ h+=X[i]
1426	str	r2,[sp,#8*4]
1427	eor	r2,r9,r10
1428	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1429	and	r2,r2,r8
1430	add	r11,r11,r12			@ h+=K256[i]
1431	eor	r2,r2,r10			@ Ch(e,f,g)
1432	eor	r0,r4,r4,ror#11
1433	add	r11,r11,r2			@ h+=Ch(e,f,g)
1434#if 24==31
1435	and	r12,r12,#0xff
1436	cmp	r12,#0xf2			@ done?
1437#endif
1438#if 24<15
1439# if __ARM_ARCH__>=7
1440	ldr	r2,[r1],#4			@ prefetch
1441# else
1442	ldrb	r2,[r1,#3]
1443# endif
1444	eor	r12,r4,r5			@ a^b, b^c in next round
1445#else
1446	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1447	eor	r12,r4,r5			@ a^b, b^c in next round
1448	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1449#endif
1450	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1451	and	r3,r3,r12			@ (b^c)&=(a^b)
1452	add	r7,r7,r11			@ d+=h
1453	eor	r3,r3,r5			@ Maj(a,b,c)
1454	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1455	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1456	@ ldr	r2,[sp,#10*4]		@ 25
1457	@ ldr	r1,[sp,#7*4]
1458	mov	r0,r2,ror#7
1459	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1460	mov	r3,r1,ror#17
1461	eor	r0,r0,r2,ror#18
1462	eor	r3,r3,r1,ror#19
1463	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1464	ldr	r2,[sp,#9*4]
1465	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1466	ldr	r1,[sp,#2*4]
1467
1468	add	r3,r3,r0
1469	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1470	add	r2,r2,r3
1471	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1472	add	r2,r2,r1			@ X[i]
1473	ldr	r3,[r14],#4			@ *K256++
1474	add	r10,r10,r2			@ h+=X[i]
1475	str	r2,[sp,#9*4]
1476	eor	r2,r8,r9
1477	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1478	and	r2,r2,r7
1479	add	r10,r10,r3			@ h+=K256[i]
1480	eor	r2,r2,r9			@ Ch(e,f,g)
1481	eor	r0,r11,r11,ror#11
1482	add	r10,r10,r2			@ h+=Ch(e,f,g)
1483#if 25==31
1484	and	r3,r3,#0xff
1485	cmp	r3,#0xf2			@ done?
1486#endif
1487#if 25<15
1488# if __ARM_ARCH__>=7
1489	ldr	r2,[r1],#4			@ prefetch
1490# else
1491	ldrb	r2,[r1,#3]
1492# endif
1493	eor	r3,r11,r4			@ a^b, b^c in next round
1494#else
1495	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1496	eor	r3,r11,r4			@ a^b, b^c in next round
1497	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1498#endif
1499	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1500	and	r12,r12,r3			@ (b^c)&=(a^b)
1501	add	r6,r6,r10			@ d+=h
1502	eor	r12,r12,r4			@ Maj(a,b,c)
1503	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1504	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1505	@ ldr	r2,[sp,#11*4]		@ 26
1506	@ ldr	r1,[sp,#8*4]
1507	mov	r0,r2,ror#7
1508	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1509	mov	r12,r1,ror#17
1510	eor	r0,r0,r2,ror#18
1511	eor	r12,r12,r1,ror#19
1512	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1513	ldr	r2,[sp,#10*4]
1514	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1515	ldr	r1,[sp,#3*4]
1516
1517	add	r12,r12,r0
1518	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1519	add	r2,r2,r12
1520	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1521	add	r2,r2,r1			@ X[i]
1522	ldr	r12,[r14],#4			@ *K256++
1523	add	r9,r9,r2			@ h+=X[i]
1524	str	r2,[sp,#10*4]
1525	eor	r2,r7,r8
1526	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1527	and	r2,r2,r6
1528	add	r9,r9,r12			@ h+=K256[i]
1529	eor	r2,r2,r8			@ Ch(e,f,g)
1530	eor	r0,r10,r10,ror#11
1531	add	r9,r9,r2			@ h+=Ch(e,f,g)
1532#if 26==31
1533	and	r12,r12,#0xff
1534	cmp	r12,#0xf2			@ done?
1535#endif
1536#if 26<15
1537# if __ARM_ARCH__>=7
1538	ldr	r2,[r1],#4			@ prefetch
1539# else
1540	ldrb	r2,[r1,#3]
1541# endif
1542	eor	r12,r10,r11			@ a^b, b^c in next round
1543#else
1544	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1545	eor	r12,r10,r11			@ a^b, b^c in next round
1546	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1547#endif
1548	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1549	and	r3,r3,r12			@ (b^c)&=(a^b)
1550	add	r5,r5,r9			@ d+=h
1551	eor	r3,r3,r11			@ Maj(a,b,c)
1552	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1553	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1554	@ ldr	r2,[sp,#12*4]		@ 27
1555	@ ldr	r1,[sp,#9*4]
1556	mov	r0,r2,ror#7
1557	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1558	mov	r3,r1,ror#17
1559	eor	r0,r0,r2,ror#18
1560	eor	r3,r3,r1,ror#19
1561	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1562	ldr	r2,[sp,#11*4]
1563	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1564	ldr	r1,[sp,#4*4]
1565
1566	add	r3,r3,r0
1567	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1568	add	r2,r2,r3
1569	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1570	add	r2,r2,r1			@ X[i]
1571	ldr	r3,[r14],#4			@ *K256++
1572	add	r8,r8,r2			@ h+=X[i]
1573	str	r2,[sp,#11*4]
1574	eor	r2,r6,r7
1575	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1576	and	r2,r2,r5
1577	add	r8,r8,r3			@ h+=K256[i]
1578	eor	r2,r2,r7			@ Ch(e,f,g)
1579	eor	r0,r9,r9,ror#11
1580	add	r8,r8,r2			@ h+=Ch(e,f,g)
1581#if 27==31
1582	and	r3,r3,#0xff
1583	cmp	r3,#0xf2			@ done?
1584#endif
1585#if 27<15
1586# if __ARM_ARCH__>=7
1587	ldr	r2,[r1],#4			@ prefetch
1588# else
1589	ldrb	r2,[r1,#3]
1590# endif
1591	eor	r3,r9,r10			@ a^b, b^c in next round
1592#else
1593	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1594	eor	r3,r9,r10			@ a^b, b^c in next round
1595	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1596#endif
1597	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1598	and	r12,r12,r3			@ (b^c)&=(a^b)
1599	add	r4,r4,r8			@ d+=h
1600	eor	r12,r12,r10			@ Maj(a,b,c)
1601	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1602	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1603	@ ldr	r2,[sp,#13*4]		@ 28
1604	@ ldr	r1,[sp,#10*4]
1605	mov	r0,r2,ror#7
1606	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1607	mov	r12,r1,ror#17
1608	eor	r0,r0,r2,ror#18
1609	eor	r12,r12,r1,ror#19
1610	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1611	ldr	r2,[sp,#12*4]
1612	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1613	ldr	r1,[sp,#5*4]
1614
1615	add	r12,r12,r0
1616	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1617	add	r2,r2,r12
1618	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1619	add	r2,r2,r1			@ X[i]
1620	ldr	r12,[r14],#4			@ *K256++
1621	add	r7,r7,r2			@ h+=X[i]
1622	str	r2,[sp,#12*4]
1623	eor	r2,r5,r6
1624	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1625	and	r2,r2,r4
1626	add	r7,r7,r12			@ h+=K256[i]
1627	eor	r2,r2,r6			@ Ch(e,f,g)
1628	eor	r0,r8,r8,ror#11
1629	add	r7,r7,r2			@ h+=Ch(e,f,g)
1630#if 28==31
1631	and	r12,r12,#0xff
1632	cmp	r12,#0xf2			@ done?
1633#endif
1634#if 28<15
1635# if __ARM_ARCH__>=7
1636	ldr	r2,[r1],#4			@ prefetch
1637# else
1638	ldrb	r2,[r1,#3]
1639# endif
1640	eor	r12,r8,r9			@ a^b, b^c in next round
1641#else
1642	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1643	eor	r12,r8,r9			@ a^b, b^c in next round
1644	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1645#endif
1646	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1647	and	r3,r3,r12			@ (b^c)&=(a^b)
1648	add	r11,r11,r7			@ d+=h
1649	eor	r3,r3,r9			@ Maj(a,b,c)
1650	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1651	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1652	@ ldr	r2,[sp,#14*4]		@ 29
1653	@ ldr	r1,[sp,#11*4]
1654	mov	r0,r2,ror#7
1655	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1656	mov	r3,r1,ror#17
1657	eor	r0,r0,r2,ror#18
1658	eor	r3,r3,r1,ror#19
1659	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1660	ldr	r2,[sp,#13*4]
1661	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1662	ldr	r1,[sp,#6*4]
1663
1664	add	r3,r3,r0
1665	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1666	add	r2,r2,r3
1667	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1668	add	r2,r2,r1			@ X[i]
1669	ldr	r3,[r14],#4			@ *K256++
1670	add	r6,r6,r2			@ h+=X[i]
1671	str	r2,[sp,#13*4]
1672	eor	r2,r4,r5
1673	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1674	and	r2,r2,r11
1675	add	r6,r6,r3			@ h+=K256[i]
1676	eor	r2,r2,r5			@ Ch(e,f,g)
1677	eor	r0,r7,r7,ror#11
1678	add	r6,r6,r2			@ h+=Ch(e,f,g)
1679#if 29==31
1680	and	r3,r3,#0xff
1681	cmp	r3,#0xf2			@ done?
1682#endif
1683#if 29<15
1684# if __ARM_ARCH__>=7
1685	ldr	r2,[r1],#4			@ prefetch
1686# else
1687	ldrb	r2,[r1,#3]
1688# endif
1689	eor	r3,r7,r8			@ a^b, b^c in next round
1690#else
1691	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1692	eor	r3,r7,r8			@ a^b, b^c in next round
1693	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1694#endif
1695	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1696	and	r12,r12,r3			@ (b^c)&=(a^b)
1697	add	r10,r10,r6			@ d+=h
1698	eor	r12,r12,r8			@ Maj(a,b,c)
1699	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1700	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1701	@ ldr	r2,[sp,#15*4]		@ 30
1702	@ ldr	r1,[sp,#12*4]
1703	mov	r0,r2,ror#7
1704	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1705	mov	r12,r1,ror#17
1706	eor	r0,r0,r2,ror#18
1707	eor	r12,r12,r1,ror#19
1708	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1709	ldr	r2,[sp,#14*4]
1710	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1711	ldr	r1,[sp,#7*4]
1712
1713	add	r12,r12,r0
1714	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1715	add	r2,r2,r12
1716	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1717	add	r2,r2,r1			@ X[i]
1718	ldr	r12,[r14],#4			@ *K256++
1719	add	r5,r5,r2			@ h+=X[i]
1720	str	r2,[sp,#14*4]
1721	eor	r2,r11,r4
1722	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1723	and	r2,r2,r10
1724	add	r5,r5,r12			@ h+=K256[i]
1725	eor	r2,r2,r4			@ Ch(e,f,g)
1726	eor	r0,r6,r6,ror#11
1727	add	r5,r5,r2			@ h+=Ch(e,f,g)
1728#if 30==31
1729	and	r12,r12,#0xff
1730	cmp	r12,#0xf2			@ done?
1731#endif
1732#if 30<15
1733# if __ARM_ARCH__>=7
1734	ldr	r2,[r1],#4			@ prefetch
1735# else
1736	ldrb	r2,[r1,#3]
1737# endif
1738	eor	r12,r6,r7			@ a^b, b^c in next round
1739#else
1740	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1741	eor	r12,r6,r7			@ a^b, b^c in next round
1742	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1743#endif
1744	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1745	and	r3,r3,r12			@ (b^c)&=(a^b)
1746	add	r9,r9,r5			@ d+=h
1747	eor	r3,r3,r7			@ Maj(a,b,c)
1748	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1749	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1750	@ ldr	r2,[sp,#0*4]		@ 31
1751	@ ldr	r1,[sp,#13*4]
1752	mov	r0,r2,ror#7
1753	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1754	mov	r3,r1,ror#17
1755	eor	r0,r0,r2,ror#18
1756	eor	r3,r3,r1,ror#19
1757	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1758	ldr	r2,[sp,#15*4]
1759	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1760	ldr	r1,[sp,#8*4]
1761
1762	add	r3,r3,r0
1763	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1764	add	r2,r2,r3
1765	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1766	add	r2,r2,r1			@ X[i]
1767	ldr	r3,[r14],#4			@ *K256++
1768	add	r4,r4,r2			@ h+=X[i]
1769	str	r2,[sp,#15*4]
1770	eor	r2,r10,r11
1771	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1772	and	r2,r2,r9
1773	add	r4,r4,r3			@ h+=K256[i]
1774	eor	r2,r2,r11			@ Ch(e,f,g)
1775	eor	r0,r5,r5,ror#11
1776	add	r4,r4,r2			@ h+=Ch(e,f,g)
1777#if 31==31
1778	and	r3,r3,#0xff
1779	cmp	r3,#0xf2			@ done?
1780#endif
1781#if 31<15
1782# if __ARM_ARCH__>=7
1783	ldr	r2,[r1],#4			@ prefetch
1784# else
1785	ldrb	r2,[r1,#3]
1786# endif
1787	eor	r3,r5,r6			@ a^b, b^c in next round
1788#else
1789	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1790	eor	r3,r5,r6			@ a^b, b^c in next round
1791	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1792#endif
1793	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1794	and	r12,r12,r3			@ (b^c)&=(a^b)
1795	add	r8,r8,r4			@ d+=h
1796	eor	r12,r12,r6			@ Maj(a,b,c)
1797	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1798	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1799#ifdef	__thumb2__
1800	ite	eq			@ Thumb2 thing, sanity check in ARM
1801#endif
1802	ldreq	r3,[sp,#16*4]		@ pull ctx
1803	bne	.Lrounds_16_xx
1804
1805	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1806	ldr	r0,[r3,#0]	/* r3 = ctx pointer (pulled from [sp,#16*4] when the round loop exits); fetch ctx words 0..2 */
1807	ldr	r2,[r3,#4]
1808	ldr	r12,[r3,#8]
1809	add	r4,r4,r0	/* fold each previous ctx word into the matching working register, r4..r11 in order */
1810	ldr	r0,[r3,#12]
1811	add	r5,r5,r2
1812	ldr	r2,[r3,#16]
1813	add	r6,r6,r12
1814	ldr	r12,[r3,#20]
1815	add	r7,r7,r0
1816	ldr	r0,[r3,#24]
1817	add	r8,r8,r2
1818	ldr	r2,[r3,#28]	/* eighth and last ctx word */
1819	add	r9,r9,r12
1820	ldr	r1,[sp,#17*4]		@ pull inp
1821	ldr	r12,[sp,#18*4]		@ pull inp+len
1822	add	r10,r10,r0
1823	add	r11,r11,r2
1824	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}	/* write the eight updated words back through r3 */
1825	cmp	r1,r12	/* inp == inp+len ? the sub below has no s suffix, so these flags reach the bne */
1826	sub	r14,r14,#256	@ rewind Ktbl
1827	bne	.Loop	/* input remains: branch back to .Loop */
1828
1829	add	sp,sp,#19*4	@ destroy frame
1830#if __ARM_ARCH__>=5
1831	ldmia	sp!,{r4-r11,pc}	/* pop r4-r11 and load pc from the stack, which returns */
1832#else
1833	ldmia	sp!,{r4-r11,lr}	/* older-architecture path: pop the return address into lr and return by hand */
1834	tst	lr,#1	/* bit 0 of the return address picks between the two returns below */
1835	moveq	pc,lr			@ be binary compatible with V4, yet
1836	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)	/* 0xe12fff1e is the ARM encoding of "bx lr" */
1837#endif
1838.size	zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
1839
1840#if __ARM_ARCH__ >= 7
1841.arch	armv7-a
1842.fpu	neon
1843
1844.globl	zfs_sha256_block_neon
1845.type	zfs_sha256_block_neon,%function
1846.align	5
1847.skip	16
1848zfs_sha256_block_neon:	/* in: r0 = pointer to eight 32-bit state words, r1 = input, r2 = number of 64-byte blocks */
1849.LNEON:
1850	stmdb	sp!,{r4-r12,lr}	/* save r4-r12 and lr */
1851	/* scratch frame: 64 bytes of K+X words at [sp,#0..63], then four saved words at [sp,#64..76] */
1852	sub	r11,sp,#16*4+16	/* r11 = sp - 80 */
1853	adr	r14,K256	/* r14 = address of the K256 table */
1854	bic	r11,r11,#15		@ align for 128-bit stores
1855	mov	r12,sp	/* r12 = sp before the alloca; parked at [sp,#76] below */
1856	mov	sp,r11			@ alloca
1857	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1858
1859	vld1.8		{q0},[r1]!	/* q0-q3 = 64 input bytes; r1 post-increments by 16 on each load */
1860	vld1.8		{q1},[r1]!
1861	vld1.8		{q2},[r1]!
1862	vld1.8		{q3},[r1]!
1863	vld1.32		{q8},[r14,:128]!	/* q8-q11 = K256[0..15]; r14 ends up at K256[16] */
1864	vld1.32		{q9},[r14,:128]!
1865	vld1.32		{q10},[r14,:128]!
1866	vld1.32		{q11},[r14,:128]!
1867	vrev32.8	q0,q0		@ yes, even on
1868	str		r0,[sp,#64]	/* [sp,#64] = r0 (state pointer) */
1869	vrev32.8	q1,q1		@ big-endian
1870	str		r1,[sp,#68]	/* [sp,#68] = input pointer, already advanced past the block just loaded */
1871	mov		r1,sp	/* from here r1 walks the scratch area */
1872	vrev32.8	q2,q2
1873	str		r2,[sp,#72]	/* [sp,#72] = end-of-input pointer computed above */
1874	vrev32.8	q3,q3
1875	str		r12,[sp,#76]		@ save original sp
1876	vadd.i32	q8,q8,q0	/* q8-q11 = K256[i] + X[i] for i = 0..15 */
1877	vadd.i32	q9,q9,q1
1878	vst1.32		{q8},[r1,:128]!	/* spill K+X to the scratch area, where the scalar rounds read it with ldr */
1879	vadd.i32	q10,q10,q2
1880	vst1.32		{q9},[r1,:128]!
1881	vadd.i32	q11,q11,q3
1882	vst1.32		{q10},[r1,:128]!
1883	vst1.32		{q11},[r1,:128]!
1884
1885	ldmia		r0,{r4-r11}	/* r4-r11 = the eight state words */
1886	sub		r1,r1,#64	/* undo the four 16-byte post-increments: r1 = sp again */
1887	ldr		r2,[sp,#0]	/* r2 = first K+X word */
1888	eor		r12,r12,r12	/* r12 = 0, so the first round's "add r4,r4,r12" adds nothing */
1889	eor		r3,r5,r6	/* r3 = r5^r6, consumed by the first round's "and r3,r3,r12" */
1890	b		.L_00_48
1891
1892.align	4
1893.L_00_48:	/* one pass = 16 scalar rounds on r4-r11, interleaved with the NEON schedule update of q0-q3; repeats until the K256 terminator word is seen */
1894	vext.8	q8,q0,q1,#4	/* q8 = X[1..4] */
1895	add	r11,r11,r2	/* h += K+X word (r2 was loaded from [sp,#0]) */
1896	eor	r2,r9,r10	/* f^g, first step of Ch(e,f,g) */
1897	eor	r0,r8,r8,ror#5
1898	vext.8	q9,q2,q3,#4	/* q9 = X[9..12] */
1899	add	r4,r4,r12	/* add the Maj term carried over from the previous round (zero on first entry) */
1900	and	r2,r2,r8
1901	eor	r12,r0,r8,ror#19
1902	vshr.u32	q10,q8,#7	/* with the vsli #25 below: X[1..4] rotated right by 7 */
1903	eor	r0,r4,r4,ror#11
1904	eor	r2,r2,r10	/* r2 = Ch(e,f,g) */
1905	vadd.i32	q0,q0,q9	/* X[0..3] += X[9..12] */
1906	add	r11,r11,r12,ror#6	/* h += Sigma1(e) */
1907	eor	r12,r4,r5	/* a^b, reused as b^c by the next round */
1908	vshr.u32	q9,q8,#3	/* the plain-shift term of sigma0 */
1909	eor	r0,r0,r4,ror#20
1910	add	r11,r11,r2	/* h += Ch(e,f,g) */
1911	vsli.32	q10,q8,#25
1912	ldr	r2,[sp,#4]	/* K+X word for the next round */
1913	and	r3,r3,r12	/* (b^c) & (a^b) */
1914	vshr.u32	q11,q8,#18	/* with the vsli #14 below: rotate right by 18 */
1915	add	r7,r7,r11	/* d += h */
1916	add	r11,r11,r0,ror#2	/* h += Sigma0(a) */
1917	eor	r3,r3,r5	/* r3 = Maj(a,b,c); it is added at the start of the next round */
1918	veor	q9,q9,q10
1919	add	r10,r10,r2
1920	vsli.32	q11,q8,#14
1921	eor	r2,r8,r9
1922	eor	r0,r7,r7,ror#5
1923	vshr.u32	d24,d7,#17	/* sigma1 on d7 = X[14..15]: rotate right by 17 (with vsli #15) */
1924	add	r11,r11,r3
1925	and	r2,r2,r7
1926	veor	q9,q9,q11	/* q9 = sigma0(X[1..4]) */
1927	eor	r3,r0,r7,ror#19
1928	eor	r0,r11,r11,ror#11
1929	vsli.32	d24,d7,#15
1930	eor	r2,r2,r9
1931	add	r10,r10,r3,ror#6
1932	vshr.u32	d25,d7,#10	/* the plain-shift term of sigma1 */
1933	eor	r3,r11,r4
1934	eor	r0,r0,r11,ror#20
1935	vadd.i32	q0,q0,q9	/* X[0..3] += sigma0(X[1..4]) */
1936	add	r10,r10,r2
1937	ldr	r2,[sp,#8]
1938	veor	d25,d25,d24
1939	and	r12,r12,r3
1940	add	r6,r6,r10
1941	vshr.u32	d24,d7,#19	/* rotate right by 19 (with vsli #13) */
1942	add	r10,r10,r0,ror#2
1943	eor	r12,r12,r4
1944	vsli.32	d24,d7,#13
1945	add	r9,r9,r2
1946	eor	r2,r7,r8
1947	veor	d25,d25,d24	/* d25 = sigma1(X[14..15]) */
1948	eor	r0,r6,r6,ror#5
1949	add	r10,r10,r12
1950	vadd.i32	d0,d0,d25	/* d0 now holds the two newest schedule words */
1951	and	r2,r2,r6
1952	eor	r12,r0,r6,ror#19
1953	vshr.u32	d24,d0,#17	/* sigma1 again, this time on the words just finished in d0 */
1954	eor	r0,r10,r10,ror#11
1955	eor	r2,r2,r8
1956	vsli.32	d24,d0,#15
1957	add	r9,r9,r12,ror#6
1958	eor	r12,r10,r11
1959	vshr.u32	d25,d0,#10
1960	eor	r0,r0,r10,ror#20
1961	add	r9,r9,r2
1962	veor	d25,d25,d24
1963	ldr	r2,[sp,#12]
1964	and	r3,r3,r12
1965	vshr.u32	d24,d0,#19
1966	add	r5,r5,r9
1967	add	r9,r9,r0,ror#2
1968	eor	r3,r3,r11
1969	vld1.32	{q8},[r14,:128]!	/* q8 = next four K256 words */
1970	add	r8,r8,r2
1971	vsli.32	d24,d0,#13
1972	eor	r2,r6,r7
1973	eor	r0,r5,r5,ror#5
1974	veor	d25,d25,d24
1975	add	r9,r9,r3
1976	and	r2,r2,r5
1977	vadd.i32	d1,d1,d25	/* d1 finished: q0 now holds four new schedule words */
1978	eor	r3,r0,r5,ror#19
1979	eor	r0,r9,r9,ror#11
1980	vadd.i32	q8,q8,q0	/* q8 = K + new X */
1981	eor	r2,r2,r7
1982	add	r8,r8,r3,ror#6
1983	eor	r3,r9,r10
1984	eor	r0,r0,r9,ror#20
1985	add	r8,r8,r2
1986	ldr	r2,[sp,#16]
1987	and	r12,r12,r3
1988	add	r4,r4,r8
1989	vst1.32	{q8},[r1,:128]!	/* overwrite [sp,#0..15]; those four words were already read by the rounds above */
1990	add	r8,r8,r0,ror#2
1991	eor	r12,r12,r10
1992	vext.8	q8,q1,q2,#4	/* second quarter: same schedule steps, now producing q1 */
1993	add	r7,r7,r2
1994	eor	r2,r5,r6
1995	eor	r0,r4,r4,ror#5
1996	vext.8	q9,q3,q0,#4
1997	add	r8,r8,r12
1998	and	r2,r2,r4
1999	eor	r12,r0,r4,ror#19
2000	vshr.u32	q10,q8,#7
2001	eor	r0,r8,r8,ror#11
2002	eor	r2,r2,r6
2003	vadd.i32	q1,q1,q9
2004	add	r7,r7,r12,ror#6
2005	eor	r12,r8,r9
2006	vshr.u32	q9,q8,#3
2007	eor	r0,r0,r8,ror#20
2008	add	r7,r7,r2
2009	vsli.32	q10,q8,#25
2010	ldr	r2,[sp,#20]
2011	and	r3,r3,r12
2012	vshr.u32	q11,q8,#18
2013	add	r11,r11,r7
2014	add	r7,r7,r0,ror#2
2015	eor	r3,r3,r9
2016	veor	q9,q9,q10
2017	add	r6,r6,r2
2018	vsli.32	q11,q8,#14
2019	eor	r2,r4,r5
2020	eor	r0,r11,r11,ror#5
2021	vshr.u32	d24,d1,#17
2022	add	r7,r7,r3
2023	and	r2,r2,r11
2024	veor	q9,q9,q11
2025	eor	r3,r0,r11,ror#19
2026	eor	r0,r7,r7,ror#11
2027	vsli.32	d24,d1,#15
2028	eor	r2,r2,r5
2029	add	r6,r6,r3,ror#6
2030	vshr.u32	d25,d1,#10
2031	eor	r3,r7,r8
2032	eor	r0,r0,r7,ror#20
2033	vadd.i32	q1,q1,q9
2034	add	r6,r6,r2
2035	ldr	r2,[sp,#24]
2036	veor	d25,d25,d24
2037	and	r12,r12,r3
2038	add	r10,r10,r6
2039	vshr.u32	d24,d1,#19
2040	add	r6,r6,r0,ror#2
2041	eor	r12,r12,r8
2042	vsli.32	d24,d1,#13
2043	add	r5,r5,r2
2044	eor	r2,r11,r4
2045	veor	d25,d25,d24
2046	eor	r0,r10,r10,ror#5
2047	add	r6,r6,r12
2048	vadd.i32	d2,d2,d25
2049	and	r2,r2,r10
2050	eor	r12,r0,r10,ror#19
2051	vshr.u32	d24,d2,#17
2052	eor	r0,r6,r6,ror#11
2053	eor	r2,r2,r4
2054	vsli.32	d24,d2,#15
2055	add	r5,r5,r12,ror#6
2056	eor	r12,r6,r7
2057	vshr.u32	d25,d2,#10
2058	eor	r0,r0,r6,ror#20
2059	add	r5,r5,r2
2060	veor	d25,d25,d24
2061	ldr	r2,[sp,#28]
2062	and	r3,r3,r12
2063	vshr.u32	d24,d2,#19
2064	add	r9,r9,r5
2065	add	r5,r5,r0,ror#2
2066	eor	r3,r3,r7
2067	vld1.32	{q8},[r14,:128]!
2068	add	r4,r4,r2
2069	vsli.32	d24,d2,#13
2070	eor	r2,r10,r11
2071	eor	r0,r9,r9,ror#5
2072	veor	d25,d25,d24
2073	add	r5,r5,r3
2074	and	r2,r2,r9
2075	vadd.i32	d3,d3,d25
2076	eor	r3,r0,r9,ror#19
2077	eor	r0,r5,r5,ror#11
2078	vadd.i32	q8,q8,q1
2079	eor	r2,r2,r11
2080	add	r4,r4,r3,ror#6
2081	eor	r3,r5,r6
2082	eor	r0,r0,r5,ror#20
2083	add	r4,r4,r2
2084	ldr	r2,[sp,#32]
2085	and	r12,r12,r3
2086	add	r8,r8,r4
2087	vst1.32	{q8},[r1,:128]!
2088	add	r4,r4,r0,ror#2
2089	eor	r12,r12,r6
2090	vext.8	q8,q2,q3,#4	/* third quarter: same schedule steps, now producing q2 */
2091	add	r11,r11,r2
2092	eor	r2,r9,r10
2093	eor	r0,r8,r8,ror#5
2094	vext.8	q9,q0,q1,#4
2095	add	r4,r4,r12
2096	and	r2,r2,r8
2097	eor	r12,r0,r8,ror#19
2098	vshr.u32	q10,q8,#7
2099	eor	r0,r4,r4,ror#11
2100	eor	r2,r2,r10
2101	vadd.i32	q2,q2,q9
2102	add	r11,r11,r12,ror#6
2103	eor	r12,r4,r5
2104	vshr.u32	q9,q8,#3
2105	eor	r0,r0,r4,ror#20
2106	add	r11,r11,r2
2107	vsli.32	q10,q8,#25
2108	ldr	r2,[sp,#36]
2109	and	r3,r3,r12
2110	vshr.u32	q11,q8,#18
2111	add	r7,r7,r11
2112	add	r11,r11,r0,ror#2
2113	eor	r3,r3,r5
2114	veor	q9,q9,q10
2115	add	r10,r10,r2
2116	vsli.32	q11,q8,#14
2117	eor	r2,r8,r9
2118	eor	r0,r7,r7,ror#5
2119	vshr.u32	d24,d3,#17
2120	add	r11,r11,r3
2121	and	r2,r2,r7
2122	veor	q9,q9,q11
2123	eor	r3,r0,r7,ror#19
2124	eor	r0,r11,r11,ror#11
2125	vsli.32	d24,d3,#15
2126	eor	r2,r2,r9
2127	add	r10,r10,r3,ror#6
2128	vshr.u32	d25,d3,#10
2129	eor	r3,r11,r4
2130	eor	r0,r0,r11,ror#20
2131	vadd.i32	q2,q2,q9
2132	add	r10,r10,r2
2133	ldr	r2,[sp,#40]
2134	veor	d25,d25,d24
2135	and	r12,r12,r3
2136	add	r6,r6,r10
2137	vshr.u32	d24,d3,#19
2138	add	r10,r10,r0,ror#2
2139	eor	r12,r12,r4
2140	vsli.32	d24,d3,#13
2141	add	r9,r9,r2
2142	eor	r2,r7,r8
2143	veor	d25,d25,d24
2144	eor	r0,r6,r6,ror#5
2145	add	r10,r10,r12
2146	vadd.i32	d4,d4,d25
2147	and	r2,r2,r6
2148	eor	r12,r0,r6,ror#19
2149	vshr.u32	d24,d4,#17
2150	eor	r0,r10,r10,ror#11
2151	eor	r2,r2,r8
2152	vsli.32	d24,d4,#15
2153	add	r9,r9,r12,ror#6
2154	eor	r12,r10,r11
2155	vshr.u32	d25,d4,#10
2156	eor	r0,r0,r10,ror#20
2157	add	r9,r9,r2
2158	veor	d25,d25,d24
2159	ldr	r2,[sp,#44]
2160	and	r3,r3,r12
2161	vshr.u32	d24,d4,#19
2162	add	r5,r5,r9
2163	add	r9,r9,r0,ror#2
2164	eor	r3,r3,r11
2165	vld1.32	{q8},[r14,:128]!
2166	add	r8,r8,r2
2167	vsli.32	d24,d4,#13
2168	eor	r2,r6,r7
2169	eor	r0,r5,r5,ror#5
2170	veor	d25,d25,d24
2171	add	r9,r9,r3
2172	and	r2,r2,r5
2173	vadd.i32	d5,d5,d25
2174	eor	r3,r0,r5,ror#19
2175	eor	r0,r9,r9,ror#11
2176	vadd.i32	q8,q8,q2
2177	eor	r2,r2,r7
2178	add	r8,r8,r3,ror#6
2179	eor	r3,r9,r10
2180	eor	r0,r0,r9,ror#20
2181	add	r8,r8,r2
2182	ldr	r2,[sp,#48]
2183	and	r12,r12,r3
2184	add	r4,r4,r8
2185	vst1.32	{q8},[r1,:128]!
2186	add	r8,r8,r0,ror#2
2187	eor	r12,r12,r10
2188	vext.8	q8,q3,q0,#4	/* fourth quarter: same schedule steps, now producing q3 */
2189	add	r7,r7,r2
2190	eor	r2,r5,r6
2191	eor	r0,r4,r4,ror#5
2192	vext.8	q9,q1,q2,#4
2193	add	r8,r8,r12
2194	and	r2,r2,r4
2195	eor	r12,r0,r4,ror#19
2196	vshr.u32	q10,q8,#7
2197	eor	r0,r8,r8,ror#11
2198	eor	r2,r2,r6
2199	vadd.i32	q3,q3,q9
2200	add	r7,r7,r12,ror#6
2201	eor	r12,r8,r9
2202	vshr.u32	q9,q8,#3
2203	eor	r0,r0,r8,ror#20
2204	add	r7,r7,r2
2205	vsli.32	q10,q8,#25
2206	ldr	r2,[sp,#52]
2207	and	r3,r3,r12
2208	vshr.u32	q11,q8,#18
2209	add	r11,r11,r7
2210	add	r7,r7,r0,ror#2
2211	eor	r3,r3,r9
2212	veor	q9,q9,q10
2213	add	r6,r6,r2
2214	vsli.32	q11,q8,#14
2215	eor	r2,r4,r5
2216	eor	r0,r11,r11,ror#5
2217	vshr.u32	d24,d5,#17
2218	add	r7,r7,r3
2219	and	r2,r2,r11
2220	veor	q9,q9,q11
2221	eor	r3,r0,r11,ror#19
2222	eor	r0,r7,r7,ror#11
2223	vsli.32	d24,d5,#15
2224	eor	r2,r2,r5
2225	add	r6,r6,r3,ror#6
2226	vshr.u32	d25,d5,#10
2227	eor	r3,r7,r8
2228	eor	r0,r0,r7,ror#20
2229	vadd.i32	q3,q3,q9
2230	add	r6,r6,r2
2231	ldr	r2,[sp,#56]
2232	veor	d25,d25,d24
2233	and	r12,r12,r3
2234	add	r10,r10,r6
2235	vshr.u32	d24,d5,#19
2236	add	r6,r6,r0,ror#2
2237	eor	r12,r12,r8
2238	vsli.32	d24,d5,#13
2239	add	r5,r5,r2
2240	eor	r2,r11,r4
2241	veor	d25,d25,d24
2242	eor	r0,r10,r10,ror#5
2243	add	r6,r6,r12
2244	vadd.i32	d6,d6,d25
2245	and	r2,r2,r10
2246	eor	r12,r0,r10,ror#19
2247	vshr.u32	d24,d6,#17
2248	eor	r0,r6,r6,ror#11
2249	eor	r2,r2,r4
2250	vsli.32	d24,d6,#15
2251	add	r5,r5,r12,ror#6
2252	eor	r12,r6,r7
2253	vshr.u32	d25,d6,#10
2254	eor	r0,r0,r6,ror#20
2255	add	r5,r5,r2
2256	veor	d25,d25,d24
2257	ldr	r2,[sp,#60]
2258	and	r3,r3,r12
2259	vshr.u32	d24,d6,#19
2260	add	r9,r9,r5
2261	add	r5,r5,r0,ror#2
2262	eor	r3,r3,r7
2263	vld1.32	{q8},[r14,:128]!
2264	add	r4,r4,r2
2265	vsli.32	d24,d6,#13
2266	eor	r2,r10,r11
2267	eor	r0,r9,r9,ror#5
2268	veor	d25,d25,d24
2269	add	r5,r5,r3
2270	and	r2,r2,r9
2271	vadd.i32	d7,d7,d25
2272	eor	r3,r0,r9,ror#19
2273	eor	r0,r5,r5,ror#11
2274	vadd.i32	q8,q8,q3
2275	eor	r2,r2,r11
2276	add	r4,r4,r3,ror#6
2277	eor	r3,r5,r6
2278	eor	r0,r0,r5,ror#20
2279	add	r4,r4,r2
2280	ldr	r2,[r14]	/* peek at the next K256 word without advancing r14 */
2281	and	r12,r12,r3
2282	add	r8,r8,r4
2283	vst1.32	{q8},[r1,:128]!
2284	add	r4,r4,r0,ror#2
2285	eor	r12,r12,r6
2286	teq	r2,#0				@ check for K256 terminator
2287	ldr	r2,[sp,#0]	/* first K+X word for the rounds that follow; ldr leaves the teq flags alone */
2288	sub	r1,r1,#64	/* r1 back to the start of the scratch area */
2289	bne	.L_00_48	/* nonzero word: K256 entries remain, run another pass */
2290
2291	ldr		r1,[sp,#68]
2292	ldr		r0,[sp,#72]
2293	sub		r14,r14,#256	@ rewind r14
2294	teq		r1,r0
2295	it		eq
2296	subeq		r1,r1,#64		@ avoid SEGV
2297	vld1.8		{q0},[r1]!		@ load next input block
2298	vld1.8		{q1},[r1]!
2299	vld1.8		{q2},[r1]!
2300	vld1.8		{q3},[r1]!
2301	it		ne
2302	strne		r1,[sp,#68]
2303	mov		r1,sp
2304	add	r11,r11,r2
2305	eor	r2,r9,r10
2306	eor	r0,r8,r8,ror#5
2307	add	r4,r4,r12
2308	vld1.32	{q8},[r14,:128]!
2309	and	r2,r2,r8
2310	eor	r12,r0,r8,ror#19
2311	eor	r0,r4,r4,ror#11
2312	eor	r2,r2,r10
2313	vrev32.8	q0,q0
2314	add	r11,r11,r12,ror#6
2315	eor	r12,r4,r5
2316	eor	r0,r0,r4,ror#20
2317	add	r11,r11,r2
2318	vadd.i32	q8,q8,q0
2319	ldr	r2,[sp,#4]
2320	and	r3,r3,r12
2321	add	r7,r7,r11
2322	add	r11,r11,r0,ror#2
2323	eor	r3,r3,r5
2324	add	r10,r10,r2
2325	eor	r2,r8,r9
2326	eor	r0,r7,r7,ror#5
2327	add	r11,r11,r3
2328	and	r2,r2,r7
2329	eor	r3,r0,r7,ror#19
2330	eor	r0,r11,r11,ror#11
2331	eor	r2,r2,r9
2332	add	r10,r10,r3,ror#6
2333	eor	r3,r11,r4
2334	eor	r0,r0,r11,ror#20
2335	add	r10,r10,r2
2336	ldr	r2,[sp,#8]
2337	and	r12,r12,r3
2338	add	r6,r6,r10
2339	add	r10,r10,r0,ror#2
2340	eor	r12,r12,r4
2341	add	r9,r9,r2
2342	eor	r2,r7,r8
2343	eor	r0,r6,r6,ror#5
2344	add	r10,r10,r12
2345	and	r2,r2,r6
2346	eor	r12,r0,r6,ror#19
2347	eor	r0,r10,r10,ror#11
2348	eor	r2,r2,r8
2349	add	r9,r9,r12,ror#6
2350	eor	r12,r10,r11
2351	eor	r0,r0,r10,ror#20
2352	add	r9,r9,r2
2353	ldr	r2,[sp,#12]
2354	and	r3,r3,r12
2355	add	r5,r5,r9
2356	add	r9,r9,r0,ror#2
2357	eor	r3,r3,r11
2358	add	r8,r8,r2
2359	eor	r2,r6,r7
2360	eor	r0,r5,r5,ror#5
2361	add	r9,r9,r3
2362	and	r2,r2,r5
2363	eor	r3,r0,r5,ror#19
2364	eor	r0,r9,r9,ror#11
2365	eor	r2,r2,r7
2366	add	r8,r8,r3,ror#6
2367	eor	r3,r9,r10
2368	eor	r0,r0,r9,ror#20
2369	add	r8,r8,r2
2370	ldr	r2,[sp,#16]
2371	and	r12,r12,r3
2372	add	r4,r4,r8
2373	add	r8,r8,r0,ror#2
2374	eor	r12,r12,r10
2375	vst1.32	{q8},[r1,:128]!
2376	add	r7,r7,r2
2377	eor	r2,r5,r6
2378	eor	r0,r4,r4,ror#5
2379	add	r8,r8,r12
2380	vld1.32	{q8},[r14,:128]!
2381	and	r2,r2,r4
2382	eor	r12,r0,r4,ror#19
2383	eor	r0,r8,r8,ror#11
2384	eor	r2,r2,r6
2385	vrev32.8	q1,q1
2386	add	r7,r7,r12,ror#6
2387	eor	r12,r8,r9
2388	eor	r0,r0,r8,ror#20
2389	add	r7,r7,r2
2390	vadd.i32	q8,q8,q1
2391	ldr	r2,[sp,#20]
2392	and	r3,r3,r12
2393	add	r11,r11,r7
2394	add	r7,r7,r0,ror#2
2395	eor	r3,r3,r9
2396	add	r6,r6,r2
2397	eor	r2,r4,r5
2398	eor	r0,r11,r11,ror#5
2399	add	r7,r7,r3
2400	and	r2,r2,r11
2401	eor	r3,r0,r11,ror#19
2402	eor	r0,r7,r7,ror#11
2403	eor	r2,r2,r5
2404	add	r6,r6,r3,ror#6
2405	eor	r3,r7,r8
2406	eor	r0,r0,r7,ror#20
2407	add	r6,r6,r2
2408	ldr	r2,[sp,#24]
2409	and	r12,r12,r3
2410	add	r10,r10,r6
2411	add	r6,r6,r0,ror#2
2412	eor	r12,r12,r8
2413	add	r5,r5,r2
2414	eor	r2,r11,r4
2415	eor	r0,r10,r10,ror#5
2416	add	r6,r6,r12
2417	and	r2,r2,r10
2418	eor	r12,r0,r10,ror#19
2419	eor	r0,r6,r6,ror#11
2420	eor	r2,r2,r4
2421	add	r5,r5,r12,ror#6
2422	eor	r12,r6,r7
2423	eor	r0,r0,r6,ror#20
2424	add	r5,r5,r2
2425	ldr	r2,[sp,#28]
2426	and	r3,r3,r12
2427	add	r9,r9,r5
2428	add	r5,r5,r0,ror#2
2429	eor	r3,r3,r7
2430	add	r4,r4,r2
2431	eor	r2,r10,r11
2432	eor	r0,r9,r9,ror#5
2433	add	r5,r5,r3
2434	and	r2,r2,r9
2435	eor	r3,r0,r9,ror#19
2436	eor	r0,r5,r5,ror#11
2437	eor	r2,r2,r11
2438	add	r4,r4,r3,ror#6
2439	eor	r3,r5,r6
2440	eor	r0,r0,r5,ror#20
2441	add	r4,r4,r2
2442	ldr	r2,[sp,#32]
2443	and	r12,r12,r3
2444	add	r8,r8,r4
2445	add	r4,r4,r0,ror#2
2446	eor	r12,r12,r6
2447	vst1.32	{q8},[r1,:128]!
2448	add	r11,r11,r2
2449	eor	r2,r9,r10
2450	eor	r0,r8,r8,ror#5
2451	add	r4,r4,r12
2452	vld1.32	{q8},[r14,:128]!
2453	and	r2,r2,r8
2454	eor	r12,r0,r8,ror#19
2455	eor	r0,r4,r4,ror#11
2456	eor	r2,r2,r10
2457	vrev32.8	q2,q2
2458	add	r11,r11,r12,ror#6
2459	eor	r12,r4,r5
2460	eor	r0,r0,r4,ror#20
2461	add	r11,r11,r2
2462	vadd.i32	q8,q8,q2
2463	ldr	r2,[sp,#36]
2464	and	r3,r3,r12
2465	add	r7,r7,r11
2466	add	r11,r11,r0,ror#2
2467	eor	r3,r3,r5
2468	add	r10,r10,r2
2469	eor	r2,r8,r9
2470	eor	r0,r7,r7,ror#5
2471	add	r11,r11,r3
2472	and	r2,r2,r7
2473	eor	r3,r0,r7,ror#19
2474	eor	r0,r11,r11,ror#11
2475	eor	r2,r2,r9
2476	add	r10,r10,r3,ror#6
2477	eor	r3,r11,r4
2478	eor	r0,r0,r11,ror#20
2479	add	r10,r10,r2
2480	ldr	r2,[sp,#40]
2481	and	r12,r12,r3
2482	add	r6,r6,r10
2483	add	r10,r10,r0,ror#2
2484	eor	r12,r12,r4
2485	add	r9,r9,r2
2486	eor	r2,r7,r8
2487	eor	r0,r6,r6,ror#5
2488	add	r10,r10,r12
2489	and	r2,r2,r6
2490	eor	r12,r0,r6,ror#19
2491	eor	r0,r10,r10,ror#11
2492	eor	r2,r2,r8
2493	add	r9,r9,r12,ror#6
2494	eor	r12,r10,r11
2495	eor	r0,r0,r10,ror#20
2496	add	r9,r9,r2
2497	ldr	r2,[sp,#44]
2498	and	r3,r3,r12
2499	add	r5,r5,r9
2500	add	r9,r9,r0,ror#2
2501	eor	r3,r3,r11
2502	add	r8,r8,r2
2503	eor	r2,r6,r7
2504	eor	r0,r5,r5,ror#5
2505	add	r9,r9,r3
2506	and	r2,r2,r5
2507	eor	r3,r0,r5,ror#19
2508	eor	r0,r9,r9,ror#11
2509	eor	r2,r2,r7
2510	add	r8,r8,r3,ror#6
2511	eor	r3,r9,r10
2512	eor	r0,r0,r9,ror#20
2513	add	r8,r8,r2
2514	ldr	r2,[sp,#48]
2515	and	r12,r12,r3
2516	add	r4,r4,r8
2517	add	r8,r8,r0,ror#2
2518	eor	r12,r12,r10
2519	vst1.32	{q8},[r1,:128]!
2520	add	r7,r7,r2
2521	eor	r2,r5,r6
2522	eor	r0,r4,r4,ror#5
2523	add	r8,r8,r12
2524	vld1.32	{q8},[r14,:128]!
2525	and	r2,r2,r4
2526	eor	r12,r0,r4,ror#19
2527	eor	r0,r8,r8,ror#11
2528	eor	r2,r2,r6
2529	vrev32.8	q3,q3
2530	add	r7,r7,r12,ror#6
2531	eor	r12,r8,r9
2532	eor	r0,r0,r8,ror#20
2533	add	r7,r7,r2
2534	vadd.i32	q8,q8,q3
2535	ldr	r2,[sp,#52]
2536	and	r3,r3,r12
2537	add	r11,r11,r7
2538	add	r7,r7,r0,ror#2
2539	eor	r3,r3,r9
2540	add	r6,r6,r2
2541	eor	r2,r4,r5
2542	eor	r0,r11,r11,ror#5
2543	add	r7,r7,r3
2544	and	r2,r2,r11
2545	eor	r3,r0,r11,ror#19
2546	eor	r0,r7,r7,ror#11
2547	eor	r2,r2,r5
2548	add	r6,r6,r3,ror#6
2549	eor	r3,r7,r8
2550	eor	r0,r0,r7,ror#20
2551	add	r6,r6,r2
2552	ldr	r2,[sp,#56]
2553	and	r12,r12,r3
2554	add	r10,r10,r6
2555	add	r6,r6,r0,ror#2
2556	eor	r12,r12,r8
2557	add	r5,r5,r2
2558	eor	r2,r11,r4
2559	eor	r0,r10,r10,ror#5
2560	add	r6,r6,r12
2561	and	r2,r2,r10
2562	eor	r12,r0,r10,ror#19
2563	eor	r0,r6,r6,ror#11
2564	eor	r2,r2,r4
2565	add	r5,r5,r12,ror#6
2566	eor	r12,r6,r7
2567	eor	r0,r0,r6,ror#20
2568	add	r5,r5,r2
2569	ldr	r2,[sp,#60]
2570	and	r3,r3,r12
2571	add	r9,r9,r5
2572	add	r5,r5,r0,ror#2
2573	eor	r3,r3,r7
2574	add	r4,r4,r2
2575	eor	r2,r10,r11
2576	eor	r0,r9,r9,ror#5
2577	add	r5,r5,r3
2578	and	r2,r2,r9
2579	eor	r3,r0,r9,ror#19
2580	eor	r0,r5,r5,ror#11
2581	eor	r2,r2,r11
2582	add	r4,r4,r3,ror#6
2583	eor	r3,r5,r6
2584	eor	r0,r0,r5,ror#20
2585	add	r4,r4,r2
2586	ldr	r2,[sp,#64]
2587	and	r12,r12,r3
2588	add	r8,r8,r4
2589	add	r4,r4,r0,ror#2
2590	eor	r12,r12,r6
2591	vst1.32	{q8},[r1,:128]!
2592	ldr	r0,[r2,#0]
2593	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2594	ldr	r12,[r2,#4]
2595	ldr	r3,[r2,#8]
2596	ldr	r1,[r2,#12]
2597	add	r4,r4,r0			@ accumulate
2598	ldr	r0,[r2,#16]
2599	add	r5,r5,r12
2600	ldr	r12,[r2,#20]
2601	add	r6,r6,r3
2602	ldr	r3,[r2,#24]
2603	add	r7,r7,r1
2604	ldr	r1,[r2,#28]
2605	add	r8,r8,r0
2606	str	r4,[r2],#4
2607	add	r9,r9,r12
2608	str	r5,[r2],#4
2609	add	r10,r10,r3
2610	str	r6,[r2],#4
2611	add	r11,r11,r1
2612	str	r7,[r2],#4
2613	stmia	r2,{r8-r11}
2614
2615	ittte	ne
2616	movne	r1,sp
2617	ldrne	r2,[sp,#0]
2618	eorne	r12,r12,r12
2619	ldreq	sp,[sp,#76]			@ restore original sp
2620	itt	ne
2621	eorne	r3,r5,r6
2622	bne	.L_00_48
2623
2624	ldmia	sp!,{r4-r12,pc}
2625.size	zfs_sha256_block_neon,.-zfs_sha256_block_neon
2626
/*
 * INST(b0,b1,b2,b3) places one pre-encoded instruction into the
 * instruction stream as four literal bytes; the mnemonic each use stands
 * for is given in the comment that follows it.
 *
 * Without Thumb-2 the four bytes are emitted in the order given.
 * With Thumb-2 the two byte pairs are exchanged and 0xc is OR-ed into
 * the last argument.
 * NOTE(review): this looks like the conversion from the ARM encoding to
 * the Thumb-2 encoding of the same instruction (two little-endian
 * halfwords, leading halfword first) - confirm against the architecture
 * manual before changing any byte.
 */
# if !defined(__thumb2__)
#  define INST(b0,b1,b2,b3)	.byte	b0,b1,b2,b3
# else
#  define INST(b0,b1,b2,b3)	.byte	b2,b3|0xc,b0,b1
# endif
2632
/*
 * zfs_sha256_block_armv8 - SHA-256 block function using the ARMv8 SHA-256
 * instructions (sha256h, sha256h2, sha256su0, sha256su1), which are
 * emitted as raw bytes through INST().
 *
 * In:	r0 = hash state, eight 32-bit words; read on entry, written back
 *	     on exit
 *	r1 = input data, consumed in 64-byte blocks
 *	r2 = number of 64-byte blocks.  The end-of-input test is made only
 *	     after a block has been fetched, so the count must not be zero.
 *
 * Register use inside the function:
 *	r2       end-of-input pointer (r1 + 64 * block count)
 *	r3       walks the K256 table, 16 bytes per load, rewound per block
 *	r12      scratch while the address of K256 is formed
 *	q0,q1    working hash state
 *	q2       copy of q0 taken before sha256h, consumed by sha256h2
 *	q8-q11   message schedule words of the current block
 *	q12,q13  schedule words with the round constants added
 *	q14,q15  hash state saved at the start of the block
 *
 * r3 is not an input of this entry point, so the address of K256 is
 * computed here: an offset word stored right behind the initial branch
 * holds the assembly-time distance to K256 and is added to its own
 * address.  This is position independent and does not depend on how far
 * away K256 is, in both ARM and Thumb-2 builds.
 *
 * Clobbers r1-r3, r12, q0-q2, q8-q15 and the condition flags.  The stack
 * is not used.
 */
.globl	zfs_sha256_block_armv8
.type	zfs_sha256_block_armv8,%function
.align	5
zfs_sha256_block_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ q0,q1 = current hash state
	adr	r3,.LK256_off		@ r3 = address of the offset word
	ldr	r12,[r3]		@ r12 = K256 - .LK256_off
	add	r3,r3,r12		@ r3 = K256
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	2
.LK256_off:
.word	K256-.LK256_off			@ never executed, skipped by the branch

.align	4
.Loop_v8:
	@ fetch one 64-byte block and byte-swap every 32-bit word
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2			@ flags stay live until the bne below
	@ first 12 of 16 steps: hash update plus message schedule update
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ last 4 steps: hash update only, no further schedule update
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]		@ 16th load, no writeback
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14		@ add the state saved at block start
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8		@ flags from teq r1,r2 above

	vst1.32		{q0,q1},[r0]		@ write the updated state back

	bx	lr		@ return to caller
.size	zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
2772
2773#endif // #if __ARM_ARCH__ >= 7
2774#endif // #if defined(__arm__)
2775