1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
2bc3d5698SJohn Baldwin// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
3bc3d5698SJohn Baldwin//
4c0855eaaSJohn Baldwin// Licensed under the Apache License 2.0 (the "License").  You may not use
5bc3d5698SJohn Baldwin// this file except in compliance with the License.  You can obtain a copy
6bc3d5698SJohn Baldwin// in the file LICENSE in the source distribution or at
7bc3d5698SJohn Baldwin// https://www.openssl.org/source/license.html
8bc3d5698SJohn Baldwin
9bc3d5698SJohn Baldwin// ====================================================================
10bc3d5698SJohn Baldwin// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11bc3d5698SJohn Baldwin// project. The module is, however, dual licensed under OpenSSL and
12bc3d5698SJohn Baldwin// CRYPTOGAMS licenses depending on where you obtain it. For further
13bc3d5698SJohn Baldwin// details see http://www.openssl.org/~appro/cryptogams/.
14bc3d5698SJohn Baldwin//
15bc3d5698SJohn Baldwin// Permission to use under GPLv2 terms is granted.
16bc3d5698SJohn Baldwin// ====================================================================
17bc3d5698SJohn Baldwin//
18bc3d5698SJohn Baldwin// SHA256/512 for ARMv8.
19bc3d5698SJohn Baldwin//
20bc3d5698SJohn Baldwin// Performance in cycles per processed byte and improvement coefficient
21bc3d5698SJohn Baldwin// over code generated with "default" compiler:
22bc3d5698SJohn Baldwin//
23bc3d5698SJohn Baldwin//		SHA256-hw	SHA256(*)	SHA512
24bc3d5698SJohn Baldwin// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
25bc3d5698SJohn Baldwin// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
26bc3d5698SJohn Baldwin// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
27bc3d5698SJohn Baldwin// Denver	2.01		10.5 (+26%)	6.70 (+8%)
28bc3d5698SJohn Baldwin// X-Gene			20.0 (+100%)	12.8 (+300%(***))
29bc3d5698SJohn Baldwin// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
30bc3d5698SJohn Baldwin// Kryo		1.92		17.4 (+30%)	11.2 (+8%)
31c0855eaaSJohn Baldwin// ThunderX2	2.54		13.2 (+40%)	8.40 (+18%)
32bc3d5698SJohn Baldwin//
33bc3d5698SJohn Baldwin// (*)	Software SHA256 results are of lesser relevance, presented
34bc3d5698SJohn Baldwin//	mostly for informational purposes.
35bc3d5698SJohn Baldwin// (**)	The result is a trade-off: it's possible to improve it by
36bc3d5698SJohn Baldwin//	10% (or by 1 cycle per round), but at the cost of 20% loss
37bc3d5698SJohn Baldwin//	on Cortex-A53 (or by 4 cycles per round).
38bc3d5698SJohn Baldwin// (***)	Super-impressive coefficients over gcc-generated code are
39bc3d5698SJohn Baldwin//	an indication of some compiler "pathology"; most notably, code
40bc3d5698SJohn Baldwin//	generated with -mgeneral-regs-only is significantly faster
41bc3d5698SJohn Baldwin//	and the gap is only 40-90%.
42bc3d5698SJohn Baldwin//
43bc3d5698SJohn Baldwin// October 2016.
44bc3d5698SJohn Baldwin//
45bc3d5698SJohn Baldwin// Originally it was reckoned that it makes no sense to implement a NEON
46bc3d5698SJohn Baldwin// version of SHA256 for 64-bit processors. This is because the performance
47bc3d5698SJohn Baldwin// improvement on the most widespread Cortex-A5x processors was observed
48bc3d5698SJohn Baldwin// to be marginal (same performance on Cortex-A53, ~10% on A57). But then
49bc3d5698SJohn Baldwin// it was observed that 32-bit NEON SHA256 performs significantly better than
50bc3d5698SJohn Baldwin// the 64-bit scalar version on *some* of the more recent processors. As
51bc3d5698SJohn Baldwin// a result, a 64-bit NEON version of SHA256 was added to provide the best
52bc3d5698SJohn Baldwin// all-round performance. For example, it executes ~30% faster on X-Gene
53bc3d5698SJohn Baldwin// and Mongoose. [For reference, a NEON version of SHA512 is bound to
54bc3d5698SJohn Baldwin// deliver much less improvement, likely *negative* on Cortex-A5x,
55bc3d5698SJohn Baldwin// which is why NEON support is limited to SHA256.]
56bc3d5698SJohn Baldwin
57c0855eaaSJohn Baldwin// $output is the last argument if it looks like a file (it has an extension)
58c0855eaaSJohn Baldwin// $flavour is the first argument if it doesn't look like a file
59bc3d5698SJohn Baldwin#include "arm_arch.h"
60bd9588bcSAndrew Turner#ifndef	__KERNEL__
61c0855eaaSJohn Baldwin
62c0855eaaSJohn Baldwin.hidden	OPENSSL_armcap_P
63bc3d5698SJohn Baldwin#endif
64bc3d5698SJohn Baldwin
65bc3d5698SJohn Baldwin.text
66bc3d5698SJohn Baldwin
67bc3d5698SJohn Baldwin.globl	sha256_block_data_order
68bc3d5698SJohn Baldwin.type	sha256_block_data_order,%function
69bc3d5698SJohn Baldwin.align	6
70bc3d5698SJohn Baldwinsha256_block_data_order:
71bd9588bcSAndrew Turner	AARCH64_VALID_CALL_TARGET
72bc3d5698SJohn Baldwin#ifndef	__KERNEL__
73c0855eaaSJohn Baldwin	adrp	x16,OPENSSL_armcap_P
74c0855eaaSJohn Baldwin	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
75bc3d5698SJohn Baldwin	tst	w16,#ARMV8_SHA256
76bc3d5698SJohn Baldwin	b.ne	.Lv8_entry
77bc3d5698SJohn Baldwin	tst	w16,#ARMV7_NEON
78bc3d5698SJohn Baldwin	b.ne	.Lneon_entry
79bc3d5698SJohn Baldwin#endif
80bd9588bcSAndrew Turner	AARCH64_SIGN_LINK_REGISTER
81bc3d5698SJohn Baldwin	stp	x29,x30,[sp,#-128]!
82bc3d5698SJohn Baldwin	add	x29,sp,#0
83bc3d5698SJohn Baldwin
84bc3d5698SJohn Baldwin	stp	x19,x20,[sp,#16]
85bc3d5698SJohn Baldwin	stp	x21,x22,[sp,#32]
86bc3d5698SJohn Baldwin	stp	x23,x24,[sp,#48]
87bc3d5698SJohn Baldwin	stp	x25,x26,[sp,#64]
88bc3d5698SJohn Baldwin	stp	x27,x28,[sp,#80]
89bc3d5698SJohn Baldwin	sub	sp,sp,#4*4
90bc3d5698SJohn Baldwin
91bc3d5698SJohn Baldwin	ldp	w20,w21,[x0]				// load context
92bc3d5698SJohn Baldwin	ldp	w22,w23,[x0,#2*4]
93bc3d5698SJohn Baldwin	ldp	w24,w25,[x0,#4*4]
94bc3d5698SJohn Baldwin	add	x2,x1,x2,lsl#6	// end of input
95bc3d5698SJohn Baldwin	ldp	w26,w27,[x0,#6*4]
96bc3d5698SJohn Baldwin	adr	x30,.LK256
97bc3d5698SJohn Baldwin	stp	x0,x2,[x29,#96]
98bc3d5698SJohn Baldwin
99bc3d5698SJohn Baldwin.Loop:
100bc3d5698SJohn Baldwin	ldp	w3,w4,[x1],#2*4
101bc3d5698SJohn Baldwin	ldr	w19,[x30],#4			// *K++
102bc3d5698SJohn Baldwin	eor	w28,w21,w22				// magic seed
103bc3d5698SJohn Baldwin	str	x1,[x29,#112]
104bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
105bc3d5698SJohn Baldwin	rev	w3,w3			// 0
106bc3d5698SJohn Baldwin#endif
107bc3d5698SJohn Baldwin	ror	w16,w24,#6
108bc3d5698SJohn Baldwin	add	w27,w27,w19			// h+=K[i]
109bc3d5698SJohn Baldwin	eor	w6,w24,w24,ror#14
110bc3d5698SJohn Baldwin	and	w17,w25,w24
111bc3d5698SJohn Baldwin	bic	w19,w26,w24
112bc3d5698SJohn Baldwin	add	w27,w27,w3			// h+=X[i]
113bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
114bc3d5698SJohn Baldwin	eor	w19,w20,w21			// a^b, b^c in next round
115bc3d5698SJohn Baldwin	eor	w16,w16,w6,ror#11	// Sigma1(e)
116bc3d5698SJohn Baldwin	ror	w6,w20,#2
117bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Ch(e,f,g)
118bc3d5698SJohn Baldwin	eor	w17,w20,w20,ror#9
119bc3d5698SJohn Baldwin	add	w27,w27,w16			// h+=Sigma1(e)
120bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
121bc3d5698SJohn Baldwin	add	w23,w23,w27			// d+=h
122bc3d5698SJohn Baldwin	eor	w28,w28,w21			// Maj(a,b,c)
123bc3d5698SJohn Baldwin	eor	w17,w6,w17,ror#13	// Sigma0(a)
124bc3d5698SJohn Baldwin	add	w27,w27,w28			// h+=Maj(a,b,c)
125bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
126bc3d5698SJohn Baldwin	//add	w27,w27,w17			// h+=Sigma0(a)
127bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
128bc3d5698SJohn Baldwin	rev	w4,w4			// 1
129bc3d5698SJohn Baldwin#endif
130bc3d5698SJohn Baldwin	ldp	w5,w6,[x1],#2*4
131bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Sigma0(a)
132bc3d5698SJohn Baldwin	ror	w16,w23,#6
133bc3d5698SJohn Baldwin	add	w26,w26,w28			// h+=K[i]
134bc3d5698SJohn Baldwin	eor	w7,w23,w23,ror#14
135bc3d5698SJohn Baldwin	and	w17,w24,w23
136bc3d5698SJohn Baldwin	bic	w28,w25,w23
137bc3d5698SJohn Baldwin	add	w26,w26,w4			// h+=X[i]
138bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
139bc3d5698SJohn Baldwin	eor	w28,w27,w20			// a^b, b^c in next round
140bc3d5698SJohn Baldwin	eor	w16,w16,w7,ror#11	// Sigma1(e)
141bc3d5698SJohn Baldwin	ror	w7,w27,#2
142bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Ch(e,f,g)
143bc3d5698SJohn Baldwin	eor	w17,w27,w27,ror#9
144bc3d5698SJohn Baldwin	add	w26,w26,w16			// h+=Sigma1(e)
145bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
146bc3d5698SJohn Baldwin	add	w22,w22,w26			// d+=h
147bc3d5698SJohn Baldwin	eor	w19,w19,w20			// Maj(a,b,c)
148bc3d5698SJohn Baldwin	eor	w17,w7,w17,ror#13	// Sigma0(a)
149bc3d5698SJohn Baldwin	add	w26,w26,w19			// h+=Maj(a,b,c)
150bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
151bc3d5698SJohn Baldwin	//add	w26,w26,w17			// h+=Sigma0(a)
152bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
153bc3d5698SJohn Baldwin	rev	w5,w5			// 2
154bc3d5698SJohn Baldwin#endif
155bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Sigma0(a)
156bc3d5698SJohn Baldwin	ror	w16,w22,#6
157bc3d5698SJohn Baldwin	add	w25,w25,w19			// h+=K[i]
158bc3d5698SJohn Baldwin	eor	w8,w22,w22,ror#14
159bc3d5698SJohn Baldwin	and	w17,w23,w22
160bc3d5698SJohn Baldwin	bic	w19,w24,w22
161bc3d5698SJohn Baldwin	add	w25,w25,w5			// h+=X[i]
162bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
163bc3d5698SJohn Baldwin	eor	w19,w26,w27			// a^b, b^c in next round
164bc3d5698SJohn Baldwin	eor	w16,w16,w8,ror#11	// Sigma1(e)
165bc3d5698SJohn Baldwin	ror	w8,w26,#2
166bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Ch(e,f,g)
167bc3d5698SJohn Baldwin	eor	w17,w26,w26,ror#9
168bc3d5698SJohn Baldwin	add	w25,w25,w16			// h+=Sigma1(e)
169bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
170bc3d5698SJohn Baldwin	add	w21,w21,w25			// d+=h
171bc3d5698SJohn Baldwin	eor	w28,w28,w27			// Maj(a,b,c)
172bc3d5698SJohn Baldwin	eor	w17,w8,w17,ror#13	// Sigma0(a)
173bc3d5698SJohn Baldwin	add	w25,w25,w28			// h+=Maj(a,b,c)
174bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
175bc3d5698SJohn Baldwin	//add	w25,w25,w17			// h+=Sigma0(a)
176bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
177bc3d5698SJohn Baldwin	rev	w6,w6			// 3
178bc3d5698SJohn Baldwin#endif
179bc3d5698SJohn Baldwin	ldp	w7,w8,[x1],#2*4
180bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Sigma0(a)
181bc3d5698SJohn Baldwin	ror	w16,w21,#6
182bc3d5698SJohn Baldwin	add	w24,w24,w28			// h+=K[i]
183bc3d5698SJohn Baldwin	eor	w9,w21,w21,ror#14
184bc3d5698SJohn Baldwin	and	w17,w22,w21
185bc3d5698SJohn Baldwin	bic	w28,w23,w21
186bc3d5698SJohn Baldwin	add	w24,w24,w6			// h+=X[i]
187bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
188bc3d5698SJohn Baldwin	eor	w28,w25,w26			// a^b, b^c in next round
189bc3d5698SJohn Baldwin	eor	w16,w16,w9,ror#11	// Sigma1(e)
190bc3d5698SJohn Baldwin	ror	w9,w25,#2
191bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Ch(e,f,g)
192bc3d5698SJohn Baldwin	eor	w17,w25,w25,ror#9
193bc3d5698SJohn Baldwin	add	w24,w24,w16			// h+=Sigma1(e)
194bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
195bc3d5698SJohn Baldwin	add	w20,w20,w24			// d+=h
196bc3d5698SJohn Baldwin	eor	w19,w19,w26			// Maj(a,b,c)
197bc3d5698SJohn Baldwin	eor	w17,w9,w17,ror#13	// Sigma0(a)
198bc3d5698SJohn Baldwin	add	w24,w24,w19			// h+=Maj(a,b,c)
199bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
200bc3d5698SJohn Baldwin	//add	w24,w24,w17			// h+=Sigma0(a)
201bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
202bc3d5698SJohn Baldwin	rev	w7,w7			// 4
203bc3d5698SJohn Baldwin#endif
204bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Sigma0(a)
205bc3d5698SJohn Baldwin	ror	w16,w20,#6
206bc3d5698SJohn Baldwin	add	w23,w23,w19			// h+=K[i]
207bc3d5698SJohn Baldwin	eor	w10,w20,w20,ror#14
208bc3d5698SJohn Baldwin	and	w17,w21,w20
209bc3d5698SJohn Baldwin	bic	w19,w22,w20
210bc3d5698SJohn Baldwin	add	w23,w23,w7			// h+=X[i]
211bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
212bc3d5698SJohn Baldwin	eor	w19,w24,w25			// a^b, b^c in next round
213bc3d5698SJohn Baldwin	eor	w16,w16,w10,ror#11	// Sigma1(e)
214bc3d5698SJohn Baldwin	ror	w10,w24,#2
215bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Ch(e,f,g)
216bc3d5698SJohn Baldwin	eor	w17,w24,w24,ror#9
217bc3d5698SJohn Baldwin	add	w23,w23,w16			// h+=Sigma1(e)
218bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
219bc3d5698SJohn Baldwin	add	w27,w27,w23			// d+=h
220bc3d5698SJohn Baldwin	eor	w28,w28,w25			// Maj(a,b,c)
221bc3d5698SJohn Baldwin	eor	w17,w10,w17,ror#13	// Sigma0(a)
222bc3d5698SJohn Baldwin	add	w23,w23,w28			// h+=Maj(a,b,c)
223bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
224bc3d5698SJohn Baldwin	//add	w23,w23,w17			// h+=Sigma0(a)
225bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
226bc3d5698SJohn Baldwin	rev	w8,w8			// 5
227bc3d5698SJohn Baldwin#endif
228bc3d5698SJohn Baldwin	ldp	w9,w10,[x1],#2*4
229bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Sigma0(a)
230bc3d5698SJohn Baldwin	ror	w16,w27,#6
231bc3d5698SJohn Baldwin	add	w22,w22,w28			// h+=K[i]
232bc3d5698SJohn Baldwin	eor	w11,w27,w27,ror#14
233bc3d5698SJohn Baldwin	and	w17,w20,w27
234bc3d5698SJohn Baldwin	bic	w28,w21,w27
235bc3d5698SJohn Baldwin	add	w22,w22,w8			// h+=X[i]
236bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
237bc3d5698SJohn Baldwin	eor	w28,w23,w24			// a^b, b^c in next round
238bc3d5698SJohn Baldwin	eor	w16,w16,w11,ror#11	// Sigma1(e)
239bc3d5698SJohn Baldwin	ror	w11,w23,#2
240bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Ch(e,f,g)
241bc3d5698SJohn Baldwin	eor	w17,w23,w23,ror#9
242bc3d5698SJohn Baldwin	add	w22,w22,w16			// h+=Sigma1(e)
243bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
244bc3d5698SJohn Baldwin	add	w26,w26,w22			// d+=h
245bc3d5698SJohn Baldwin	eor	w19,w19,w24			// Maj(a,b,c)
246bc3d5698SJohn Baldwin	eor	w17,w11,w17,ror#13	// Sigma0(a)
247bc3d5698SJohn Baldwin	add	w22,w22,w19			// h+=Maj(a,b,c)
248bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
249bc3d5698SJohn Baldwin	//add	w22,w22,w17			// h+=Sigma0(a)
250bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
251bc3d5698SJohn Baldwin	rev	w9,w9			// 6
252bc3d5698SJohn Baldwin#endif
253bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Sigma0(a)
254bc3d5698SJohn Baldwin	ror	w16,w26,#6
255bc3d5698SJohn Baldwin	add	w21,w21,w19			// h+=K[i]
256bc3d5698SJohn Baldwin	eor	w12,w26,w26,ror#14
257bc3d5698SJohn Baldwin	and	w17,w27,w26
258bc3d5698SJohn Baldwin	bic	w19,w20,w26
259bc3d5698SJohn Baldwin	add	w21,w21,w9			// h+=X[i]
260bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
261bc3d5698SJohn Baldwin	eor	w19,w22,w23			// a^b, b^c in next round
262bc3d5698SJohn Baldwin	eor	w16,w16,w12,ror#11	// Sigma1(e)
263bc3d5698SJohn Baldwin	ror	w12,w22,#2
264bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Ch(e,f,g)
265bc3d5698SJohn Baldwin	eor	w17,w22,w22,ror#9
266bc3d5698SJohn Baldwin	add	w21,w21,w16			// h+=Sigma1(e)
267bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
268bc3d5698SJohn Baldwin	add	w25,w25,w21			// d+=h
269bc3d5698SJohn Baldwin	eor	w28,w28,w23			// Maj(a,b,c)
270bc3d5698SJohn Baldwin	eor	w17,w12,w17,ror#13	// Sigma0(a)
271bc3d5698SJohn Baldwin	add	w21,w21,w28			// h+=Maj(a,b,c)
272bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
273bc3d5698SJohn Baldwin	//add	w21,w21,w17			// h+=Sigma0(a)
274bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
275bc3d5698SJohn Baldwin	rev	w10,w10			// 7
276bc3d5698SJohn Baldwin#endif
277bc3d5698SJohn Baldwin	ldp	w11,w12,[x1],#2*4
278bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Sigma0(a)
279bc3d5698SJohn Baldwin	ror	w16,w25,#6
280bc3d5698SJohn Baldwin	add	w20,w20,w28			// h+=K[i]
281bc3d5698SJohn Baldwin	eor	w13,w25,w25,ror#14
282bc3d5698SJohn Baldwin	and	w17,w26,w25
283bc3d5698SJohn Baldwin	bic	w28,w27,w25
284bc3d5698SJohn Baldwin	add	w20,w20,w10			// h+=X[i]
285bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
286bc3d5698SJohn Baldwin	eor	w28,w21,w22			// a^b, b^c in next round
287bc3d5698SJohn Baldwin	eor	w16,w16,w13,ror#11	// Sigma1(e)
288bc3d5698SJohn Baldwin	ror	w13,w21,#2
289bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Ch(e,f,g)
290bc3d5698SJohn Baldwin	eor	w17,w21,w21,ror#9
291bc3d5698SJohn Baldwin	add	w20,w20,w16			// h+=Sigma1(e)
292bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
293bc3d5698SJohn Baldwin	add	w24,w24,w20			// d+=h
294bc3d5698SJohn Baldwin	eor	w19,w19,w22			// Maj(a,b,c)
295bc3d5698SJohn Baldwin	eor	w17,w13,w17,ror#13	// Sigma0(a)
296bc3d5698SJohn Baldwin	add	w20,w20,w19			// h+=Maj(a,b,c)
297bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
298bc3d5698SJohn Baldwin	//add	w20,w20,w17			// h+=Sigma0(a)
299bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
300bc3d5698SJohn Baldwin	rev	w11,w11			// 8
301bc3d5698SJohn Baldwin#endif
302bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Sigma0(a)
303bc3d5698SJohn Baldwin	ror	w16,w24,#6
304bc3d5698SJohn Baldwin	add	w27,w27,w19			// h+=K[i]
305bc3d5698SJohn Baldwin	eor	w14,w24,w24,ror#14
306bc3d5698SJohn Baldwin	and	w17,w25,w24
307bc3d5698SJohn Baldwin	bic	w19,w26,w24
308bc3d5698SJohn Baldwin	add	w27,w27,w11			// h+=X[i]
309bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
310bc3d5698SJohn Baldwin	eor	w19,w20,w21			// a^b, b^c in next round
311bc3d5698SJohn Baldwin	eor	w16,w16,w14,ror#11	// Sigma1(e)
312bc3d5698SJohn Baldwin	ror	w14,w20,#2
313bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Ch(e,f,g)
314bc3d5698SJohn Baldwin	eor	w17,w20,w20,ror#9
315bc3d5698SJohn Baldwin	add	w27,w27,w16			// h+=Sigma1(e)
316bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
317bc3d5698SJohn Baldwin	add	w23,w23,w27			// d+=h
318bc3d5698SJohn Baldwin	eor	w28,w28,w21			// Maj(a,b,c)
319bc3d5698SJohn Baldwin	eor	w17,w14,w17,ror#13	// Sigma0(a)
320bc3d5698SJohn Baldwin	add	w27,w27,w28			// h+=Maj(a,b,c)
321bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
322bc3d5698SJohn Baldwin	//add	w27,w27,w17			// h+=Sigma0(a)
323bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
324bc3d5698SJohn Baldwin	rev	w12,w12			// 9
325bc3d5698SJohn Baldwin#endif
326bc3d5698SJohn Baldwin	ldp	w13,w14,[x1],#2*4
327bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Sigma0(a)
328bc3d5698SJohn Baldwin	ror	w16,w23,#6
329bc3d5698SJohn Baldwin	add	w26,w26,w28			// h+=K[i]
330bc3d5698SJohn Baldwin	eor	w15,w23,w23,ror#14
331bc3d5698SJohn Baldwin	and	w17,w24,w23
332bc3d5698SJohn Baldwin	bic	w28,w25,w23
333bc3d5698SJohn Baldwin	add	w26,w26,w12			// h+=X[i]
334bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
335bc3d5698SJohn Baldwin	eor	w28,w27,w20			// a^b, b^c in next round
336bc3d5698SJohn Baldwin	eor	w16,w16,w15,ror#11	// Sigma1(e)
337bc3d5698SJohn Baldwin	ror	w15,w27,#2
338bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Ch(e,f,g)
339bc3d5698SJohn Baldwin	eor	w17,w27,w27,ror#9
340bc3d5698SJohn Baldwin	add	w26,w26,w16			// h+=Sigma1(e)
341bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
342bc3d5698SJohn Baldwin	add	w22,w22,w26			// d+=h
343bc3d5698SJohn Baldwin	eor	w19,w19,w20			// Maj(a,b,c)
344bc3d5698SJohn Baldwin	eor	w17,w15,w17,ror#13	// Sigma0(a)
345bc3d5698SJohn Baldwin	add	w26,w26,w19			// h+=Maj(a,b,c)
346bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
347bc3d5698SJohn Baldwin	//add	w26,w26,w17			// h+=Sigma0(a)
348bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
349bc3d5698SJohn Baldwin	rev	w13,w13			// 10
350bc3d5698SJohn Baldwin#endif
351bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Sigma0(a)
352bc3d5698SJohn Baldwin	ror	w16,w22,#6
353bc3d5698SJohn Baldwin	add	w25,w25,w19			// h+=K[i]
354bc3d5698SJohn Baldwin	eor	w0,w22,w22,ror#14
355bc3d5698SJohn Baldwin	and	w17,w23,w22
356bc3d5698SJohn Baldwin	bic	w19,w24,w22
357bc3d5698SJohn Baldwin	add	w25,w25,w13			// h+=X[i]
358bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
359bc3d5698SJohn Baldwin	eor	w19,w26,w27			// a^b, b^c in next round
360bc3d5698SJohn Baldwin	eor	w16,w16,w0,ror#11	// Sigma1(e)
361bc3d5698SJohn Baldwin	ror	w0,w26,#2
362bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Ch(e,f,g)
363bc3d5698SJohn Baldwin	eor	w17,w26,w26,ror#9
364bc3d5698SJohn Baldwin	add	w25,w25,w16			// h+=Sigma1(e)
365bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
366bc3d5698SJohn Baldwin	add	w21,w21,w25			// d+=h
367bc3d5698SJohn Baldwin	eor	w28,w28,w27			// Maj(a,b,c)
368bc3d5698SJohn Baldwin	eor	w17,w0,w17,ror#13	// Sigma0(a)
369bc3d5698SJohn Baldwin	add	w25,w25,w28			// h+=Maj(a,b,c)
370bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
371bc3d5698SJohn Baldwin	//add	w25,w25,w17			// h+=Sigma0(a)
372bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
373bc3d5698SJohn Baldwin	rev	w14,w14			// 11
374bc3d5698SJohn Baldwin#endif
375bc3d5698SJohn Baldwin	ldp	w15,w0,[x1],#2*4
376bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Sigma0(a)
377bc3d5698SJohn Baldwin	str	w6,[sp,#12]
378bc3d5698SJohn Baldwin	ror	w16,w21,#6
379bc3d5698SJohn Baldwin	add	w24,w24,w28			// h+=K[i]
380bc3d5698SJohn Baldwin	eor	w6,w21,w21,ror#14
381bc3d5698SJohn Baldwin	and	w17,w22,w21
382bc3d5698SJohn Baldwin	bic	w28,w23,w21
383bc3d5698SJohn Baldwin	add	w24,w24,w14			// h+=X[i]
384bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
385bc3d5698SJohn Baldwin	eor	w28,w25,w26			// a^b, b^c in next round
386bc3d5698SJohn Baldwin	eor	w16,w16,w6,ror#11	// Sigma1(e)
387bc3d5698SJohn Baldwin	ror	w6,w25,#2
388bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Ch(e,f,g)
389bc3d5698SJohn Baldwin	eor	w17,w25,w25,ror#9
390bc3d5698SJohn Baldwin	add	w24,w24,w16			// h+=Sigma1(e)
391bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
392bc3d5698SJohn Baldwin	add	w20,w20,w24			// d+=h
393bc3d5698SJohn Baldwin	eor	w19,w19,w26			// Maj(a,b,c)
394bc3d5698SJohn Baldwin	eor	w17,w6,w17,ror#13	// Sigma0(a)
395bc3d5698SJohn Baldwin	add	w24,w24,w19			// h+=Maj(a,b,c)
396bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
397bc3d5698SJohn Baldwin	//add	w24,w24,w17			// h+=Sigma0(a)
398bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
399bc3d5698SJohn Baldwin	rev	w15,w15			// 12
400bc3d5698SJohn Baldwin#endif
401bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Sigma0(a)
402bc3d5698SJohn Baldwin	str	w7,[sp,#0]
403bc3d5698SJohn Baldwin	ror	w16,w20,#6
404bc3d5698SJohn Baldwin	add	w23,w23,w19			// h+=K[i]
405bc3d5698SJohn Baldwin	eor	w7,w20,w20,ror#14
406bc3d5698SJohn Baldwin	and	w17,w21,w20
407bc3d5698SJohn Baldwin	bic	w19,w22,w20
408bc3d5698SJohn Baldwin	add	w23,w23,w15			// h+=X[i]
409bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
410bc3d5698SJohn Baldwin	eor	w19,w24,w25			// a^b, b^c in next round
411bc3d5698SJohn Baldwin	eor	w16,w16,w7,ror#11	// Sigma1(e)
412bc3d5698SJohn Baldwin	ror	w7,w24,#2
413bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Ch(e,f,g)
414bc3d5698SJohn Baldwin	eor	w17,w24,w24,ror#9
415bc3d5698SJohn Baldwin	add	w23,w23,w16			// h+=Sigma1(e)
416bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
417bc3d5698SJohn Baldwin	add	w27,w27,w23			// d+=h
418bc3d5698SJohn Baldwin	eor	w28,w28,w25			// Maj(a,b,c)
419bc3d5698SJohn Baldwin	eor	w17,w7,w17,ror#13	// Sigma0(a)
420bc3d5698SJohn Baldwin	add	w23,w23,w28			// h+=Maj(a,b,c)
421bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
422bc3d5698SJohn Baldwin	//add	w23,w23,w17			// h+=Sigma0(a)
423bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
424bc3d5698SJohn Baldwin	rev	w0,w0			// 13
425bc3d5698SJohn Baldwin#endif
426bc3d5698SJohn Baldwin	ldp	w1,w2,[x1]
427bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Sigma0(a)
428bc3d5698SJohn Baldwin	str	w8,[sp,#4]
429bc3d5698SJohn Baldwin	ror	w16,w27,#6
430bc3d5698SJohn Baldwin	add	w22,w22,w28			// h+=K[i]
431bc3d5698SJohn Baldwin	eor	w8,w27,w27,ror#14
432bc3d5698SJohn Baldwin	and	w17,w20,w27
433bc3d5698SJohn Baldwin	bic	w28,w21,w27
434bc3d5698SJohn Baldwin	add	w22,w22,w0			// h+=X[i]
435bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
436bc3d5698SJohn Baldwin	eor	w28,w23,w24			// a^b, b^c in next round
437bc3d5698SJohn Baldwin	eor	w16,w16,w8,ror#11	// Sigma1(e)
438bc3d5698SJohn Baldwin	ror	w8,w23,#2
439bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Ch(e,f,g)
440bc3d5698SJohn Baldwin	eor	w17,w23,w23,ror#9
441bc3d5698SJohn Baldwin	add	w22,w22,w16			// h+=Sigma1(e)
442bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
443bc3d5698SJohn Baldwin	add	w26,w26,w22			// d+=h
444bc3d5698SJohn Baldwin	eor	w19,w19,w24			// Maj(a,b,c)
445bc3d5698SJohn Baldwin	eor	w17,w8,w17,ror#13	// Sigma0(a)
446bc3d5698SJohn Baldwin	add	w22,w22,w19			// h+=Maj(a,b,c)
447bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
448bc3d5698SJohn Baldwin	//add	w22,w22,w17			// h+=Sigma0(a)
449bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
450bc3d5698SJohn Baldwin	rev	w1,w1			// 14
451bc3d5698SJohn Baldwin#endif
452bc3d5698SJohn Baldwin	ldr	w6,[sp,#12]
453bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Sigma0(a)
454bc3d5698SJohn Baldwin	str	w9,[sp,#8]
455bc3d5698SJohn Baldwin	ror	w16,w26,#6
456bc3d5698SJohn Baldwin	add	w21,w21,w19			// h+=K[i]
457bc3d5698SJohn Baldwin	eor	w9,w26,w26,ror#14
458bc3d5698SJohn Baldwin	and	w17,w27,w26
459bc3d5698SJohn Baldwin	bic	w19,w20,w26
460bc3d5698SJohn Baldwin	add	w21,w21,w1			// h+=X[i]
461bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
462bc3d5698SJohn Baldwin	eor	w19,w22,w23			// a^b, b^c in next round
463bc3d5698SJohn Baldwin	eor	w16,w16,w9,ror#11	// Sigma1(e)
464bc3d5698SJohn Baldwin	ror	w9,w22,#2
465bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Ch(e,f,g)
466bc3d5698SJohn Baldwin	eor	w17,w22,w22,ror#9
467bc3d5698SJohn Baldwin	add	w21,w21,w16			// h+=Sigma1(e)
468bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
469bc3d5698SJohn Baldwin	add	w25,w25,w21			// d+=h
470bc3d5698SJohn Baldwin	eor	w28,w28,w23			// Maj(a,b,c)
471bc3d5698SJohn Baldwin	eor	w17,w9,w17,ror#13	// Sigma0(a)
472bc3d5698SJohn Baldwin	add	w21,w21,w28			// h+=Maj(a,b,c)
473bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
474bc3d5698SJohn Baldwin	//add	w21,w21,w17			// h+=Sigma0(a)
475bc3d5698SJohn Baldwin#ifndef	__AARCH64EB__
476bc3d5698SJohn Baldwin	rev	w2,w2			// 15
477bc3d5698SJohn Baldwin#endif
478bc3d5698SJohn Baldwin	ldr	w7,[sp,#0]
479bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Sigma0(a)
480bc3d5698SJohn Baldwin	str	w10,[sp,#12]
481bc3d5698SJohn Baldwin	ror	w16,w25,#6
482bc3d5698SJohn Baldwin	add	w20,w20,w28			// h+=K[i]
483bc3d5698SJohn Baldwin	ror	w9,w4,#7
484bc3d5698SJohn Baldwin	and	w17,w26,w25
485bc3d5698SJohn Baldwin	ror	w8,w1,#17
486bc3d5698SJohn Baldwin	bic	w28,w27,w25
487bc3d5698SJohn Baldwin	ror	w10,w21,#2
488bc3d5698SJohn Baldwin	add	w20,w20,w2			// h+=X[i]
489bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#11
490bc3d5698SJohn Baldwin	eor	w9,w9,w4,ror#18
491bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
492bc3d5698SJohn Baldwin	eor	w28,w21,w22			// a^b, b^c in next round
493bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#25	// Sigma1(e)
494bc3d5698SJohn Baldwin	eor	w10,w10,w21,ror#13
495bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Ch(e,f,g)
496bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
497bc3d5698SJohn Baldwin	eor	w8,w8,w1,ror#19
498bc3d5698SJohn Baldwin	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
499bc3d5698SJohn Baldwin	add	w20,w20,w16			// h+=Sigma1(e)
500bc3d5698SJohn Baldwin	eor	w19,w19,w22			// Maj(a,b,c)
501bc3d5698SJohn Baldwin	eor	w17,w10,w21,ror#22	// Sigma0(a)
502bc3d5698SJohn Baldwin	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
503bc3d5698SJohn Baldwin	add	w3,w3,w12
504bc3d5698SJohn Baldwin	add	w24,w24,w20			// d+=h
505bc3d5698SJohn Baldwin	add	w20,w20,w19			// h+=Maj(a,b,c)
506bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
507bc3d5698SJohn Baldwin	add	w3,w3,w9
508bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Sigma0(a)
509bc3d5698SJohn Baldwin	add	w3,w3,w8
510bc3d5698SJohn Baldwin.Loop_16_xx:
511bc3d5698SJohn Baldwin	ldr	w8,[sp,#4]
512bc3d5698SJohn Baldwin	str	w11,[sp,#0]
513bc3d5698SJohn Baldwin	ror	w16,w24,#6
514bc3d5698SJohn Baldwin	add	w27,w27,w19			// h+=K[i]
515bc3d5698SJohn Baldwin	ror	w10,w5,#7
516bc3d5698SJohn Baldwin	and	w17,w25,w24
517bc3d5698SJohn Baldwin	ror	w9,w2,#17
518bc3d5698SJohn Baldwin	bic	w19,w26,w24
519bc3d5698SJohn Baldwin	ror	w11,w20,#2
520bc3d5698SJohn Baldwin	add	w27,w27,w3			// h+=X[i]
521bc3d5698SJohn Baldwin	eor	w16,w16,w24,ror#11
522bc3d5698SJohn Baldwin	eor	w10,w10,w5,ror#18
523bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
524bc3d5698SJohn Baldwin	eor	w19,w20,w21			// a^b, b^c in next round
525bc3d5698SJohn Baldwin	eor	w16,w16,w24,ror#25	// Sigma1(e)
526bc3d5698SJohn Baldwin	eor	w11,w11,w20,ror#13
527bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Ch(e,f,g)
528bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
529bc3d5698SJohn Baldwin	eor	w9,w9,w2,ror#19
530bc3d5698SJohn Baldwin	eor	w10,w10,w5,lsr#3	// sigma0(X[i+1])
531bc3d5698SJohn Baldwin	add	w27,w27,w16			// h+=Sigma1(e)
532bc3d5698SJohn Baldwin	eor	w28,w28,w21			// Maj(a,b,c)
533bc3d5698SJohn Baldwin	eor	w17,w11,w20,ror#22	// Sigma0(a)
534bc3d5698SJohn Baldwin	eor	w9,w9,w2,lsr#10	// sigma1(X[i+14])
535bc3d5698SJohn Baldwin	add	w4,w4,w13
536bc3d5698SJohn Baldwin	add	w23,w23,w27			// d+=h
537bc3d5698SJohn Baldwin	add	w27,w27,w28			// h+=Maj(a,b,c)
538bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
539bc3d5698SJohn Baldwin	add	w4,w4,w10
540bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Sigma0(a)
541bc3d5698SJohn Baldwin	add	w4,w4,w9
542bc3d5698SJohn Baldwin	ldr	w9,[sp,#8]
543bc3d5698SJohn Baldwin	str	w12,[sp,#4]
544bc3d5698SJohn Baldwin	ror	w16,w23,#6
545bc3d5698SJohn Baldwin	add	w26,w26,w28			// h+=K[i]
546bc3d5698SJohn Baldwin	ror	w11,w6,#7
547bc3d5698SJohn Baldwin	and	w17,w24,w23
548bc3d5698SJohn Baldwin	ror	w10,w3,#17
549bc3d5698SJohn Baldwin	bic	w28,w25,w23
550bc3d5698SJohn Baldwin	ror	w12,w27,#2
551bc3d5698SJohn Baldwin	add	w26,w26,w4			// h+=X[i]
552bc3d5698SJohn Baldwin	eor	w16,w16,w23,ror#11
553bc3d5698SJohn Baldwin	eor	w11,w11,w6,ror#18
554bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
555bc3d5698SJohn Baldwin	eor	w28,w27,w20			// a^b, b^c in next round
556bc3d5698SJohn Baldwin	eor	w16,w16,w23,ror#25	// Sigma1(e)
557bc3d5698SJohn Baldwin	eor	w12,w12,w27,ror#13
558bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Ch(e,f,g)
559bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
560bc3d5698SJohn Baldwin	eor	w10,w10,w3,ror#19
561bc3d5698SJohn Baldwin	eor	w11,w11,w6,lsr#3	// sigma0(X[i+1])
562bc3d5698SJohn Baldwin	add	w26,w26,w16			// h+=Sigma1(e)
563bc3d5698SJohn Baldwin	eor	w19,w19,w20			// Maj(a,b,c)
564bc3d5698SJohn Baldwin	eor	w17,w12,w27,ror#22	// Sigma0(a)
565bc3d5698SJohn Baldwin	eor	w10,w10,w3,lsr#10	// sigma1(X[i+14])
566bc3d5698SJohn Baldwin	add	w5,w5,w14
567bc3d5698SJohn Baldwin	add	w22,w22,w26			// d+=h
568bc3d5698SJohn Baldwin	add	w26,w26,w19			// h+=Maj(a,b,c)
569bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
570bc3d5698SJohn Baldwin	add	w5,w5,w11
571bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Sigma0(a)
572bc3d5698SJohn Baldwin	add	w5,w5,w10
573bc3d5698SJohn Baldwin	ldr	w10,[sp,#12]
574bc3d5698SJohn Baldwin	str	w13,[sp,#8]
575bc3d5698SJohn Baldwin	ror	w16,w22,#6
576bc3d5698SJohn Baldwin	add	w25,w25,w19			// h+=K[i]
577bc3d5698SJohn Baldwin	ror	w12,w7,#7
578bc3d5698SJohn Baldwin	and	w17,w23,w22
579bc3d5698SJohn Baldwin	ror	w11,w4,#17
580bc3d5698SJohn Baldwin	bic	w19,w24,w22
581bc3d5698SJohn Baldwin	ror	w13,w26,#2
582bc3d5698SJohn Baldwin	add	w25,w25,w5			// h+=X[i]
583bc3d5698SJohn Baldwin	eor	w16,w16,w22,ror#11
584bc3d5698SJohn Baldwin	eor	w12,w12,w7,ror#18
585bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
586bc3d5698SJohn Baldwin	eor	w19,w26,w27			// a^b, b^c in next round
587bc3d5698SJohn Baldwin	eor	w16,w16,w22,ror#25	// Sigma1(e)
588bc3d5698SJohn Baldwin	eor	w13,w13,w26,ror#13
589bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Ch(e,f,g)
590bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
591bc3d5698SJohn Baldwin	eor	w11,w11,w4,ror#19
592bc3d5698SJohn Baldwin	eor	w12,w12,w7,lsr#3	// sigma0(X[i+1])
593bc3d5698SJohn Baldwin	add	w25,w25,w16			// h+=Sigma1(e)
594bc3d5698SJohn Baldwin	eor	w28,w28,w27			// Maj(a,b,c)
595bc3d5698SJohn Baldwin	eor	w17,w13,w26,ror#22	// Sigma0(a)
596bc3d5698SJohn Baldwin	eor	w11,w11,w4,lsr#10	// sigma1(X[i+14])
597bc3d5698SJohn Baldwin	add	w6,w6,w15
598bc3d5698SJohn Baldwin	add	w21,w21,w25			// d+=h
599bc3d5698SJohn Baldwin	add	w25,w25,w28			// h+=Maj(a,b,c)
600bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
601bc3d5698SJohn Baldwin	add	w6,w6,w12
602bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Sigma0(a)
603bc3d5698SJohn Baldwin	add	w6,w6,w11
604bc3d5698SJohn Baldwin	ldr	w11,[sp,#0]
605bc3d5698SJohn Baldwin	str	w14,[sp,#12]
606bc3d5698SJohn Baldwin	ror	w16,w21,#6
607bc3d5698SJohn Baldwin	add	w24,w24,w28			// h+=K[i]
608bc3d5698SJohn Baldwin	ror	w13,w8,#7
609bc3d5698SJohn Baldwin	and	w17,w22,w21
610bc3d5698SJohn Baldwin	ror	w12,w5,#17
611bc3d5698SJohn Baldwin	bic	w28,w23,w21
612bc3d5698SJohn Baldwin	ror	w14,w25,#2
613bc3d5698SJohn Baldwin	add	w24,w24,w6			// h+=X[i]
614bc3d5698SJohn Baldwin	eor	w16,w16,w21,ror#11
615bc3d5698SJohn Baldwin	eor	w13,w13,w8,ror#18
616bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
617bc3d5698SJohn Baldwin	eor	w28,w25,w26			// a^b, b^c in next round
618bc3d5698SJohn Baldwin	eor	w16,w16,w21,ror#25	// Sigma1(e)
619bc3d5698SJohn Baldwin	eor	w14,w14,w25,ror#13
620bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Ch(e,f,g)
621bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
622bc3d5698SJohn Baldwin	eor	w12,w12,w5,ror#19
623bc3d5698SJohn Baldwin	eor	w13,w13,w8,lsr#3	// sigma0(X[i+1])
624bc3d5698SJohn Baldwin	add	w24,w24,w16			// h+=Sigma1(e)
625bc3d5698SJohn Baldwin	eor	w19,w19,w26			// Maj(a,b,c)
626bc3d5698SJohn Baldwin	eor	w17,w14,w25,ror#22	// Sigma0(a)
627bc3d5698SJohn Baldwin	eor	w12,w12,w5,lsr#10	// sigma1(X[i+14])
628bc3d5698SJohn Baldwin	add	w7,w7,w0
629bc3d5698SJohn Baldwin	add	w20,w20,w24			// d+=h
630bc3d5698SJohn Baldwin	add	w24,w24,w19			// h+=Maj(a,b,c)
631bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
632bc3d5698SJohn Baldwin	add	w7,w7,w13
633bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Sigma0(a)
634bc3d5698SJohn Baldwin	add	w7,w7,w12
635bc3d5698SJohn Baldwin	ldr	w12,[sp,#4]
636bc3d5698SJohn Baldwin	str	w15,[sp,#0]
637bc3d5698SJohn Baldwin	ror	w16,w20,#6
638bc3d5698SJohn Baldwin	add	w23,w23,w19			// h+=K[i]
639bc3d5698SJohn Baldwin	ror	w14,w9,#7
640bc3d5698SJohn Baldwin	and	w17,w21,w20
641bc3d5698SJohn Baldwin	ror	w13,w6,#17
642bc3d5698SJohn Baldwin	bic	w19,w22,w20
643bc3d5698SJohn Baldwin	ror	w15,w24,#2
644bc3d5698SJohn Baldwin	add	w23,w23,w7			// h+=X[i]
645bc3d5698SJohn Baldwin	eor	w16,w16,w20,ror#11
646bc3d5698SJohn Baldwin	eor	w14,w14,w9,ror#18
647bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
648bc3d5698SJohn Baldwin	eor	w19,w24,w25			// a^b, b^c in next round
649bc3d5698SJohn Baldwin	eor	w16,w16,w20,ror#25	// Sigma1(e)
650bc3d5698SJohn Baldwin	eor	w15,w15,w24,ror#13
651bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Ch(e,f,g)
652bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
653bc3d5698SJohn Baldwin	eor	w13,w13,w6,ror#19
654bc3d5698SJohn Baldwin	eor	w14,w14,w9,lsr#3	// sigma0(X[i+1])
655bc3d5698SJohn Baldwin	add	w23,w23,w16			// h+=Sigma1(e)
656bc3d5698SJohn Baldwin	eor	w28,w28,w25			// Maj(a,b,c)
657bc3d5698SJohn Baldwin	eor	w17,w15,w24,ror#22	// Sigma0(a)
658bc3d5698SJohn Baldwin	eor	w13,w13,w6,lsr#10	// sigma1(X[i+14])
659bc3d5698SJohn Baldwin	add	w8,w8,w1
660bc3d5698SJohn Baldwin	add	w27,w27,w23			// d+=h
661bc3d5698SJohn Baldwin	add	w23,w23,w28			// h+=Maj(a,b,c)
662bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
663bc3d5698SJohn Baldwin	add	w8,w8,w14
664bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Sigma0(a)
665bc3d5698SJohn Baldwin	add	w8,w8,w13
666bc3d5698SJohn Baldwin	ldr	w13,[sp,#8]
667bc3d5698SJohn Baldwin	str	w0,[sp,#4]
668bc3d5698SJohn Baldwin	ror	w16,w27,#6
669bc3d5698SJohn Baldwin	add	w22,w22,w28			// h+=K[i]
670bc3d5698SJohn Baldwin	ror	w15,w10,#7
671bc3d5698SJohn Baldwin	and	w17,w20,w27
672bc3d5698SJohn Baldwin	ror	w14,w7,#17
673bc3d5698SJohn Baldwin	bic	w28,w21,w27
674bc3d5698SJohn Baldwin	ror	w0,w23,#2
675bc3d5698SJohn Baldwin	add	w22,w22,w8			// h+=X[i]
676bc3d5698SJohn Baldwin	eor	w16,w16,w27,ror#11
677bc3d5698SJohn Baldwin	eor	w15,w15,w10,ror#18
678bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
679bc3d5698SJohn Baldwin	eor	w28,w23,w24			// a^b, b^c in next round
680bc3d5698SJohn Baldwin	eor	w16,w16,w27,ror#25	// Sigma1(e)
681bc3d5698SJohn Baldwin	eor	w0,w0,w23,ror#13
682bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Ch(e,f,g)
683bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
684bc3d5698SJohn Baldwin	eor	w14,w14,w7,ror#19
685bc3d5698SJohn Baldwin	eor	w15,w15,w10,lsr#3	// sigma0(X[i+1])
686bc3d5698SJohn Baldwin	add	w22,w22,w16			// h+=Sigma1(e)
687bc3d5698SJohn Baldwin	eor	w19,w19,w24			// Maj(a,b,c)
688bc3d5698SJohn Baldwin	eor	w17,w0,w23,ror#22	// Sigma0(a)
689bc3d5698SJohn Baldwin	eor	w14,w14,w7,lsr#10	// sigma1(X[i+14])
690bc3d5698SJohn Baldwin	add	w9,w9,w2
691bc3d5698SJohn Baldwin	add	w26,w26,w22			// d+=h
692bc3d5698SJohn Baldwin	add	w22,w22,w19			// h+=Maj(a,b,c)
693bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
694bc3d5698SJohn Baldwin	add	w9,w9,w15
695bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Sigma0(a)
696bc3d5698SJohn Baldwin	add	w9,w9,w14
697bc3d5698SJohn Baldwin	ldr	w14,[sp,#12]
698bc3d5698SJohn Baldwin	str	w1,[sp,#8]
699bc3d5698SJohn Baldwin	ror	w16,w26,#6
700bc3d5698SJohn Baldwin	add	w21,w21,w19			// h+=K[i]
701bc3d5698SJohn Baldwin	ror	w0,w11,#7
702bc3d5698SJohn Baldwin	and	w17,w27,w26
703bc3d5698SJohn Baldwin	ror	w15,w8,#17
704bc3d5698SJohn Baldwin	bic	w19,w20,w26
705bc3d5698SJohn Baldwin	ror	w1,w22,#2
706bc3d5698SJohn Baldwin	add	w21,w21,w9			// h+=X[i]
707bc3d5698SJohn Baldwin	eor	w16,w16,w26,ror#11
708bc3d5698SJohn Baldwin	eor	w0,w0,w11,ror#18
709bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
710bc3d5698SJohn Baldwin	eor	w19,w22,w23			// a^b, b^c in next round
711bc3d5698SJohn Baldwin	eor	w16,w16,w26,ror#25	// Sigma1(e)
712bc3d5698SJohn Baldwin	eor	w1,w1,w22,ror#13
713bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Ch(e,f,g)
714bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
715bc3d5698SJohn Baldwin	eor	w15,w15,w8,ror#19
716bc3d5698SJohn Baldwin	eor	w0,w0,w11,lsr#3	// sigma0(X[i+1])
717bc3d5698SJohn Baldwin	add	w21,w21,w16			// h+=Sigma1(e)
718bc3d5698SJohn Baldwin	eor	w28,w28,w23			// Maj(a,b,c)
719bc3d5698SJohn Baldwin	eor	w17,w1,w22,ror#22	// Sigma0(a)
720bc3d5698SJohn Baldwin	eor	w15,w15,w8,lsr#10	// sigma1(X[i+14])
721bc3d5698SJohn Baldwin	add	w10,w10,w3
722bc3d5698SJohn Baldwin	add	w25,w25,w21			// d+=h
723bc3d5698SJohn Baldwin	add	w21,w21,w28			// h+=Maj(a,b,c)
724bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
725bc3d5698SJohn Baldwin	add	w10,w10,w0
726bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Sigma0(a)
727bc3d5698SJohn Baldwin	add	w10,w10,w15
728bc3d5698SJohn Baldwin	ldr	w15,[sp,#0]
729bc3d5698SJohn Baldwin	str	w2,[sp,#12]
730bc3d5698SJohn Baldwin	ror	w16,w25,#6
731bc3d5698SJohn Baldwin	add	w20,w20,w28			// h+=K[i]
732bc3d5698SJohn Baldwin	ror	w1,w12,#7
733bc3d5698SJohn Baldwin	and	w17,w26,w25
734bc3d5698SJohn Baldwin	ror	w0,w9,#17
735bc3d5698SJohn Baldwin	bic	w28,w27,w25
736bc3d5698SJohn Baldwin	ror	w2,w21,#2
737bc3d5698SJohn Baldwin	add	w20,w20,w10			// h+=X[i]
738bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#11
739bc3d5698SJohn Baldwin	eor	w1,w1,w12,ror#18
740bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
741bc3d5698SJohn Baldwin	eor	w28,w21,w22			// a^b, b^c in next round
742bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#25	// Sigma1(e)
743bc3d5698SJohn Baldwin	eor	w2,w2,w21,ror#13
744bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Ch(e,f,g)
745bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
746bc3d5698SJohn Baldwin	eor	w0,w0,w9,ror#19
747bc3d5698SJohn Baldwin	eor	w1,w1,w12,lsr#3	// sigma0(X[i+1])
748bc3d5698SJohn Baldwin	add	w20,w20,w16			// h+=Sigma1(e)
749bc3d5698SJohn Baldwin	eor	w19,w19,w22			// Maj(a,b,c)
750bc3d5698SJohn Baldwin	eor	w17,w2,w21,ror#22	// Sigma0(a)
751bc3d5698SJohn Baldwin	eor	w0,w0,w9,lsr#10	// sigma1(X[i+14])
752bc3d5698SJohn Baldwin	add	w11,w11,w4
753bc3d5698SJohn Baldwin	add	w24,w24,w20			// d+=h
754bc3d5698SJohn Baldwin	add	w20,w20,w19			// h+=Maj(a,b,c)
755bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
756bc3d5698SJohn Baldwin	add	w11,w11,w1
757bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Sigma0(a)
758bc3d5698SJohn Baldwin	add	w11,w11,w0
759bc3d5698SJohn Baldwin	ldr	w0,[sp,#4]
760bc3d5698SJohn Baldwin	str	w3,[sp,#0]
761bc3d5698SJohn Baldwin	ror	w16,w24,#6
762bc3d5698SJohn Baldwin	add	w27,w27,w19			// h+=K[i]
763bc3d5698SJohn Baldwin	ror	w2,w13,#7
764bc3d5698SJohn Baldwin	and	w17,w25,w24
765bc3d5698SJohn Baldwin	ror	w1,w10,#17
766bc3d5698SJohn Baldwin	bic	w19,w26,w24
767bc3d5698SJohn Baldwin	ror	w3,w20,#2
768bc3d5698SJohn Baldwin	add	w27,w27,w11			// h+=X[i]
769bc3d5698SJohn Baldwin	eor	w16,w16,w24,ror#11
770bc3d5698SJohn Baldwin	eor	w2,w2,w13,ror#18
771bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
772bc3d5698SJohn Baldwin	eor	w19,w20,w21			// a^b, b^c in next round
773bc3d5698SJohn Baldwin	eor	w16,w16,w24,ror#25	// Sigma1(e)
774bc3d5698SJohn Baldwin	eor	w3,w3,w20,ror#13
775bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Ch(e,f,g)
776bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
777bc3d5698SJohn Baldwin	eor	w1,w1,w10,ror#19
778bc3d5698SJohn Baldwin	eor	w2,w2,w13,lsr#3	// sigma0(X[i+1])
779bc3d5698SJohn Baldwin	add	w27,w27,w16			// h+=Sigma1(e)
780bc3d5698SJohn Baldwin	eor	w28,w28,w21			// Maj(a,b,c)
781bc3d5698SJohn Baldwin	eor	w17,w3,w20,ror#22	// Sigma0(a)
782bc3d5698SJohn Baldwin	eor	w1,w1,w10,lsr#10	// sigma1(X[i+14])
783bc3d5698SJohn Baldwin	add	w12,w12,w5
784bc3d5698SJohn Baldwin	add	w23,w23,w27			// d+=h
785bc3d5698SJohn Baldwin	add	w27,w27,w28			// h+=Maj(a,b,c)
786bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
787bc3d5698SJohn Baldwin	add	w12,w12,w2
788bc3d5698SJohn Baldwin	add	w27,w27,w17			// h+=Sigma0(a)
789bc3d5698SJohn Baldwin	add	w12,w12,w1
790bc3d5698SJohn Baldwin	ldr	w1,[sp,#8]
791bc3d5698SJohn Baldwin	str	w4,[sp,#4]
792bc3d5698SJohn Baldwin	ror	w16,w23,#6
793bc3d5698SJohn Baldwin	add	w26,w26,w28			// h+=K[i]
794bc3d5698SJohn Baldwin	ror	w3,w14,#7
795bc3d5698SJohn Baldwin	and	w17,w24,w23
796bc3d5698SJohn Baldwin	ror	w2,w11,#17
797bc3d5698SJohn Baldwin	bic	w28,w25,w23
798bc3d5698SJohn Baldwin	ror	w4,w27,#2
799bc3d5698SJohn Baldwin	add	w26,w26,w12			// h+=X[i]
800bc3d5698SJohn Baldwin	eor	w16,w16,w23,ror#11
801bc3d5698SJohn Baldwin	eor	w3,w3,w14,ror#18
802bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
803bc3d5698SJohn Baldwin	eor	w28,w27,w20			// a^b, b^c in next round
804bc3d5698SJohn Baldwin	eor	w16,w16,w23,ror#25	// Sigma1(e)
805bc3d5698SJohn Baldwin	eor	w4,w4,w27,ror#13
806bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Ch(e,f,g)
807bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
808bc3d5698SJohn Baldwin	eor	w2,w2,w11,ror#19
809bc3d5698SJohn Baldwin	eor	w3,w3,w14,lsr#3	// sigma0(X[i+1])
810bc3d5698SJohn Baldwin	add	w26,w26,w16			// h+=Sigma1(e)
811bc3d5698SJohn Baldwin	eor	w19,w19,w20			// Maj(a,b,c)
812bc3d5698SJohn Baldwin	eor	w17,w4,w27,ror#22	// Sigma0(a)
813bc3d5698SJohn Baldwin	eor	w2,w2,w11,lsr#10	// sigma1(X[i+14])
814bc3d5698SJohn Baldwin	add	w13,w13,w6
815bc3d5698SJohn Baldwin	add	w22,w22,w26			// d+=h
816bc3d5698SJohn Baldwin	add	w26,w26,w19			// h+=Maj(a,b,c)
817bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
818bc3d5698SJohn Baldwin	add	w13,w13,w3
819bc3d5698SJohn Baldwin	add	w26,w26,w17			// h+=Sigma0(a)
820bc3d5698SJohn Baldwin	add	w13,w13,w2
821bc3d5698SJohn Baldwin	ldr	w2,[sp,#12]
822bc3d5698SJohn Baldwin	str	w5,[sp,#8]
823bc3d5698SJohn Baldwin	ror	w16,w22,#6
824bc3d5698SJohn Baldwin	add	w25,w25,w19			// h+=K[i]
825bc3d5698SJohn Baldwin	ror	w4,w15,#7
826bc3d5698SJohn Baldwin	and	w17,w23,w22
827bc3d5698SJohn Baldwin	ror	w3,w12,#17
828bc3d5698SJohn Baldwin	bic	w19,w24,w22
829bc3d5698SJohn Baldwin	ror	w5,w26,#2
830bc3d5698SJohn Baldwin	add	w25,w25,w13			// h+=X[i]
831bc3d5698SJohn Baldwin	eor	w16,w16,w22,ror#11
832bc3d5698SJohn Baldwin	eor	w4,w4,w15,ror#18
833bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
834bc3d5698SJohn Baldwin	eor	w19,w26,w27			// a^b, b^c in next round
835bc3d5698SJohn Baldwin	eor	w16,w16,w22,ror#25	// Sigma1(e)
836bc3d5698SJohn Baldwin	eor	w5,w5,w26,ror#13
837bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Ch(e,f,g)
838bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
839bc3d5698SJohn Baldwin	eor	w3,w3,w12,ror#19
840bc3d5698SJohn Baldwin	eor	w4,w4,w15,lsr#3	// sigma0(X[i+1])
841bc3d5698SJohn Baldwin	add	w25,w25,w16			// h+=Sigma1(e)
842bc3d5698SJohn Baldwin	eor	w28,w28,w27			// Maj(a,b,c)
843bc3d5698SJohn Baldwin	eor	w17,w5,w26,ror#22	// Sigma0(a)
844bc3d5698SJohn Baldwin	eor	w3,w3,w12,lsr#10	// sigma1(X[i+14])
845bc3d5698SJohn Baldwin	add	w14,w14,w7
846bc3d5698SJohn Baldwin	add	w21,w21,w25			// d+=h
847bc3d5698SJohn Baldwin	add	w25,w25,w28			// h+=Maj(a,b,c)
848bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
849bc3d5698SJohn Baldwin	add	w14,w14,w4
850bc3d5698SJohn Baldwin	add	w25,w25,w17			// h+=Sigma0(a)
851bc3d5698SJohn Baldwin	add	w14,w14,w3
852bc3d5698SJohn Baldwin	ldr	w3,[sp,#0]
853bc3d5698SJohn Baldwin	str	w6,[sp,#12]
854bc3d5698SJohn Baldwin	ror	w16,w21,#6
855bc3d5698SJohn Baldwin	add	w24,w24,w28			// h+=K[i]
856bc3d5698SJohn Baldwin	ror	w5,w0,#7
857bc3d5698SJohn Baldwin	and	w17,w22,w21
858bc3d5698SJohn Baldwin	ror	w4,w13,#17
859bc3d5698SJohn Baldwin	bic	w28,w23,w21
860bc3d5698SJohn Baldwin	ror	w6,w25,#2
861bc3d5698SJohn Baldwin	add	w24,w24,w14			// h+=X[i]
862bc3d5698SJohn Baldwin	eor	w16,w16,w21,ror#11
863bc3d5698SJohn Baldwin	eor	w5,w5,w0,ror#18
864bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
865bc3d5698SJohn Baldwin	eor	w28,w25,w26			// a^b, b^c in next round
866bc3d5698SJohn Baldwin	eor	w16,w16,w21,ror#25	// Sigma1(e)
867bc3d5698SJohn Baldwin	eor	w6,w6,w25,ror#13
868bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Ch(e,f,g)
869bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
870bc3d5698SJohn Baldwin	eor	w4,w4,w13,ror#19
871bc3d5698SJohn Baldwin	eor	w5,w5,w0,lsr#3	// sigma0(X[i+1])
872bc3d5698SJohn Baldwin	add	w24,w24,w16			// h+=Sigma1(e)
873bc3d5698SJohn Baldwin	eor	w19,w19,w26			// Maj(a,b,c)
874bc3d5698SJohn Baldwin	eor	w17,w6,w25,ror#22	// Sigma0(a)
875bc3d5698SJohn Baldwin	eor	w4,w4,w13,lsr#10	// sigma1(X[i+14])
876bc3d5698SJohn Baldwin	add	w15,w15,w8
877bc3d5698SJohn Baldwin	add	w20,w20,w24			// d+=h
878bc3d5698SJohn Baldwin	add	w24,w24,w19			// h+=Maj(a,b,c)
879bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
880bc3d5698SJohn Baldwin	add	w15,w15,w5
881bc3d5698SJohn Baldwin	add	w24,w24,w17			// h+=Sigma0(a)
882bc3d5698SJohn Baldwin	add	w15,w15,w4
883bc3d5698SJohn Baldwin	ldr	w4,[sp,#4]
884bc3d5698SJohn Baldwin	str	w7,[sp,#0]
885bc3d5698SJohn Baldwin	ror	w16,w20,#6
886bc3d5698SJohn Baldwin	add	w23,w23,w19			// h+=K[i]
887bc3d5698SJohn Baldwin	ror	w6,w1,#7
888bc3d5698SJohn Baldwin	and	w17,w21,w20
889bc3d5698SJohn Baldwin	ror	w5,w14,#17
890bc3d5698SJohn Baldwin	bic	w19,w22,w20
891bc3d5698SJohn Baldwin	ror	w7,w24,#2
892bc3d5698SJohn Baldwin	add	w23,w23,w15			// h+=X[i]
893bc3d5698SJohn Baldwin	eor	w16,w16,w20,ror#11
894bc3d5698SJohn Baldwin	eor	w6,w6,w1,ror#18
895bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
896bc3d5698SJohn Baldwin	eor	w19,w24,w25			// a^b, b^c in next round
897bc3d5698SJohn Baldwin	eor	w16,w16,w20,ror#25	// Sigma1(e)
898bc3d5698SJohn Baldwin	eor	w7,w7,w24,ror#13
899bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Ch(e,f,g)
900bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
901bc3d5698SJohn Baldwin	eor	w5,w5,w14,ror#19
902bc3d5698SJohn Baldwin	eor	w6,w6,w1,lsr#3	// sigma0(X[i+1])
903bc3d5698SJohn Baldwin	add	w23,w23,w16			// h+=Sigma1(e)
904bc3d5698SJohn Baldwin	eor	w28,w28,w25			// Maj(a,b,c)
905bc3d5698SJohn Baldwin	eor	w17,w7,w24,ror#22	// Sigma0(a)
906bc3d5698SJohn Baldwin	eor	w5,w5,w14,lsr#10	// sigma1(X[i+14])
907bc3d5698SJohn Baldwin	add	w0,w0,w9
908bc3d5698SJohn Baldwin	add	w27,w27,w23			// d+=h
909bc3d5698SJohn Baldwin	add	w23,w23,w28			// h+=Maj(a,b,c)
910bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
911bc3d5698SJohn Baldwin	add	w0,w0,w6
912bc3d5698SJohn Baldwin	add	w23,w23,w17			// h+=Sigma0(a)
913bc3d5698SJohn Baldwin	add	w0,w0,w5
914bc3d5698SJohn Baldwin	ldr	w5,[sp,#8]
915bc3d5698SJohn Baldwin	str	w8,[sp,#4]
916bc3d5698SJohn Baldwin	ror	w16,w27,#6
917bc3d5698SJohn Baldwin	add	w22,w22,w28			// h+=K[i]
918bc3d5698SJohn Baldwin	ror	w7,w2,#7
919bc3d5698SJohn Baldwin	and	w17,w20,w27
920bc3d5698SJohn Baldwin	ror	w6,w15,#17
921bc3d5698SJohn Baldwin	bic	w28,w21,w27
922bc3d5698SJohn Baldwin	ror	w8,w23,#2
923bc3d5698SJohn Baldwin	add	w22,w22,w0			// h+=X[i]
924bc3d5698SJohn Baldwin	eor	w16,w16,w27,ror#11
925bc3d5698SJohn Baldwin	eor	w7,w7,w2,ror#18
926bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
927bc3d5698SJohn Baldwin	eor	w28,w23,w24			// a^b, b^c in next round
928bc3d5698SJohn Baldwin	eor	w16,w16,w27,ror#25	// Sigma1(e)
929bc3d5698SJohn Baldwin	eor	w8,w8,w23,ror#13
930bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Ch(e,f,g)
931bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
932bc3d5698SJohn Baldwin	eor	w6,w6,w15,ror#19
933bc3d5698SJohn Baldwin	eor	w7,w7,w2,lsr#3	// sigma0(X[i+1])
934bc3d5698SJohn Baldwin	add	w22,w22,w16			// h+=Sigma1(e)
935bc3d5698SJohn Baldwin	eor	w19,w19,w24			// Maj(a,b,c)
936bc3d5698SJohn Baldwin	eor	w17,w8,w23,ror#22	// Sigma0(a)
937bc3d5698SJohn Baldwin	eor	w6,w6,w15,lsr#10	// sigma1(X[i+14])
938bc3d5698SJohn Baldwin	add	w1,w1,w10
939bc3d5698SJohn Baldwin	add	w26,w26,w22			// d+=h
940bc3d5698SJohn Baldwin	add	w22,w22,w19			// h+=Maj(a,b,c)
941bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
942bc3d5698SJohn Baldwin	add	w1,w1,w7
943bc3d5698SJohn Baldwin	add	w22,w22,w17			// h+=Sigma0(a)
944bc3d5698SJohn Baldwin	add	w1,w1,w6
945bc3d5698SJohn Baldwin	ldr	w6,[sp,#12]
946bc3d5698SJohn Baldwin	str	w9,[sp,#8]
947bc3d5698SJohn Baldwin	ror	w16,w26,#6
948bc3d5698SJohn Baldwin	add	w21,w21,w19			// h+=K[i]
949bc3d5698SJohn Baldwin	ror	w8,w3,#7
950bc3d5698SJohn Baldwin	and	w17,w27,w26
951bc3d5698SJohn Baldwin	ror	w7,w0,#17
952bc3d5698SJohn Baldwin	bic	w19,w20,w26
953bc3d5698SJohn Baldwin	ror	w9,w22,#2
954bc3d5698SJohn Baldwin	add	w21,w21,w1			// h+=X[i]
955bc3d5698SJohn Baldwin	eor	w16,w16,w26,ror#11
956bc3d5698SJohn Baldwin	eor	w8,w8,w3,ror#18
957bc3d5698SJohn Baldwin	orr	w17,w17,w19			// Ch(e,f,g)
958bc3d5698SJohn Baldwin	eor	w19,w22,w23			// a^b, b^c in next round
959bc3d5698SJohn Baldwin	eor	w16,w16,w26,ror#25	// Sigma1(e)
960bc3d5698SJohn Baldwin	eor	w9,w9,w22,ror#13
961bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Ch(e,f,g)
962bc3d5698SJohn Baldwin	and	w28,w28,w19			// (b^c)&=(a^b)
963bc3d5698SJohn Baldwin	eor	w7,w7,w0,ror#19
964bc3d5698SJohn Baldwin	eor	w8,w8,w3,lsr#3	// sigma0(X[i+1])
965bc3d5698SJohn Baldwin	add	w21,w21,w16			// h+=Sigma1(e)
966bc3d5698SJohn Baldwin	eor	w28,w28,w23			// Maj(a,b,c)
967bc3d5698SJohn Baldwin	eor	w17,w9,w22,ror#22	// Sigma0(a)
968bc3d5698SJohn Baldwin	eor	w7,w7,w0,lsr#10	// sigma1(X[i+14])
969bc3d5698SJohn Baldwin	add	w2,w2,w11
970bc3d5698SJohn Baldwin	add	w25,w25,w21			// d+=h
971bc3d5698SJohn Baldwin	add	w21,w21,w28			// h+=Maj(a,b,c)
972bc3d5698SJohn Baldwin	ldr	w28,[x30],#4		// *K++, w19 in next round
973bc3d5698SJohn Baldwin	add	w2,w2,w8
974bc3d5698SJohn Baldwin	add	w21,w21,w17			// h+=Sigma0(a)
975bc3d5698SJohn Baldwin	add	w2,w2,w7
976bc3d5698SJohn Baldwin	ldr	w7,[sp,#0]
977bc3d5698SJohn Baldwin	str	w10,[sp,#12]
978bc3d5698SJohn Baldwin	ror	w16,w25,#6
979bc3d5698SJohn Baldwin	add	w20,w20,w28			// h+=K[i]
980bc3d5698SJohn Baldwin	ror	w9,w4,#7
981bc3d5698SJohn Baldwin	and	w17,w26,w25
982bc3d5698SJohn Baldwin	ror	w8,w1,#17
983bc3d5698SJohn Baldwin	bic	w28,w27,w25
984bc3d5698SJohn Baldwin	ror	w10,w21,#2
985bc3d5698SJohn Baldwin	add	w20,w20,w2			// h+=X[i]
986bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#11
987bc3d5698SJohn Baldwin	eor	w9,w9,w4,ror#18
988bc3d5698SJohn Baldwin	orr	w17,w17,w28			// Ch(e,f,g)
989bc3d5698SJohn Baldwin	eor	w28,w21,w22			// a^b, b^c in next round
990bc3d5698SJohn Baldwin	eor	w16,w16,w25,ror#25	// Sigma1(e)
991bc3d5698SJohn Baldwin	eor	w10,w10,w21,ror#13
992bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Ch(e,f,g)
993bc3d5698SJohn Baldwin	and	w19,w19,w28			// (b^c)&=(a^b)
994bc3d5698SJohn Baldwin	eor	w8,w8,w1,ror#19
995bc3d5698SJohn Baldwin	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
996bc3d5698SJohn Baldwin	add	w20,w20,w16			// h+=Sigma1(e)
997bc3d5698SJohn Baldwin	eor	w19,w19,w22			// Maj(a,b,c)
998bc3d5698SJohn Baldwin	eor	w17,w10,w21,ror#22	// Sigma0(a)
999bc3d5698SJohn Baldwin	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
1000bc3d5698SJohn Baldwin	add	w3,w3,w12
1001bc3d5698SJohn Baldwin	add	w24,w24,w20			// d+=h
1002bc3d5698SJohn Baldwin	add	w20,w20,w19			// h+=Maj(a,b,c)
1003bc3d5698SJohn Baldwin	ldr	w19,[x30],#4		// *K++, w28 in next round
1004bc3d5698SJohn Baldwin	add	w3,w3,w9
1005bc3d5698SJohn Baldwin	add	w20,w20,w17			// h+=Sigma0(a)
1006bc3d5698SJohn Baldwin	add	w3,w3,w8
1007bc3d5698SJohn Baldwin	cbnz	w19,.Loop_16_xx
1008bc3d5698SJohn Baldwin
1009bc3d5698SJohn Baldwin	ldp	x0,x2,[x29,#96]
1010bc3d5698SJohn Baldwin	ldr	x1,[x29,#112]
1011bc3d5698SJohn Baldwin	sub	x30,x30,#260		// rewind
1012bc3d5698SJohn Baldwin
1013bc3d5698SJohn Baldwin	ldp	w3,w4,[x0]
1014bc3d5698SJohn Baldwin	ldp	w5,w6,[x0,#2*4]
1015bc3d5698SJohn Baldwin	add	x1,x1,#14*4			// advance input pointer
1016bc3d5698SJohn Baldwin	ldp	w7,w8,[x0,#4*4]
1017bc3d5698SJohn Baldwin	add	w20,w20,w3
1018bc3d5698SJohn Baldwin	ldp	w9,w10,[x0,#6*4]
1019bc3d5698SJohn Baldwin	add	w21,w21,w4
1020bc3d5698SJohn Baldwin	add	w22,w22,w5
1021bc3d5698SJohn Baldwin	add	w23,w23,w6
1022bc3d5698SJohn Baldwin	stp	w20,w21,[x0]
1023bc3d5698SJohn Baldwin	add	w24,w24,w7
1024bc3d5698SJohn Baldwin	add	w25,w25,w8
1025bc3d5698SJohn Baldwin	stp	w22,w23,[x0,#2*4]
1026bc3d5698SJohn Baldwin	add	w26,w26,w9
1027bc3d5698SJohn Baldwin	add	w27,w27,w10
1028bc3d5698SJohn Baldwin	cmp	x1,x2
1029bc3d5698SJohn Baldwin	stp	w24,w25,[x0,#4*4]
1030bc3d5698SJohn Baldwin	stp	w26,w27,[x0,#6*4]
1031bc3d5698SJohn Baldwin	b.ne	.Loop
1032bc3d5698SJohn Baldwin
1033bc3d5698SJohn Baldwin	ldp	x19,x20,[x29,#16]
1034bc3d5698SJohn Baldwin	add	sp,sp,#4*4
1035bc3d5698SJohn Baldwin	ldp	x21,x22,[x29,#32]
1036bc3d5698SJohn Baldwin	ldp	x23,x24,[x29,#48]
1037bc3d5698SJohn Baldwin	ldp	x25,x26,[x29,#64]
1038bc3d5698SJohn Baldwin	ldp	x27,x28,[x29,#80]
1039bc3d5698SJohn Baldwin	ldp	x29,x30,[sp],#128
1040bd9588bcSAndrew Turner	AARCH64_VALIDATE_LINK_REGISTER
1041bc3d5698SJohn Baldwin	ret
1042bc3d5698SJohn Baldwin.size	sha256_block_data_order,.-sha256_block_data_order
1043bc3d5698SJohn Baldwin
1044bc3d5698SJohn Baldwin.align	6
// .LK256: the 64 SHA-256 round constants K[0..63], stored as 32-bit
// words (16 rows of 4) and followed by one zero word.  The .align 6
// above requests 2^6 = 64-byte alignment for the table.
1045bc3d5698SJohn Baldwin.type	.LK256,%object
1046bc3d5698SJohn Baldwin.LK256:
1047bc3d5698SJohn Baldwin.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
1048bc3d5698SJohn Baldwin.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
1049bc3d5698SJohn Baldwin.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
1050bc3d5698SJohn Baldwin.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
1051bc3d5698SJohn Baldwin.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
1052bc3d5698SJohn Baldwin.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
1053bc3d5698SJohn Baldwin.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
1054bc3d5698SJohn Baldwin.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
1055bc3d5698SJohn Baldwin.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
1056bc3d5698SJohn Baldwin.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
1057bc3d5698SJohn Baldwin.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
1058bc3d5698SJohn Baldwin.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
1059bc3d5698SJohn Baldwin.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
1060bc3d5698SJohn Baldwin.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
1061bc3d5698SJohn Baldwin.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
1062bc3d5698SJohn Baldwin.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
// End-of-table marker.  NOTE(review): the integer code path appears to
// leave its round loop when the word fetched by "*K++" is zero (cbnz on
// the loaded register), so this word must stay directly after K[63];
// the crypto-extension path instead rewinds its cursor by a fixed amount.
1063bc3d5698SJohn Baldwin.long	0	//terminator
1064bc3d5698SJohn Baldwin.size	.LK256,.-.LK256
// NUL-terminated ASCII identification string:
// "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1065bc3d5698SJohn Baldwin.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// Re-align to 2^2 = 4 bytes after the odd-length string so that the
// code that follows starts on an instruction boundary.
1066bc3d5698SJohn Baldwin.align	2
1067bc3d5698SJohn Baldwin.align	2
1068bc3d5698SJohn Baldwin#ifndef	__KERNEL__
// sha256_block_armv8: SHA-256 block function built on the Armv8
// Cryptography Extension instructions, which are emitted as raw .inst
// words (the mnemonic is given in the comment on each .inst line).
//
// Register use, as read from the code below:
//   x0      - address of the eight 32-bit state words; loaded on entry
//             and written back just before returning
//   x1      - input pointer; 64 bytes are consumed per loop iteration
//   x2      - iteration (block) count; decremented once per iteration
//   x3      - cursor into .LK256, rewound at the end of each iteration
//   v0,v1   - working state
//   v18,v19 - copy of the state taken at the top of the iteration
//   v4-v7   - message schedule words
//   v16,v17 - K + W sums; the two registers alternate so that the next
//             16 bytes of K are loaded while the current sum is in use
//   v2      - copy of v0 taken before sha256h, consumed by sha256h2
//
// Apart from sp/x29 handling, only x0-x3, v0-v7 and v16-v19 are
// written; none of these are callee-saved under AAPCS64.
//
// NOTE(review): the loop body runs before x2 is tested, so x2 == 0 on
// entry would wrap around.  Callers presumably always pass a non-zero
// count - confirm at the call sites.
// NOTE(review): no .globl for this symbol is visible in this part of
// the file; it is presumably entered through .Lv8_entry from another
// routine - confirm.
1069bc3d5698SJohn Baldwin.type	sha256_block_armv8,%function
1070bc3d5698SJohn Baldwin.align	6
1071bc3d5698SJohn Baldwinsha256_block_armv8:
1072bc3d5698SJohn Baldwin.Lv8_entry:
1073bd9588bcSAndrew Turner	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
1074bc3d5698SJohn Baldwin	stp	x29,x30,[sp,#-16]!
1075bc3d5698SJohn Baldwin	add	x29,sp,#0
1076bc3d5698SJohn Baldwin
1077bc3d5698SJohn Baldwin	ld1	{v0.4s,v1.4s},[x0]	// load the eight state words
1078bc3d5698SJohn Baldwin	adr	x3,.LK256	// x3 = start of the round-constant table
1079bc3d5698SJohn Baldwin
1080bc3d5698SJohn Baldwin.Loop_hw:
1081bc3d5698SJohn Baldwin	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64	// fetch one 64-byte block, advance x1
1082bc3d5698SJohn Baldwin	sub	x2,x2,#1	// one block fewer to go
1083bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
	// Reverse the byte order inside every 32-bit word of the block.
1084bc3d5698SJohn Baldwin	rev32	v4.16b,v4.16b
1085bc3d5698SJohn Baldwin	rev32	v5.16b,v5.16b
1086bc3d5698SJohn Baldwin	rev32	v6.16b,v6.16b
1087bc3d5698SJohn Baldwin	rev32	v7.16b,v7.16b
1088bc3d5698SJohn Baldwin	orr	v18.16b,v0.16b,v0.16b		// offload
1089bc3d5698SJohn Baldwin	orr	v19.16b,v1.16b,v1.16b
	// Twelve groups of the same shape follow: form K+W in v16 or v17,
	// copy v0 to v2, run sha256h/sha256h2 on the state, and extend the
	// message schedule with sha256su0/sha256su1.  Each group also loads
	// the next 16 bytes of K into the register it has just finished with.
1090bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1091bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v4.4s
1092bc3d5698SJohn Baldwin.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
1093bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1094bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1095bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1096bc3d5698SJohn Baldwin.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
1097bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1098bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v5.4s
1099bc3d5698SJohn Baldwin.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
1100bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1101bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1102bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1103bc3d5698SJohn Baldwin.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
1104bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1105bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v6.4s
1106bc3d5698SJohn Baldwin.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
1107bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1108bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1109bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1110bc3d5698SJohn Baldwin.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
1111bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1112bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v7.4s
1113bc3d5698SJohn Baldwin.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
1114bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1115bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1116bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1117bc3d5698SJohn Baldwin.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
1118bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1119bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v4.4s
1120bc3d5698SJohn Baldwin.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
1121bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1122bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1123bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1124bc3d5698SJohn Baldwin.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
1125bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1126bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v5.4s
1127bc3d5698SJohn Baldwin.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
1128bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1129bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1130bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1131bc3d5698SJohn Baldwin.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
1132bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1133bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v6.4s
1134bc3d5698SJohn Baldwin.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
1135bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1136bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1137bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1138bc3d5698SJohn Baldwin.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
1139bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1140bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v7.4s
1141bc3d5698SJohn Baldwin.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
1142bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1143bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1144bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1145bc3d5698SJohn Baldwin.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
1146bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1147bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v4.4s
1148bc3d5698SJohn Baldwin.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
1149bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1150bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1151bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1152bc3d5698SJohn Baldwin.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
1153bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1154bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v5.4s
1155bc3d5698SJohn Baldwin.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
1156bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1157bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1158bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1159bc3d5698SJohn Baldwin.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
1160bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1161bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v6.4s
1162bc3d5698SJohn Baldwin.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
1163bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1164bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1165bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1166bc3d5698SJohn Baldwin.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
1167bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1168bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v7.4s
1169bc3d5698SJohn Baldwin.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
1170bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1171bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1172bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1173bc3d5698SJohn Baldwin.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
	// The last four groups only consume the schedule words already in
	// v4-v7; no further sha256su0/sha256su1 updates are issued.
1174bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3],#16
1175bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v4.4s
1176bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1177bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1178bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1179bc3d5698SJohn Baldwin
1180bc3d5698SJohn Baldwin	ld1	{v16.4s},[x3],#16
1181bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v5.4s
1182bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1183bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1184bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1185bc3d5698SJohn Baldwin
	// Final 16 bytes of K are loaded without post-increment.  The 15
	// earlier loads advanced x3 by 15*16 = 64*4-16 bytes, which is
	// exactly what the subtraction below undoes, leaving x3 at .LK256
	// for the next block.
1186bc3d5698SJohn Baldwin	ld1	{v17.4s},[x3]
1187bc3d5698SJohn Baldwin	add	v16.4s,v16.4s,v6.4s
1188bc3d5698SJohn Baldwin	sub	x3,x3,#64*4-16	// rewind
1189bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1190bc3d5698SJohn Baldwin.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
1191bc3d5698SJohn Baldwin.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
1192bc3d5698SJohn Baldwin
1193bc3d5698SJohn Baldwin	add	v17.4s,v17.4s,v7.4s
1194bc3d5698SJohn Baldwin	orr	v2.16b,v0.16b,v0.16b
1195bc3d5698SJohn Baldwin.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
1196bc3d5698SJohn Baldwin.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
1197bc3d5698SJohn Baldwin
	// Add the state saved at the top of the iteration back into the
	// freshly computed working state.
1198bc3d5698SJohn Baldwin	add	v0.4s,v0.4s,v18.4s
1199bc3d5698SJohn Baldwin	add	v1.4s,v1.4s,v19.4s
1200bc3d5698SJohn Baldwin
1201bc3d5698SJohn Baldwin	cbnz	x2,.Loop_hw	// repeat until the block count reaches zero
1202bc3d5698SJohn Baldwin
1203bc3d5698SJohn Baldwin	st1	{v0.4s,v1.4s},[x0]	// write the updated state back
1204bc3d5698SJohn Baldwin
	// Only x29 is reloaded; x30 is never written in this routine, so
	// the saved copy is simply discarded when sp is popped by 16.
1205bc3d5698SJohn Baldwin	ldr	x29,[sp],#16
1206bc3d5698SJohn Baldwin	ret
1207bc3d5698SJohn Baldwin.size	sha256_block_armv8,.-sha256_block_armv8
1208bc3d5698SJohn Baldwin#endif
1209bc3d5698SJohn Baldwin#ifdef	__KERNEL__
1210bc3d5698SJohn Baldwin.globl	sha256_block_neon
1211bc3d5698SJohn Baldwin#endif
1212bc3d5698SJohn Baldwin.type	sha256_block_neon,%function
1213bc3d5698SJohn Baldwin.align	4
1214bc3d5698SJohn Baldwinsha256_block_neon:
1215bd9588bcSAndrew Turner	AARCH64_VALID_CALL_TARGET
1216bc3d5698SJohn Baldwin.Lneon_entry:
1217bd9588bcSAndrew Turner	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
1218bc3d5698SJohn Baldwin	stp	x29, x30, [sp, #-16]!
1219bc3d5698SJohn Baldwin	mov	x29, sp
1220bc3d5698SJohn Baldwin	sub	sp,sp,#16*4
1221bc3d5698SJohn Baldwin
1222bc3d5698SJohn Baldwin	adr	x16,.LK256
1223bc3d5698SJohn Baldwin	add	x2,x1,x2,lsl#6	// len to point at the end of inp
1224bc3d5698SJohn Baldwin
1225bc3d5698SJohn Baldwin	ld1	{v0.16b},[x1], #16
1226bc3d5698SJohn Baldwin	ld1	{v1.16b},[x1], #16
1227bc3d5698SJohn Baldwin	ld1	{v2.16b},[x1], #16
1228bc3d5698SJohn Baldwin	ld1	{v3.16b},[x1], #16
1229bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16], #16
1230bc3d5698SJohn Baldwin	ld1	{v5.4s},[x16], #16
1231bc3d5698SJohn Baldwin	ld1	{v6.4s},[x16], #16
1232bc3d5698SJohn Baldwin	ld1	{v7.4s},[x16], #16
1233bc3d5698SJohn Baldwin	rev32	v0.16b,v0.16b		// yes, even on
1234bc3d5698SJohn Baldwin	rev32	v1.16b,v1.16b		// big-endian
1235bc3d5698SJohn Baldwin	rev32	v2.16b,v2.16b
1236bc3d5698SJohn Baldwin	rev32	v3.16b,v3.16b
1237bc3d5698SJohn Baldwin	mov	x17,sp
1238bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v0.4s
1239bc3d5698SJohn Baldwin	add	v5.4s,v5.4s,v1.4s
1240bc3d5698SJohn Baldwin	add	v6.4s,v6.4s,v2.4s
1241bc3d5698SJohn Baldwin	st1	{v4.4s,v5.4s},[x17], #32
1242bc3d5698SJohn Baldwin	add	v7.4s,v7.4s,v3.4s
1243bc3d5698SJohn Baldwin	st1	{v6.4s,v7.4s},[x17]
1244bc3d5698SJohn Baldwin	sub	x17,x17,#32
1245bc3d5698SJohn Baldwin
1246bc3d5698SJohn Baldwin	ldp	w3,w4,[x0]
1247bc3d5698SJohn Baldwin	ldp	w5,w6,[x0,#8]
1248bc3d5698SJohn Baldwin	ldp	w7,w8,[x0,#16]
1249bc3d5698SJohn Baldwin	ldp	w9,w10,[x0,#24]
1250bc3d5698SJohn Baldwin	ldr	w12,[sp,#0]
1251bc3d5698SJohn Baldwin	mov	w13,wzr
1252bc3d5698SJohn Baldwin	eor	w14,w4,w5
1253bc3d5698SJohn Baldwin	mov	w15,wzr
1254bc3d5698SJohn Baldwin	b	.L_00_48
1255bc3d5698SJohn Baldwin
1256bc3d5698SJohn Baldwin.align	4
1257bc3d5698SJohn Baldwin.L_00_48:
1258bc3d5698SJohn Baldwin	ext	v4.16b,v0.16b,v1.16b,#4
1259bc3d5698SJohn Baldwin	add	w10,w10,w12
1260bc3d5698SJohn Baldwin	add	w3,w3,w15
1261bc3d5698SJohn Baldwin	and	w12,w8,w7
1262bc3d5698SJohn Baldwin	bic	w15,w9,w7
1263bc3d5698SJohn Baldwin	ext	v7.16b,v2.16b,v3.16b,#4
1264bc3d5698SJohn Baldwin	eor	w11,w7,w7,ror#5
1265bc3d5698SJohn Baldwin	add	w3,w3,w13
1266bc3d5698SJohn Baldwin	mov	d19,v3.d[1]
1267bc3d5698SJohn Baldwin	orr	w12,w12,w15
1268bc3d5698SJohn Baldwin	eor	w11,w11,w7,ror#19
1269bc3d5698SJohn Baldwin	ushr	v6.4s,v4.4s,#7
1270bc3d5698SJohn Baldwin	eor	w15,w3,w3,ror#11
1271bc3d5698SJohn Baldwin	ushr	v5.4s,v4.4s,#3
1272bc3d5698SJohn Baldwin	add	w10,w10,w12
1273bc3d5698SJohn Baldwin	add	v0.4s,v0.4s,v7.4s
1274bc3d5698SJohn Baldwin	ror	w11,w11,#6
1275bc3d5698SJohn Baldwin	sli	v6.4s,v4.4s,#25
1276bc3d5698SJohn Baldwin	eor	w13,w3,w4
1277bc3d5698SJohn Baldwin	eor	w15,w15,w3,ror#20
1278bc3d5698SJohn Baldwin	ushr	v7.4s,v4.4s,#18
1279bc3d5698SJohn Baldwin	add	w10,w10,w11
1280bc3d5698SJohn Baldwin	ldr	w12,[sp,#4]
1281bc3d5698SJohn Baldwin	and	w14,w14,w13
1282bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v6.16b
1283bc3d5698SJohn Baldwin	ror	w15,w15,#2
1284bc3d5698SJohn Baldwin	add	w6,w6,w10
1285bc3d5698SJohn Baldwin	sli	v7.4s,v4.4s,#14
1286bc3d5698SJohn Baldwin	eor	w14,w14,w4
1287bc3d5698SJohn Baldwin	ushr	v16.4s,v19.4s,#17
1288bc3d5698SJohn Baldwin	add	w9,w9,w12
1289bc3d5698SJohn Baldwin	add	w10,w10,w15
1290bc3d5698SJohn Baldwin	and	w12,w7,w6
1291bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v7.16b
1292bc3d5698SJohn Baldwin	bic	w15,w8,w6
1293bc3d5698SJohn Baldwin	eor	w11,w6,w6,ror#5
1294bc3d5698SJohn Baldwin	sli	v16.4s,v19.4s,#15
1295bc3d5698SJohn Baldwin	add	w10,w10,w14
1296bc3d5698SJohn Baldwin	orr	w12,w12,w15
1297bc3d5698SJohn Baldwin	ushr	v17.4s,v19.4s,#10
1298bc3d5698SJohn Baldwin	eor	w11,w11,w6,ror#19
1299bc3d5698SJohn Baldwin	eor	w15,w10,w10,ror#11
1300bc3d5698SJohn Baldwin	ushr	v7.4s,v19.4s,#19
1301bc3d5698SJohn Baldwin	add	w9,w9,w12
1302bc3d5698SJohn Baldwin	ror	w11,w11,#6
1303bc3d5698SJohn Baldwin	add	v0.4s,v0.4s,v5.4s
1304bc3d5698SJohn Baldwin	eor	w14,w10,w3
1305bc3d5698SJohn Baldwin	eor	w15,w15,w10,ror#20
1306bc3d5698SJohn Baldwin	sli	v7.4s,v19.4s,#13
1307bc3d5698SJohn Baldwin	add	w9,w9,w11
1308bc3d5698SJohn Baldwin	ldr	w12,[sp,#8]
1309bc3d5698SJohn Baldwin	and	w13,w13,w14
1310bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v16.16b
1311bc3d5698SJohn Baldwin	ror	w15,w15,#2
1312bc3d5698SJohn Baldwin	add	w5,w5,w9
1313bc3d5698SJohn Baldwin	eor	w13,w13,w3
1314bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v7.16b
1315bc3d5698SJohn Baldwin	add	w8,w8,w12
1316bc3d5698SJohn Baldwin	add	w9,w9,w15
1317bc3d5698SJohn Baldwin	and	w12,w6,w5
1318bc3d5698SJohn Baldwin	add	v0.4s,v0.4s,v17.4s
1319bc3d5698SJohn Baldwin	bic	w15,w7,w5
1320bc3d5698SJohn Baldwin	eor	w11,w5,w5,ror#5
1321bc3d5698SJohn Baldwin	add	w9,w9,w13
1322bc3d5698SJohn Baldwin	ushr	v18.4s,v0.4s,#17
1323bc3d5698SJohn Baldwin	orr	w12,w12,w15
1324bc3d5698SJohn Baldwin	ushr	v19.4s,v0.4s,#10
1325bc3d5698SJohn Baldwin	eor	w11,w11,w5,ror#19
1326bc3d5698SJohn Baldwin	eor	w15,w9,w9,ror#11
1327bc3d5698SJohn Baldwin	sli	v18.4s,v0.4s,#15
1328bc3d5698SJohn Baldwin	add	w8,w8,w12
1329bc3d5698SJohn Baldwin	ushr	v17.4s,v0.4s,#19
1330bc3d5698SJohn Baldwin	ror	w11,w11,#6
1331bc3d5698SJohn Baldwin	eor	w13,w9,w10
1332bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v18.16b
1333bc3d5698SJohn Baldwin	eor	w15,w15,w9,ror#20
1334bc3d5698SJohn Baldwin	add	w8,w8,w11
1335bc3d5698SJohn Baldwin	sli	v17.4s,v0.4s,#13
1336bc3d5698SJohn Baldwin	ldr	w12,[sp,#12]
1337bc3d5698SJohn Baldwin	and	w14,w14,w13
1338bc3d5698SJohn Baldwin	ror	w15,w15,#2
1339bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16], #16
1340bc3d5698SJohn Baldwin	add	w4,w4,w8
1341bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v17.16b
1342bc3d5698SJohn Baldwin	eor	w14,w14,w10
1343bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v17.16b
1344bc3d5698SJohn Baldwin	add	w7,w7,w12
1345bc3d5698SJohn Baldwin	add	w8,w8,w15
1346bc3d5698SJohn Baldwin	and	w12,w5,w4
1347bc3d5698SJohn Baldwin	mov	v17.d[1],v19.d[0]
1348bc3d5698SJohn Baldwin	bic	w15,w6,w4
1349bc3d5698SJohn Baldwin	eor	w11,w4,w4,ror#5
1350bc3d5698SJohn Baldwin	add	w8,w8,w14
1351bc3d5698SJohn Baldwin	add	v0.4s,v0.4s,v17.4s
1352bc3d5698SJohn Baldwin	orr	w12,w12,w15
1353bc3d5698SJohn Baldwin	eor	w11,w11,w4,ror#19
1354bc3d5698SJohn Baldwin	eor	w15,w8,w8,ror#11
1355bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v0.4s
1356bc3d5698SJohn Baldwin	add	w7,w7,w12
1357bc3d5698SJohn Baldwin	ror	w11,w11,#6
1358bc3d5698SJohn Baldwin	eor	w14,w8,w9
1359bc3d5698SJohn Baldwin	eor	w15,w15,w8,ror#20
1360bc3d5698SJohn Baldwin	add	w7,w7,w11
1361bc3d5698SJohn Baldwin	ldr	w12,[sp,#16]
1362bc3d5698SJohn Baldwin	and	w13,w13,w14
1363bc3d5698SJohn Baldwin	ror	w15,w15,#2
1364bc3d5698SJohn Baldwin	add	w3,w3,w7
1365bc3d5698SJohn Baldwin	eor	w13,w13,w9
1366bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1367bc3d5698SJohn Baldwin	ext	v4.16b,v1.16b,v2.16b,#4
1368bc3d5698SJohn Baldwin	add	w6,w6,w12
1369bc3d5698SJohn Baldwin	add	w7,w7,w15
1370bc3d5698SJohn Baldwin	and	w12,w4,w3
1371bc3d5698SJohn Baldwin	bic	w15,w5,w3
1372bc3d5698SJohn Baldwin	ext	v7.16b,v3.16b,v0.16b,#4
1373bc3d5698SJohn Baldwin	eor	w11,w3,w3,ror#5
1374bc3d5698SJohn Baldwin	add	w7,w7,w13
1375bc3d5698SJohn Baldwin	mov	d19,v0.d[1]
1376bc3d5698SJohn Baldwin	orr	w12,w12,w15
1377bc3d5698SJohn Baldwin	eor	w11,w11,w3,ror#19
1378bc3d5698SJohn Baldwin	ushr	v6.4s,v4.4s,#7
1379bc3d5698SJohn Baldwin	eor	w15,w7,w7,ror#11
1380bc3d5698SJohn Baldwin	ushr	v5.4s,v4.4s,#3
1381bc3d5698SJohn Baldwin	add	w6,w6,w12
1382bc3d5698SJohn Baldwin	add	v1.4s,v1.4s,v7.4s
1383bc3d5698SJohn Baldwin	ror	w11,w11,#6
1384bc3d5698SJohn Baldwin	sli	v6.4s,v4.4s,#25
1385bc3d5698SJohn Baldwin	eor	w13,w7,w8
1386bc3d5698SJohn Baldwin	eor	w15,w15,w7,ror#20
1387bc3d5698SJohn Baldwin	ushr	v7.4s,v4.4s,#18
1388bc3d5698SJohn Baldwin	add	w6,w6,w11
1389bc3d5698SJohn Baldwin	ldr	w12,[sp,#20]
1390bc3d5698SJohn Baldwin	and	w14,w14,w13
1391bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v6.16b
1392bc3d5698SJohn Baldwin	ror	w15,w15,#2
1393bc3d5698SJohn Baldwin	add	w10,w10,w6
1394bc3d5698SJohn Baldwin	sli	v7.4s,v4.4s,#14
1395bc3d5698SJohn Baldwin	eor	w14,w14,w8
1396bc3d5698SJohn Baldwin	ushr	v16.4s,v19.4s,#17
1397bc3d5698SJohn Baldwin	add	w5,w5,w12
1398bc3d5698SJohn Baldwin	add	w6,w6,w15
1399bc3d5698SJohn Baldwin	and	w12,w3,w10
1400bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v7.16b
1401bc3d5698SJohn Baldwin	bic	w15,w4,w10
1402bc3d5698SJohn Baldwin	eor	w11,w10,w10,ror#5
1403bc3d5698SJohn Baldwin	sli	v16.4s,v19.4s,#15
1404bc3d5698SJohn Baldwin	add	w6,w6,w14
1405bc3d5698SJohn Baldwin	orr	w12,w12,w15
1406bc3d5698SJohn Baldwin	ushr	v17.4s,v19.4s,#10
1407bc3d5698SJohn Baldwin	eor	w11,w11,w10,ror#19
1408bc3d5698SJohn Baldwin	eor	w15,w6,w6,ror#11
1409bc3d5698SJohn Baldwin	ushr	v7.4s,v19.4s,#19
1410bc3d5698SJohn Baldwin	add	w5,w5,w12
1411bc3d5698SJohn Baldwin	ror	w11,w11,#6
1412bc3d5698SJohn Baldwin	add	v1.4s,v1.4s,v5.4s
1413bc3d5698SJohn Baldwin	eor	w14,w6,w7
1414bc3d5698SJohn Baldwin	eor	w15,w15,w6,ror#20
1415bc3d5698SJohn Baldwin	sli	v7.4s,v19.4s,#13
1416bc3d5698SJohn Baldwin	add	w5,w5,w11
1417bc3d5698SJohn Baldwin	ldr	w12,[sp,#24]
1418bc3d5698SJohn Baldwin	and	w13,w13,w14
1419bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v16.16b
1420bc3d5698SJohn Baldwin	ror	w15,w15,#2
1421bc3d5698SJohn Baldwin	add	w9,w9,w5
1422bc3d5698SJohn Baldwin	eor	w13,w13,w7
1423bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v7.16b
1424bc3d5698SJohn Baldwin	add	w4,w4,w12
1425bc3d5698SJohn Baldwin	add	w5,w5,w15
1426bc3d5698SJohn Baldwin	and	w12,w10,w9
1427bc3d5698SJohn Baldwin	add	v1.4s,v1.4s,v17.4s
1428bc3d5698SJohn Baldwin	bic	w15,w3,w9
1429bc3d5698SJohn Baldwin	eor	w11,w9,w9,ror#5
1430bc3d5698SJohn Baldwin	add	w5,w5,w13
1431bc3d5698SJohn Baldwin	ushr	v18.4s,v1.4s,#17
1432bc3d5698SJohn Baldwin	orr	w12,w12,w15
1433bc3d5698SJohn Baldwin	ushr	v19.4s,v1.4s,#10
1434bc3d5698SJohn Baldwin	eor	w11,w11,w9,ror#19
1435bc3d5698SJohn Baldwin	eor	w15,w5,w5,ror#11
1436bc3d5698SJohn Baldwin	sli	v18.4s,v1.4s,#15
1437bc3d5698SJohn Baldwin	add	w4,w4,w12
1438bc3d5698SJohn Baldwin	ushr	v17.4s,v1.4s,#19
1439bc3d5698SJohn Baldwin	ror	w11,w11,#6
1440bc3d5698SJohn Baldwin	eor	w13,w5,w6
1441bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v18.16b
1442bc3d5698SJohn Baldwin	eor	w15,w15,w5,ror#20
1443bc3d5698SJohn Baldwin	add	w4,w4,w11
1444bc3d5698SJohn Baldwin	sli	v17.4s,v1.4s,#13
1445bc3d5698SJohn Baldwin	ldr	w12,[sp,#28]
1446bc3d5698SJohn Baldwin	and	w14,w14,w13
1447bc3d5698SJohn Baldwin	ror	w15,w15,#2
1448bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16], #16
1449bc3d5698SJohn Baldwin	add	w8,w8,w4
1450bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v17.16b
1451bc3d5698SJohn Baldwin	eor	w14,w14,w6
1452bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v17.16b
1453bc3d5698SJohn Baldwin	add	w3,w3,w12
1454bc3d5698SJohn Baldwin	add	w4,w4,w15
1455bc3d5698SJohn Baldwin	and	w12,w9,w8
1456bc3d5698SJohn Baldwin	mov	v17.d[1],v19.d[0]
1457bc3d5698SJohn Baldwin	bic	w15,w10,w8
1458bc3d5698SJohn Baldwin	eor	w11,w8,w8,ror#5
1459bc3d5698SJohn Baldwin	add	w4,w4,w14
1460bc3d5698SJohn Baldwin	add	v1.4s,v1.4s,v17.4s
1461bc3d5698SJohn Baldwin	orr	w12,w12,w15
1462bc3d5698SJohn Baldwin	eor	w11,w11,w8,ror#19
1463bc3d5698SJohn Baldwin	eor	w15,w4,w4,ror#11
1464bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v1.4s
1465bc3d5698SJohn Baldwin	add	w3,w3,w12
1466bc3d5698SJohn Baldwin	ror	w11,w11,#6
1467bc3d5698SJohn Baldwin	eor	w14,w4,w5
1468bc3d5698SJohn Baldwin	eor	w15,w15,w4,ror#20
1469bc3d5698SJohn Baldwin	add	w3,w3,w11
1470bc3d5698SJohn Baldwin	ldr	w12,[sp,#32]
1471bc3d5698SJohn Baldwin	and	w13,w13,w14
1472bc3d5698SJohn Baldwin	ror	w15,w15,#2
1473bc3d5698SJohn Baldwin	add	w7,w7,w3
1474bc3d5698SJohn Baldwin	eor	w13,w13,w5
1475bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1476bc3d5698SJohn Baldwin	ext	v4.16b,v2.16b,v3.16b,#4
1477bc3d5698SJohn Baldwin	add	w10,w10,w12
1478bc3d5698SJohn Baldwin	add	w3,w3,w15
1479bc3d5698SJohn Baldwin	and	w12,w8,w7
1480bc3d5698SJohn Baldwin	bic	w15,w9,w7
1481bc3d5698SJohn Baldwin	ext	v7.16b,v0.16b,v1.16b,#4
1482bc3d5698SJohn Baldwin	eor	w11,w7,w7,ror#5
1483bc3d5698SJohn Baldwin	add	w3,w3,w13
1484bc3d5698SJohn Baldwin	mov	d19,v1.d[1]
1485bc3d5698SJohn Baldwin	orr	w12,w12,w15
1486bc3d5698SJohn Baldwin	eor	w11,w11,w7,ror#19
1487bc3d5698SJohn Baldwin	ushr	v6.4s,v4.4s,#7
1488bc3d5698SJohn Baldwin	eor	w15,w3,w3,ror#11
1489bc3d5698SJohn Baldwin	ushr	v5.4s,v4.4s,#3
1490bc3d5698SJohn Baldwin	add	w10,w10,w12
1491bc3d5698SJohn Baldwin	add	v2.4s,v2.4s,v7.4s
1492bc3d5698SJohn Baldwin	ror	w11,w11,#6
1493bc3d5698SJohn Baldwin	sli	v6.4s,v4.4s,#25
1494bc3d5698SJohn Baldwin	eor	w13,w3,w4
1495bc3d5698SJohn Baldwin	eor	w15,w15,w3,ror#20
1496bc3d5698SJohn Baldwin	ushr	v7.4s,v4.4s,#18
1497bc3d5698SJohn Baldwin	add	w10,w10,w11
1498bc3d5698SJohn Baldwin	ldr	w12,[sp,#36]
1499bc3d5698SJohn Baldwin	and	w14,w14,w13
1500bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v6.16b
1501bc3d5698SJohn Baldwin	ror	w15,w15,#2
1502bc3d5698SJohn Baldwin	add	w6,w6,w10
1503bc3d5698SJohn Baldwin	sli	v7.4s,v4.4s,#14
1504bc3d5698SJohn Baldwin	eor	w14,w14,w4
1505bc3d5698SJohn Baldwin	ushr	v16.4s,v19.4s,#17
1506bc3d5698SJohn Baldwin	add	w9,w9,w12
1507bc3d5698SJohn Baldwin	add	w10,w10,w15
1508bc3d5698SJohn Baldwin	and	w12,w7,w6
1509bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v7.16b
1510bc3d5698SJohn Baldwin	bic	w15,w8,w6
1511bc3d5698SJohn Baldwin	eor	w11,w6,w6,ror#5
1512bc3d5698SJohn Baldwin	sli	v16.4s,v19.4s,#15
1513bc3d5698SJohn Baldwin	add	w10,w10,w14
1514bc3d5698SJohn Baldwin	orr	w12,w12,w15
1515bc3d5698SJohn Baldwin	ushr	v17.4s,v19.4s,#10
1516bc3d5698SJohn Baldwin	eor	w11,w11,w6,ror#19
1517bc3d5698SJohn Baldwin	eor	w15,w10,w10,ror#11
1518bc3d5698SJohn Baldwin	ushr	v7.4s,v19.4s,#19
1519bc3d5698SJohn Baldwin	add	w9,w9,w12
1520bc3d5698SJohn Baldwin	ror	w11,w11,#6
1521bc3d5698SJohn Baldwin	add	v2.4s,v2.4s,v5.4s
1522bc3d5698SJohn Baldwin	eor	w14,w10,w3
1523bc3d5698SJohn Baldwin	eor	w15,w15,w10,ror#20
1524bc3d5698SJohn Baldwin	sli	v7.4s,v19.4s,#13
1525bc3d5698SJohn Baldwin	add	w9,w9,w11
1526bc3d5698SJohn Baldwin	ldr	w12,[sp,#40]
1527bc3d5698SJohn Baldwin	and	w13,w13,w14
1528bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v16.16b
1529bc3d5698SJohn Baldwin	ror	w15,w15,#2
1530bc3d5698SJohn Baldwin	add	w5,w5,w9
1531bc3d5698SJohn Baldwin	eor	w13,w13,w3
1532bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v7.16b
1533bc3d5698SJohn Baldwin	add	w8,w8,w12
1534bc3d5698SJohn Baldwin	add	w9,w9,w15
1535bc3d5698SJohn Baldwin	and	w12,w6,w5
1536bc3d5698SJohn Baldwin	add	v2.4s,v2.4s,v17.4s
1537bc3d5698SJohn Baldwin	bic	w15,w7,w5
1538bc3d5698SJohn Baldwin	eor	w11,w5,w5,ror#5
1539bc3d5698SJohn Baldwin	add	w9,w9,w13
1540bc3d5698SJohn Baldwin	ushr	v18.4s,v2.4s,#17
1541bc3d5698SJohn Baldwin	orr	w12,w12,w15
1542bc3d5698SJohn Baldwin	ushr	v19.4s,v2.4s,#10
1543bc3d5698SJohn Baldwin	eor	w11,w11,w5,ror#19
1544bc3d5698SJohn Baldwin	eor	w15,w9,w9,ror#11
1545bc3d5698SJohn Baldwin	sli	v18.4s,v2.4s,#15
1546bc3d5698SJohn Baldwin	add	w8,w8,w12
1547bc3d5698SJohn Baldwin	ushr	v17.4s,v2.4s,#19
1548bc3d5698SJohn Baldwin	ror	w11,w11,#6
1549bc3d5698SJohn Baldwin	eor	w13,w9,w10
1550bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v18.16b
1551bc3d5698SJohn Baldwin	eor	w15,w15,w9,ror#20
1552bc3d5698SJohn Baldwin	add	w8,w8,w11
1553bc3d5698SJohn Baldwin	sli	v17.4s,v2.4s,#13
1554bc3d5698SJohn Baldwin	ldr	w12,[sp,#44]
1555bc3d5698SJohn Baldwin	and	w14,w14,w13
1556bc3d5698SJohn Baldwin	ror	w15,w15,#2
1557bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16], #16
1558bc3d5698SJohn Baldwin	add	w4,w4,w8
1559bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v17.16b
1560bc3d5698SJohn Baldwin	eor	w14,w14,w10
1561bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v17.16b
1562bc3d5698SJohn Baldwin	add	w7,w7,w12
1563bc3d5698SJohn Baldwin	add	w8,w8,w15
1564bc3d5698SJohn Baldwin	and	w12,w5,w4
1565bc3d5698SJohn Baldwin	mov	v17.d[1],v19.d[0]
1566bc3d5698SJohn Baldwin	bic	w15,w6,w4
1567bc3d5698SJohn Baldwin	eor	w11,w4,w4,ror#5
1568bc3d5698SJohn Baldwin	add	w8,w8,w14
1569bc3d5698SJohn Baldwin	add	v2.4s,v2.4s,v17.4s
1570bc3d5698SJohn Baldwin	orr	w12,w12,w15
1571bc3d5698SJohn Baldwin	eor	w11,w11,w4,ror#19
1572bc3d5698SJohn Baldwin	eor	w15,w8,w8,ror#11
1573bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v2.4s
1574bc3d5698SJohn Baldwin	add	w7,w7,w12
1575bc3d5698SJohn Baldwin	ror	w11,w11,#6
1576bc3d5698SJohn Baldwin	eor	w14,w8,w9
1577bc3d5698SJohn Baldwin	eor	w15,w15,w8,ror#20
1578bc3d5698SJohn Baldwin	add	w7,w7,w11
1579bc3d5698SJohn Baldwin	ldr	w12,[sp,#48]
1580bc3d5698SJohn Baldwin	and	w13,w13,w14
1581bc3d5698SJohn Baldwin	ror	w15,w15,#2
1582bc3d5698SJohn Baldwin	add	w3,w3,w7
1583bc3d5698SJohn Baldwin	eor	w13,w13,w9
1584bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1585bc3d5698SJohn Baldwin	ext	v4.16b,v3.16b,v0.16b,#4
1586bc3d5698SJohn Baldwin	add	w6,w6,w12
1587bc3d5698SJohn Baldwin	add	w7,w7,w15
1588bc3d5698SJohn Baldwin	and	w12,w4,w3
1589bc3d5698SJohn Baldwin	bic	w15,w5,w3
1590bc3d5698SJohn Baldwin	ext	v7.16b,v1.16b,v2.16b,#4
1591bc3d5698SJohn Baldwin	eor	w11,w3,w3,ror#5
1592bc3d5698SJohn Baldwin	add	w7,w7,w13
1593bc3d5698SJohn Baldwin	mov	d19,v2.d[1]
1594bc3d5698SJohn Baldwin	orr	w12,w12,w15
1595bc3d5698SJohn Baldwin	eor	w11,w11,w3,ror#19
1596bc3d5698SJohn Baldwin	ushr	v6.4s,v4.4s,#7
1597bc3d5698SJohn Baldwin	eor	w15,w7,w7,ror#11
1598bc3d5698SJohn Baldwin	ushr	v5.4s,v4.4s,#3
1599bc3d5698SJohn Baldwin	add	w6,w6,w12
1600bc3d5698SJohn Baldwin	add	v3.4s,v3.4s,v7.4s
1601bc3d5698SJohn Baldwin	ror	w11,w11,#6
1602bc3d5698SJohn Baldwin	sli	v6.4s,v4.4s,#25
1603bc3d5698SJohn Baldwin	eor	w13,w7,w8
1604bc3d5698SJohn Baldwin	eor	w15,w15,w7,ror#20
1605bc3d5698SJohn Baldwin	ushr	v7.4s,v4.4s,#18
1606bc3d5698SJohn Baldwin	add	w6,w6,w11
1607bc3d5698SJohn Baldwin	ldr	w12,[sp,#52]
1608bc3d5698SJohn Baldwin	and	w14,w14,w13
1609bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v6.16b
1610bc3d5698SJohn Baldwin	ror	w15,w15,#2
1611bc3d5698SJohn Baldwin	add	w10,w10,w6
1612bc3d5698SJohn Baldwin	sli	v7.4s,v4.4s,#14
1613bc3d5698SJohn Baldwin	eor	w14,w14,w8
1614bc3d5698SJohn Baldwin	ushr	v16.4s,v19.4s,#17
1615bc3d5698SJohn Baldwin	add	w5,w5,w12
1616bc3d5698SJohn Baldwin	add	w6,w6,w15
1617bc3d5698SJohn Baldwin	and	w12,w3,w10
1618bc3d5698SJohn Baldwin	eor	v5.16b,v5.16b,v7.16b
1619bc3d5698SJohn Baldwin	bic	w15,w4,w10
1620bc3d5698SJohn Baldwin	eor	w11,w10,w10,ror#5
1621bc3d5698SJohn Baldwin	sli	v16.4s,v19.4s,#15
1622bc3d5698SJohn Baldwin	add	w6,w6,w14
1623bc3d5698SJohn Baldwin	orr	w12,w12,w15
1624bc3d5698SJohn Baldwin	ushr	v17.4s,v19.4s,#10
1625bc3d5698SJohn Baldwin	eor	w11,w11,w10,ror#19
1626bc3d5698SJohn Baldwin	eor	w15,w6,w6,ror#11
1627bc3d5698SJohn Baldwin	ushr	v7.4s,v19.4s,#19
1628bc3d5698SJohn Baldwin	add	w5,w5,w12
1629bc3d5698SJohn Baldwin	ror	w11,w11,#6
1630bc3d5698SJohn Baldwin	add	v3.4s,v3.4s,v5.4s
1631bc3d5698SJohn Baldwin	eor	w14,w6,w7
1632bc3d5698SJohn Baldwin	eor	w15,w15,w6,ror#20
1633bc3d5698SJohn Baldwin	sli	v7.4s,v19.4s,#13
1634bc3d5698SJohn Baldwin	add	w5,w5,w11
1635bc3d5698SJohn Baldwin	ldr	w12,[sp,#56]
1636bc3d5698SJohn Baldwin	and	w13,w13,w14
1637bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v16.16b
1638bc3d5698SJohn Baldwin	ror	w15,w15,#2
1639bc3d5698SJohn Baldwin	add	w9,w9,w5
1640bc3d5698SJohn Baldwin	eor	w13,w13,w7
1641bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v7.16b
1642bc3d5698SJohn Baldwin	add	w4,w4,w12
1643bc3d5698SJohn Baldwin	add	w5,w5,w15
1644bc3d5698SJohn Baldwin	and	w12,w10,w9
1645bc3d5698SJohn Baldwin	add	v3.4s,v3.4s,v17.4s
1646bc3d5698SJohn Baldwin	bic	w15,w3,w9
1647bc3d5698SJohn Baldwin	eor	w11,w9,w9,ror#5
1648bc3d5698SJohn Baldwin	add	w5,w5,w13
1649bc3d5698SJohn Baldwin	ushr	v18.4s,v3.4s,#17
1650bc3d5698SJohn Baldwin	orr	w12,w12,w15
1651bc3d5698SJohn Baldwin	ushr	v19.4s,v3.4s,#10
1652bc3d5698SJohn Baldwin	eor	w11,w11,w9,ror#19
1653bc3d5698SJohn Baldwin	eor	w15,w5,w5,ror#11
1654bc3d5698SJohn Baldwin	sli	v18.4s,v3.4s,#15
1655bc3d5698SJohn Baldwin	add	w4,w4,w12
1656bc3d5698SJohn Baldwin	ushr	v17.4s,v3.4s,#19
1657bc3d5698SJohn Baldwin	ror	w11,w11,#6
1658bc3d5698SJohn Baldwin	eor	w13,w5,w6
1659bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v18.16b
1660bc3d5698SJohn Baldwin	eor	w15,w15,w5,ror#20
1661bc3d5698SJohn Baldwin	add	w4,w4,w11
1662bc3d5698SJohn Baldwin	sli	v17.4s,v3.4s,#13
1663bc3d5698SJohn Baldwin	ldr	w12,[sp,#60]
1664bc3d5698SJohn Baldwin	and	w14,w14,w13
1665bc3d5698SJohn Baldwin	ror	w15,w15,#2
1666bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16], #16
1667bc3d5698SJohn Baldwin	add	w8,w8,w4
1668bc3d5698SJohn Baldwin	eor	v19.16b,v19.16b,v17.16b
1669bc3d5698SJohn Baldwin	eor	w14,w14,w6
1670bc3d5698SJohn Baldwin	eor	v17.16b,v17.16b,v17.16b
1671bc3d5698SJohn Baldwin	add	w3,w3,w12
1672bc3d5698SJohn Baldwin	add	w4,w4,w15
1673bc3d5698SJohn Baldwin	and	w12,w9,w8
1674bc3d5698SJohn Baldwin	mov	v17.d[1],v19.d[0]
1675bc3d5698SJohn Baldwin	bic	w15,w10,w8
1676bc3d5698SJohn Baldwin	eor	w11,w8,w8,ror#5
1677bc3d5698SJohn Baldwin	add	w4,w4,w14
1678bc3d5698SJohn Baldwin	add	v3.4s,v3.4s,v17.4s
1679bc3d5698SJohn Baldwin	orr	w12,w12,w15
1680bc3d5698SJohn Baldwin	eor	w11,w11,w8,ror#19
1681bc3d5698SJohn Baldwin	eor	w15,w4,w4,ror#11
1682bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v3.4s
1683bc3d5698SJohn Baldwin	add	w3,w3,w12
1684bc3d5698SJohn Baldwin	ror	w11,w11,#6
1685bc3d5698SJohn Baldwin	eor	w14,w4,w5
1686bc3d5698SJohn Baldwin	eor	w15,w15,w4,ror#20
1687bc3d5698SJohn Baldwin	add	w3,w3,w11
1688bc3d5698SJohn Baldwin	ldr	w12,[x16]
1689bc3d5698SJohn Baldwin	and	w13,w13,w14
1690bc3d5698SJohn Baldwin	ror	w15,w15,#2
1691bc3d5698SJohn Baldwin	add	w7,w7,w3
1692bc3d5698SJohn Baldwin	eor	w13,w13,w5
1693bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1694bc3d5698SJohn Baldwin	cmp	w12,#0				// check for K256 terminator
1695bc3d5698SJohn Baldwin	ldr	w12,[sp,#0]
1696bc3d5698SJohn Baldwin	sub	x17,x17,#64
1697bc3d5698SJohn Baldwin	bne	.L_00_48
1698bc3d5698SJohn Baldwin
1699bc3d5698SJohn Baldwin	sub	x16,x16,#256		// rewind x16
1700bc3d5698SJohn Baldwin	cmp	x1,x2
1701bc3d5698SJohn Baldwin	mov	x17, #64
1702bc3d5698SJohn Baldwin	csel	x17, x17, xzr, eq
1703bc3d5698SJohn Baldwin	sub	x1,x1,x17			// avoid SEGV
1704bc3d5698SJohn Baldwin	mov	x17,sp
1705bc3d5698SJohn Baldwin	add	w10,w10,w12
1706bc3d5698SJohn Baldwin	add	w3,w3,w15
1707bc3d5698SJohn Baldwin	and	w12,w8,w7
1708bc3d5698SJohn Baldwin	ld1	{v0.16b},[x1],#16
1709bc3d5698SJohn Baldwin	bic	w15,w9,w7
1710bc3d5698SJohn Baldwin	eor	w11,w7,w7,ror#5
1711bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16],#16
1712bc3d5698SJohn Baldwin	add	w3,w3,w13
1713bc3d5698SJohn Baldwin	orr	w12,w12,w15
1714bc3d5698SJohn Baldwin	eor	w11,w11,w7,ror#19
1715bc3d5698SJohn Baldwin	eor	w15,w3,w3,ror#11
1716bc3d5698SJohn Baldwin	rev32	v0.16b,v0.16b
1717bc3d5698SJohn Baldwin	add	w10,w10,w12
1718bc3d5698SJohn Baldwin	ror	w11,w11,#6
1719bc3d5698SJohn Baldwin	eor	w13,w3,w4
1720bc3d5698SJohn Baldwin	eor	w15,w15,w3,ror#20
1721bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v0.4s
1722bc3d5698SJohn Baldwin	add	w10,w10,w11
1723bc3d5698SJohn Baldwin	ldr	w12,[sp,#4]
1724bc3d5698SJohn Baldwin	and	w14,w14,w13
1725bc3d5698SJohn Baldwin	ror	w15,w15,#2
1726bc3d5698SJohn Baldwin	add	w6,w6,w10
1727bc3d5698SJohn Baldwin	eor	w14,w14,w4
1728bc3d5698SJohn Baldwin	add	w9,w9,w12
1729bc3d5698SJohn Baldwin	add	w10,w10,w15
1730bc3d5698SJohn Baldwin	and	w12,w7,w6
1731bc3d5698SJohn Baldwin	bic	w15,w8,w6
1732bc3d5698SJohn Baldwin	eor	w11,w6,w6,ror#5
1733bc3d5698SJohn Baldwin	add	w10,w10,w14
1734bc3d5698SJohn Baldwin	orr	w12,w12,w15
1735bc3d5698SJohn Baldwin	eor	w11,w11,w6,ror#19
1736bc3d5698SJohn Baldwin	eor	w15,w10,w10,ror#11
1737bc3d5698SJohn Baldwin	add	w9,w9,w12
1738bc3d5698SJohn Baldwin	ror	w11,w11,#6
1739bc3d5698SJohn Baldwin	eor	w14,w10,w3
1740bc3d5698SJohn Baldwin	eor	w15,w15,w10,ror#20
1741bc3d5698SJohn Baldwin	add	w9,w9,w11
1742bc3d5698SJohn Baldwin	ldr	w12,[sp,#8]
1743bc3d5698SJohn Baldwin	and	w13,w13,w14
1744bc3d5698SJohn Baldwin	ror	w15,w15,#2
1745bc3d5698SJohn Baldwin	add	w5,w5,w9
1746bc3d5698SJohn Baldwin	eor	w13,w13,w3
1747bc3d5698SJohn Baldwin	add	w8,w8,w12
1748bc3d5698SJohn Baldwin	add	w9,w9,w15
1749bc3d5698SJohn Baldwin	and	w12,w6,w5
1750bc3d5698SJohn Baldwin	bic	w15,w7,w5
1751bc3d5698SJohn Baldwin	eor	w11,w5,w5,ror#5
1752bc3d5698SJohn Baldwin	add	w9,w9,w13
1753bc3d5698SJohn Baldwin	orr	w12,w12,w15
1754bc3d5698SJohn Baldwin	eor	w11,w11,w5,ror#19
1755bc3d5698SJohn Baldwin	eor	w15,w9,w9,ror#11
1756bc3d5698SJohn Baldwin	add	w8,w8,w12
1757bc3d5698SJohn Baldwin	ror	w11,w11,#6
1758bc3d5698SJohn Baldwin	eor	w13,w9,w10
1759bc3d5698SJohn Baldwin	eor	w15,w15,w9,ror#20
1760bc3d5698SJohn Baldwin	add	w8,w8,w11
1761bc3d5698SJohn Baldwin	ldr	w12,[sp,#12]
1762bc3d5698SJohn Baldwin	and	w14,w14,w13
1763bc3d5698SJohn Baldwin	ror	w15,w15,#2
1764bc3d5698SJohn Baldwin	add	w4,w4,w8
1765bc3d5698SJohn Baldwin	eor	w14,w14,w10
1766bc3d5698SJohn Baldwin	add	w7,w7,w12
1767bc3d5698SJohn Baldwin	add	w8,w8,w15
1768bc3d5698SJohn Baldwin	and	w12,w5,w4
1769bc3d5698SJohn Baldwin	bic	w15,w6,w4
1770bc3d5698SJohn Baldwin	eor	w11,w4,w4,ror#5
1771bc3d5698SJohn Baldwin	add	w8,w8,w14
1772bc3d5698SJohn Baldwin	orr	w12,w12,w15
1773bc3d5698SJohn Baldwin	eor	w11,w11,w4,ror#19
1774bc3d5698SJohn Baldwin	eor	w15,w8,w8,ror#11
1775bc3d5698SJohn Baldwin	add	w7,w7,w12
1776bc3d5698SJohn Baldwin	ror	w11,w11,#6
1777bc3d5698SJohn Baldwin	eor	w14,w8,w9
1778bc3d5698SJohn Baldwin	eor	w15,w15,w8,ror#20
1779bc3d5698SJohn Baldwin	add	w7,w7,w11
1780bc3d5698SJohn Baldwin	ldr	w12,[sp,#16]
1781bc3d5698SJohn Baldwin	and	w13,w13,w14
1782bc3d5698SJohn Baldwin	ror	w15,w15,#2
1783bc3d5698SJohn Baldwin	add	w3,w3,w7
1784bc3d5698SJohn Baldwin	eor	w13,w13,w9
1785bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1786bc3d5698SJohn Baldwin	add	w6,w6,w12
1787bc3d5698SJohn Baldwin	add	w7,w7,w15
1788bc3d5698SJohn Baldwin	and	w12,w4,w3
1789bc3d5698SJohn Baldwin	ld1	{v1.16b},[x1],#16
1790bc3d5698SJohn Baldwin	bic	w15,w5,w3
1791bc3d5698SJohn Baldwin	eor	w11,w3,w3,ror#5
1792bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16],#16
1793bc3d5698SJohn Baldwin	add	w7,w7,w13
1794bc3d5698SJohn Baldwin	orr	w12,w12,w15
1795bc3d5698SJohn Baldwin	eor	w11,w11,w3,ror#19
1796bc3d5698SJohn Baldwin	eor	w15,w7,w7,ror#11
1797bc3d5698SJohn Baldwin	rev32	v1.16b,v1.16b
1798bc3d5698SJohn Baldwin	add	w6,w6,w12
1799bc3d5698SJohn Baldwin	ror	w11,w11,#6
1800bc3d5698SJohn Baldwin	eor	w13,w7,w8
1801bc3d5698SJohn Baldwin	eor	w15,w15,w7,ror#20
1802bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v1.4s
1803bc3d5698SJohn Baldwin	add	w6,w6,w11
1804bc3d5698SJohn Baldwin	ldr	w12,[sp,#20]
1805bc3d5698SJohn Baldwin	and	w14,w14,w13
1806bc3d5698SJohn Baldwin	ror	w15,w15,#2
1807bc3d5698SJohn Baldwin	add	w10,w10,w6
1808bc3d5698SJohn Baldwin	eor	w14,w14,w8
1809bc3d5698SJohn Baldwin	add	w5,w5,w12
1810bc3d5698SJohn Baldwin	add	w6,w6,w15
1811bc3d5698SJohn Baldwin	and	w12,w3,w10
1812bc3d5698SJohn Baldwin	bic	w15,w4,w10
1813bc3d5698SJohn Baldwin	eor	w11,w10,w10,ror#5
1814bc3d5698SJohn Baldwin	add	w6,w6,w14
1815bc3d5698SJohn Baldwin	orr	w12,w12,w15
1816bc3d5698SJohn Baldwin	eor	w11,w11,w10,ror#19
1817bc3d5698SJohn Baldwin	eor	w15,w6,w6,ror#11
1818bc3d5698SJohn Baldwin	add	w5,w5,w12
1819bc3d5698SJohn Baldwin	ror	w11,w11,#6
1820bc3d5698SJohn Baldwin	eor	w14,w6,w7
1821bc3d5698SJohn Baldwin	eor	w15,w15,w6,ror#20
1822bc3d5698SJohn Baldwin	add	w5,w5,w11
1823bc3d5698SJohn Baldwin	ldr	w12,[sp,#24]
1824bc3d5698SJohn Baldwin	and	w13,w13,w14
1825bc3d5698SJohn Baldwin	ror	w15,w15,#2
1826bc3d5698SJohn Baldwin	add	w9,w9,w5
1827bc3d5698SJohn Baldwin	eor	w13,w13,w7
1828bc3d5698SJohn Baldwin	add	w4,w4,w12
1829bc3d5698SJohn Baldwin	add	w5,w5,w15
1830bc3d5698SJohn Baldwin	and	w12,w10,w9
1831bc3d5698SJohn Baldwin	bic	w15,w3,w9
1832bc3d5698SJohn Baldwin	eor	w11,w9,w9,ror#5
1833bc3d5698SJohn Baldwin	add	w5,w5,w13
1834bc3d5698SJohn Baldwin	orr	w12,w12,w15
1835bc3d5698SJohn Baldwin	eor	w11,w11,w9,ror#19
1836bc3d5698SJohn Baldwin	eor	w15,w5,w5,ror#11
1837bc3d5698SJohn Baldwin	add	w4,w4,w12
1838bc3d5698SJohn Baldwin	ror	w11,w11,#6
1839bc3d5698SJohn Baldwin	eor	w13,w5,w6
1840bc3d5698SJohn Baldwin	eor	w15,w15,w5,ror#20
1841bc3d5698SJohn Baldwin	add	w4,w4,w11
1842bc3d5698SJohn Baldwin	ldr	w12,[sp,#28]
1843bc3d5698SJohn Baldwin	and	w14,w14,w13
1844bc3d5698SJohn Baldwin	ror	w15,w15,#2
1845bc3d5698SJohn Baldwin	add	w8,w8,w4
1846bc3d5698SJohn Baldwin	eor	w14,w14,w6
1847bc3d5698SJohn Baldwin	add	w3,w3,w12
1848bc3d5698SJohn Baldwin	add	w4,w4,w15
1849bc3d5698SJohn Baldwin	and	w12,w9,w8
1850bc3d5698SJohn Baldwin	bic	w15,w10,w8
1851bc3d5698SJohn Baldwin	eor	w11,w8,w8,ror#5
1852bc3d5698SJohn Baldwin	add	w4,w4,w14
1853bc3d5698SJohn Baldwin	orr	w12,w12,w15
1854bc3d5698SJohn Baldwin	eor	w11,w11,w8,ror#19
1855bc3d5698SJohn Baldwin	eor	w15,w4,w4,ror#11
1856bc3d5698SJohn Baldwin	add	w3,w3,w12
1857bc3d5698SJohn Baldwin	ror	w11,w11,#6
1858bc3d5698SJohn Baldwin	eor	w14,w4,w5
1859bc3d5698SJohn Baldwin	eor	w15,w15,w4,ror#20
1860bc3d5698SJohn Baldwin	add	w3,w3,w11
1861bc3d5698SJohn Baldwin	ldr	w12,[sp,#32]
1862bc3d5698SJohn Baldwin	and	w13,w13,w14
1863bc3d5698SJohn Baldwin	ror	w15,w15,#2
1864bc3d5698SJohn Baldwin	add	w7,w7,w3
1865bc3d5698SJohn Baldwin	eor	w13,w13,w5
1866bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1867bc3d5698SJohn Baldwin	add	w10,w10,w12
1868bc3d5698SJohn Baldwin	add	w3,w3,w15
1869bc3d5698SJohn Baldwin	and	w12,w8,w7
1870bc3d5698SJohn Baldwin	ld1	{v2.16b},[x1],#16
1871bc3d5698SJohn Baldwin	bic	w15,w9,w7
1872bc3d5698SJohn Baldwin	eor	w11,w7,w7,ror#5
1873bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16],#16
1874bc3d5698SJohn Baldwin	add	w3,w3,w13
1875bc3d5698SJohn Baldwin	orr	w12,w12,w15
1876bc3d5698SJohn Baldwin	eor	w11,w11,w7,ror#19
1877bc3d5698SJohn Baldwin	eor	w15,w3,w3,ror#11
1878bc3d5698SJohn Baldwin	rev32	v2.16b,v2.16b
1879bc3d5698SJohn Baldwin	add	w10,w10,w12
1880bc3d5698SJohn Baldwin	ror	w11,w11,#6
1881bc3d5698SJohn Baldwin	eor	w13,w3,w4
1882bc3d5698SJohn Baldwin	eor	w15,w15,w3,ror#20
1883bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v2.4s
1884bc3d5698SJohn Baldwin	add	w10,w10,w11
1885bc3d5698SJohn Baldwin	ldr	w12,[sp,#36]
1886bc3d5698SJohn Baldwin	and	w14,w14,w13
1887bc3d5698SJohn Baldwin	ror	w15,w15,#2
1888bc3d5698SJohn Baldwin	add	w6,w6,w10
1889bc3d5698SJohn Baldwin	eor	w14,w14,w4
1890bc3d5698SJohn Baldwin	add	w9,w9,w12
1891bc3d5698SJohn Baldwin	add	w10,w10,w15
1892bc3d5698SJohn Baldwin	and	w12,w7,w6
1893bc3d5698SJohn Baldwin	bic	w15,w8,w6
1894bc3d5698SJohn Baldwin	eor	w11,w6,w6,ror#5
1895bc3d5698SJohn Baldwin	add	w10,w10,w14
1896bc3d5698SJohn Baldwin	orr	w12,w12,w15
1897bc3d5698SJohn Baldwin	eor	w11,w11,w6,ror#19
1898bc3d5698SJohn Baldwin	eor	w15,w10,w10,ror#11
1899bc3d5698SJohn Baldwin	add	w9,w9,w12
1900bc3d5698SJohn Baldwin	ror	w11,w11,#6
1901bc3d5698SJohn Baldwin	eor	w14,w10,w3
1902bc3d5698SJohn Baldwin	eor	w15,w15,w10,ror#20
1903bc3d5698SJohn Baldwin	add	w9,w9,w11
1904bc3d5698SJohn Baldwin	ldr	w12,[sp,#40]
1905bc3d5698SJohn Baldwin	and	w13,w13,w14
1906bc3d5698SJohn Baldwin	ror	w15,w15,#2
1907bc3d5698SJohn Baldwin	add	w5,w5,w9
1908bc3d5698SJohn Baldwin	eor	w13,w13,w3
1909bc3d5698SJohn Baldwin	add	w8,w8,w12
1910bc3d5698SJohn Baldwin	add	w9,w9,w15
1911bc3d5698SJohn Baldwin	and	w12,w6,w5
1912bc3d5698SJohn Baldwin	bic	w15,w7,w5
1913bc3d5698SJohn Baldwin	eor	w11,w5,w5,ror#5
1914bc3d5698SJohn Baldwin	add	w9,w9,w13
1915bc3d5698SJohn Baldwin	orr	w12,w12,w15
1916bc3d5698SJohn Baldwin	eor	w11,w11,w5,ror#19
1917bc3d5698SJohn Baldwin	eor	w15,w9,w9,ror#11
1918bc3d5698SJohn Baldwin	add	w8,w8,w12
1919bc3d5698SJohn Baldwin	ror	w11,w11,#6
1920bc3d5698SJohn Baldwin	eor	w13,w9,w10
1921bc3d5698SJohn Baldwin	eor	w15,w15,w9,ror#20
1922bc3d5698SJohn Baldwin	add	w8,w8,w11
1923bc3d5698SJohn Baldwin	ldr	w12,[sp,#44]
1924bc3d5698SJohn Baldwin	and	w14,w14,w13
1925bc3d5698SJohn Baldwin	ror	w15,w15,#2
1926bc3d5698SJohn Baldwin	add	w4,w4,w8
1927bc3d5698SJohn Baldwin	eor	w14,w14,w10
1928bc3d5698SJohn Baldwin	add	w7,w7,w12
1929bc3d5698SJohn Baldwin	add	w8,w8,w15
1930bc3d5698SJohn Baldwin	and	w12,w5,w4
1931bc3d5698SJohn Baldwin	bic	w15,w6,w4
1932bc3d5698SJohn Baldwin	eor	w11,w4,w4,ror#5
1933bc3d5698SJohn Baldwin	add	w8,w8,w14
1934bc3d5698SJohn Baldwin	orr	w12,w12,w15
1935bc3d5698SJohn Baldwin	eor	w11,w11,w4,ror#19
1936bc3d5698SJohn Baldwin	eor	w15,w8,w8,ror#11
1937bc3d5698SJohn Baldwin	add	w7,w7,w12
1938bc3d5698SJohn Baldwin	ror	w11,w11,#6
1939bc3d5698SJohn Baldwin	eor	w14,w8,w9
1940bc3d5698SJohn Baldwin	eor	w15,w15,w8,ror#20
1941bc3d5698SJohn Baldwin	add	w7,w7,w11
1942bc3d5698SJohn Baldwin	ldr	w12,[sp,#48]
1943bc3d5698SJohn Baldwin	and	w13,w13,w14
1944bc3d5698SJohn Baldwin	ror	w15,w15,#2
1945bc3d5698SJohn Baldwin	add	w3,w3,w7
1946bc3d5698SJohn Baldwin	eor	w13,w13,w9
1947bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
1948bc3d5698SJohn Baldwin	add	w6,w6,w12
1949bc3d5698SJohn Baldwin	add	w7,w7,w15
1950bc3d5698SJohn Baldwin	and	w12,w4,w3
1951bc3d5698SJohn Baldwin	ld1	{v3.16b},[x1],#16
1952bc3d5698SJohn Baldwin	bic	w15,w5,w3
1953bc3d5698SJohn Baldwin	eor	w11,w3,w3,ror#5
1954bc3d5698SJohn Baldwin	ld1	{v4.4s},[x16],#16
1955bc3d5698SJohn Baldwin	add	w7,w7,w13
1956bc3d5698SJohn Baldwin	orr	w12,w12,w15
1957bc3d5698SJohn Baldwin	eor	w11,w11,w3,ror#19
1958bc3d5698SJohn Baldwin	eor	w15,w7,w7,ror#11
1959bc3d5698SJohn Baldwin	rev32	v3.16b,v3.16b
1960bc3d5698SJohn Baldwin	add	w6,w6,w12
1961bc3d5698SJohn Baldwin	ror	w11,w11,#6
1962bc3d5698SJohn Baldwin	eor	w13,w7,w8
1963bc3d5698SJohn Baldwin	eor	w15,w15,w7,ror#20
1964bc3d5698SJohn Baldwin	add	v4.4s,v4.4s,v3.4s
1965bc3d5698SJohn Baldwin	add	w6,w6,w11
1966bc3d5698SJohn Baldwin	ldr	w12,[sp,#52]
1967bc3d5698SJohn Baldwin	and	w14,w14,w13
1968bc3d5698SJohn Baldwin	ror	w15,w15,#2
1969bc3d5698SJohn Baldwin	add	w10,w10,w6
1970bc3d5698SJohn Baldwin	eor	w14,w14,w8
1971bc3d5698SJohn Baldwin	add	w5,w5,w12
1972bc3d5698SJohn Baldwin	add	w6,w6,w15
1973bc3d5698SJohn Baldwin	and	w12,w3,w10
1974bc3d5698SJohn Baldwin	bic	w15,w4,w10
1975bc3d5698SJohn Baldwin	eor	w11,w10,w10,ror#5
1976bc3d5698SJohn Baldwin	add	w6,w6,w14
1977bc3d5698SJohn Baldwin	orr	w12,w12,w15
1978bc3d5698SJohn Baldwin	eor	w11,w11,w10,ror#19
1979bc3d5698SJohn Baldwin	eor	w15,w6,w6,ror#11
1980bc3d5698SJohn Baldwin	add	w5,w5,w12
1981bc3d5698SJohn Baldwin	ror	w11,w11,#6
1982bc3d5698SJohn Baldwin	eor	w14,w6,w7
1983bc3d5698SJohn Baldwin	eor	w15,w15,w6,ror#20
1984bc3d5698SJohn Baldwin	add	w5,w5,w11
1985bc3d5698SJohn Baldwin	ldr	w12,[sp,#56]
1986bc3d5698SJohn Baldwin	and	w13,w13,w14
1987bc3d5698SJohn Baldwin	ror	w15,w15,#2
1988bc3d5698SJohn Baldwin	add	w9,w9,w5
1989bc3d5698SJohn Baldwin	eor	w13,w13,w7
1990bc3d5698SJohn Baldwin	add	w4,w4,w12
1991bc3d5698SJohn Baldwin	add	w5,w5,w15
1992bc3d5698SJohn Baldwin	and	w12,w10,w9
1993bc3d5698SJohn Baldwin	bic	w15,w3,w9
1994bc3d5698SJohn Baldwin	eor	w11,w9,w9,ror#5
1995bc3d5698SJohn Baldwin	add	w5,w5,w13
1996bc3d5698SJohn Baldwin	orr	w12,w12,w15
1997bc3d5698SJohn Baldwin	eor	w11,w11,w9,ror#19
1998bc3d5698SJohn Baldwin	eor	w15,w5,w5,ror#11
1999bc3d5698SJohn Baldwin	add	w4,w4,w12
2000bc3d5698SJohn Baldwin	ror	w11,w11,#6
2001bc3d5698SJohn Baldwin	eor	w13,w5,w6
2002bc3d5698SJohn Baldwin	eor	w15,w15,w5,ror#20
2003bc3d5698SJohn Baldwin	add	w4,w4,w11
2004bc3d5698SJohn Baldwin	ldr	w12,[sp,#60]
2005bc3d5698SJohn Baldwin	and	w14,w14,w13
2006bc3d5698SJohn Baldwin	ror	w15,w15,#2
2007bc3d5698SJohn Baldwin	add	w8,w8,w4
2008bc3d5698SJohn Baldwin	eor	w14,w14,w6
2009bc3d5698SJohn Baldwin	add	w3,w3,w12
2010bc3d5698SJohn Baldwin	add	w4,w4,w15
2011bc3d5698SJohn Baldwin	and	w12,w9,w8
2012bc3d5698SJohn Baldwin	bic	w15,w10,w8
2013bc3d5698SJohn Baldwin	eor	w11,w8,w8,ror#5
2014bc3d5698SJohn Baldwin	add	w4,w4,w14
2015bc3d5698SJohn Baldwin	orr	w12,w12,w15
2016bc3d5698SJohn Baldwin	eor	w11,w11,w8,ror#19
2017bc3d5698SJohn Baldwin	eor	w15,w4,w4,ror#11
2018bc3d5698SJohn Baldwin	add	w3,w3,w12
2019bc3d5698SJohn Baldwin	ror	w11,w11,#6
2020bc3d5698SJohn Baldwin	eor	w14,w4,w5
2021bc3d5698SJohn Baldwin	eor	w15,w15,w4,ror#20
2022bc3d5698SJohn Baldwin	add	w3,w3,w11
2023bc3d5698SJohn Baldwin	and	w13,w13,w14
2024bc3d5698SJohn Baldwin	ror	w15,w15,#2
2025bc3d5698SJohn Baldwin	add	w7,w7,w3
2026bc3d5698SJohn Baldwin	eor	w13,w13,w5
2027bc3d5698SJohn Baldwin	st1	{v4.4s},[x17], #16
2028bc3d5698SJohn Baldwin	add	w3,w3,w15			// h+=Sigma0(a) from the past
2029bc3d5698SJohn Baldwin	ldp	w11,w12,[x0,#0]
2030bc3d5698SJohn Baldwin	add	w3,w3,w13			// h+=Maj(a,b,c) from the past
2031bc3d5698SJohn Baldwin	ldp	w13,w14,[x0,#8]
2032bc3d5698SJohn Baldwin	add	w3,w3,w11			// accumulate
2033bc3d5698SJohn Baldwin	add	w4,w4,w12
2034bc3d5698SJohn Baldwin	ldp	w11,w12,[x0,#16]
2035bc3d5698SJohn Baldwin	add	w5,w5,w13
2036bc3d5698SJohn Baldwin	add	w6,w6,w14
2037bc3d5698SJohn Baldwin	ldp	w13,w14,[x0,#24]
2038bc3d5698SJohn Baldwin	add	w7,w7,w11
2039bc3d5698SJohn Baldwin	add	w8,w8,w12
2040bc3d5698SJohn Baldwin	ldr	w12,[sp,#0]
2041bc3d5698SJohn Baldwin	stp	w3,w4,[x0,#0]
2042bc3d5698SJohn Baldwin	add	w9,w9,w13
2043bc3d5698SJohn Baldwin	mov	w13,wzr
2044bc3d5698SJohn Baldwin	stp	w5,w6,[x0,#8]
2045bc3d5698SJohn Baldwin	add	w10,w10,w14
2046bc3d5698SJohn Baldwin	stp	w7,w8,[x0,#16]
2047bc3d5698SJohn Baldwin	eor	w14,w4,w5
2048bc3d5698SJohn Baldwin	stp	w9,w10,[x0,#24]
2049bc3d5698SJohn Baldwin	mov	w15,wzr
2050bc3d5698SJohn Baldwin	mov	x17,sp
2051bc3d5698SJohn Baldwin	b.ne	.L_00_48
2052bc3d5698SJohn Baldwin
2053bc3d5698SJohn Baldwin	ldr	x29,[x29]
2054bc3d5698SJohn Baldwin	add	sp,sp,#16*4+16
2055bc3d5698SJohn Baldwin	ret
2056bc3d5698SJohn Baldwin.size	sha256_block_neon,.-sha256_block_neon
2057