1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if !defined(OPENSSL_NO_ASM)
11// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
12//
13// Licensed under the OpenSSL license (the "License").  You may not use
14// this file except in compliance with the License.  You can obtain a copy
15// in the file LICENSE in the source distribution or at
16// https://www.openssl.org/source/license.html
17
18// ====================================================================
19// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
20// project. The module is, however, dual licensed under OpenSSL and
21// CRYPTOGAMS licenses depending on where you obtain it. For further
22// details see http://www.openssl.org/~appro/cryptogams/.
23//
24// Permission to use under GPLv2 terms is granted.
25// ====================================================================
26//
27// SHA256/512 for ARMv8.
28//
29// Performance in cycles per processed byte and improvement coefficient
30// over code generated with "default" compiler:
31//
32//		SHA256-hw	SHA256(*)	SHA512
33// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
34// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
35// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
36// Denver	2.01		10.5 (+26%)	6.70 (+8%)
37// X-Gene			20.0 (+100%)	12.8 (+300%(***))
38// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
39//
40// (*)	Software SHA256 results are of lesser relevance, presented
41//	mostly for informational purposes.
42// (**)	The result is a trade-off: it's possible to improve it by
43//	10% (or by 1 cycle per round), but at the cost of 20% loss
44//	on Cortex-A53 (or by 4 cycles per round).
45// (***)	Super-impressive coefficients over gcc-generated code are
46//	indication of some compiler "pathology", most notably code
47//	generated with -mgeneral-regs-only is significantly faster
48//	and the gap is only 40-90%.
49
50#ifndef	__KERNEL__
51# include <GFp/arm_arch.h>
52#endif
53
54.text
55
56
// Entry point of the SHA-512 block routine. Register roles, as used by the
// code in this routine:
//   x0 - pointer to the eight 64-bit state words (read here, written back
//        after every block)
//   x1 - pointer to the input data
//   x2 - number of 128-byte blocks; turned into an end pointer below by
//        x1 + (x2 << 7)
// Frame (128 bytes addressed through x29), as laid out by the stores below:
//   [x29,#0]    x29,x30
//   [x29,#16]   x19..x28, five pairs, up to offset #95
//   [x29,#96]   x0 (state pointer)
//   [x29,#104]  end-of-input pointer
//   [x29,#112]  input cursor, stored at the top of Loop
// A further 32 bytes below the frame, [sp,#0]..[sp,#24], are scratch slots.
// NOTE(review): nothing here tests x2 for zero and the block loop is only
// tested at its bottom - presumably callers always pass at least one block;
// confirm against the callers.
57.globl	_GFp_sha512_block_data_order
58.private_extern	_GFp_sha512_block_data_order
59
60.align	6
61_GFp_sha512_block_data_order:
62	stp	x29,x30,[sp,#-128]!	// allocate the 128-byte frame, save x29/x30
63	add	x29,sp,#0	// x29 = base of the frame
64
// Save x19-x28; all of them are overwritten by the round code.
65	stp	x19,x20,[sp,#16]
66	stp	x21,x22,[sp,#32]
67	stp	x23,x24,[sp,#48]
68	stp	x25,x26,[sp,#64]
69	stp	x27,x28,[sp,#80]
70	sub	sp,sp,#4*8	// four 8-byte scratch slots below the frame
71
72	ldp	x20,x21,[x0]				// load context
73	ldp	x22,x23,[x0,#2*8]
74	ldp	x24,x25,[x0,#4*8]
75	add	x2,x1,x2,lsl#7	// end of input
76	ldp	x26,x27,[x0,#6*8]
// x30 was saved above and is reused from here on as the pointer into LK512.
77	adrp	x30,LK512@PAGE
78	add	x30,x30,LK512@PAGEOFF
79	stp	x0,x2,[x29,#96]	// x0 and x2 are overwritten with message words later
80
// Rounds 0-15 of one 128-byte block.
// The sixteen input words are loaded two at a time into
// x3..x15,x0,x1,x2 (in that order) and byte-reversed unless __ARMEB__ is
// defined. x20..x27 hold the working variables a..h; instead of moving them
// between rounds, each round is written with the register roles rotated by
// one. x19 and x28 alternate between holding the round constant and the
// running a^b / Maj term; x16 and x17 are temporaries.
// In rounds 0-14 the "h+=Sigma0(a)" add is issued at the start of the
// following round (the in-place copy is left commented out).
// From round 11 on, message-word registers are also used as temporaries, so
// their contents are parked in the scratch slots [sp,#0]..[sp,#24] and
// reloaded later.
81Loop:
82	ldp	x3,x4,[x1],#2*8
83	ldr	x19,[x30],#8			// *K++
84	eor	x28,x21,x22				// magic seed
85	str	x1,[x29,#112]	// save input cursor (already 16 bytes into the block)
86#ifndef	__ARMEB__
87	rev	x3,x3			// 0
88#endif
89	ror	x16,x24,#14
90	add	x27,x27,x19			// h+=K[i]
91	eor	x6,x24,x24,ror#23
92	and	x17,x25,x24
93	bic	x19,x26,x24
94	add	x27,x27,x3			// h+=X[i]
95	orr	x17,x17,x19			// Ch(e,f,g)
96	eor	x19,x20,x21			// a^b, b^c in next round
97	eor	x16,x16,x6,ror#18	// Sigma1(e)
98	ror	x6,x20,#28
99	add	x27,x27,x17			// h+=Ch(e,f,g)
100	eor	x17,x20,x20,ror#5
101	add	x27,x27,x16			// h+=Sigma1(e)
102	and	x28,x28,x19			// (b^c)&=(a^b)
103	add	x23,x23,x27			// d+=h
104	eor	x28,x28,x21			// Maj(a,b,c)
105	eor	x17,x6,x17,ror#34	// Sigma0(a)
106	add	x27,x27,x28			// h+=Maj(a,b,c)
107	ldr	x28,[x30],#8		// *K++, x19 in next round
108	//add	x27,x27,x17			// h+=Sigma0(a)
109#ifndef	__ARMEB__
110	rev	x4,x4			// 1
111#endif
112	ldp	x5,x6,[x1],#2*8
113	add	x27,x27,x17			// h+=Sigma0(a)
114	ror	x16,x23,#14
115	add	x26,x26,x28			// h+=K[i]
116	eor	x7,x23,x23,ror#23
117	and	x17,x24,x23
118	bic	x28,x25,x23
119	add	x26,x26,x4			// h+=X[i]
120	orr	x17,x17,x28			// Ch(e,f,g)
121	eor	x28,x27,x20			// a^b, b^c in next round
122	eor	x16,x16,x7,ror#18	// Sigma1(e)
123	ror	x7,x27,#28
124	add	x26,x26,x17			// h+=Ch(e,f,g)
125	eor	x17,x27,x27,ror#5
126	add	x26,x26,x16			// h+=Sigma1(e)
127	and	x19,x19,x28			// (b^c)&=(a^b)
128	add	x22,x22,x26			// d+=h
129	eor	x19,x19,x20			// Maj(a,b,c)
130	eor	x17,x7,x17,ror#34	// Sigma0(a)
131	add	x26,x26,x19			// h+=Maj(a,b,c)
132	ldr	x19,[x30],#8		// *K++, x28 in next round
133	//add	x26,x26,x17			// h+=Sigma0(a)
134#ifndef	__ARMEB__
135	rev	x5,x5			// 2
136#endif
137	add	x26,x26,x17			// h+=Sigma0(a)
138	ror	x16,x22,#14
139	add	x25,x25,x19			// h+=K[i]
140	eor	x8,x22,x22,ror#23
141	and	x17,x23,x22
142	bic	x19,x24,x22
143	add	x25,x25,x5			// h+=X[i]
144	orr	x17,x17,x19			// Ch(e,f,g)
145	eor	x19,x26,x27			// a^b, b^c in next round
146	eor	x16,x16,x8,ror#18	// Sigma1(e)
147	ror	x8,x26,#28
148	add	x25,x25,x17			// h+=Ch(e,f,g)
149	eor	x17,x26,x26,ror#5
150	add	x25,x25,x16			// h+=Sigma1(e)
151	and	x28,x28,x19			// (b^c)&=(a^b)
152	add	x21,x21,x25			// d+=h
153	eor	x28,x28,x27			// Maj(a,b,c)
154	eor	x17,x8,x17,ror#34	// Sigma0(a)
155	add	x25,x25,x28			// h+=Maj(a,b,c)
156	ldr	x28,[x30],#8		// *K++, x19 in next round
157	//add	x25,x25,x17			// h+=Sigma0(a)
158#ifndef	__ARMEB__
159	rev	x6,x6			// 3
160#endif
161	ldp	x7,x8,[x1],#2*8
162	add	x25,x25,x17			// h+=Sigma0(a)
163	ror	x16,x21,#14
164	add	x24,x24,x28			// h+=K[i]
165	eor	x9,x21,x21,ror#23
166	and	x17,x22,x21
167	bic	x28,x23,x21
168	add	x24,x24,x6			// h+=X[i]
169	orr	x17,x17,x28			// Ch(e,f,g)
170	eor	x28,x25,x26			// a^b, b^c in next round
171	eor	x16,x16,x9,ror#18	// Sigma1(e)
172	ror	x9,x25,#28
173	add	x24,x24,x17			// h+=Ch(e,f,g)
174	eor	x17,x25,x25,ror#5
175	add	x24,x24,x16			// h+=Sigma1(e)
176	and	x19,x19,x28			// (b^c)&=(a^b)
177	add	x20,x20,x24			// d+=h
178	eor	x19,x19,x26			// Maj(a,b,c)
179	eor	x17,x9,x17,ror#34	// Sigma0(a)
180	add	x24,x24,x19			// h+=Maj(a,b,c)
181	ldr	x19,[x30],#8		// *K++, x28 in next round
182	//add	x24,x24,x17			// h+=Sigma0(a)
183#ifndef	__ARMEB__
184	rev	x7,x7			// 4
185#endif
186	add	x24,x24,x17			// h+=Sigma0(a)
187	ror	x16,x20,#14
188	add	x23,x23,x19			// h+=K[i]
189	eor	x10,x20,x20,ror#23
190	and	x17,x21,x20
191	bic	x19,x22,x20
192	add	x23,x23,x7			// h+=X[i]
193	orr	x17,x17,x19			// Ch(e,f,g)
194	eor	x19,x24,x25			// a^b, b^c in next round
195	eor	x16,x16,x10,ror#18	// Sigma1(e)
196	ror	x10,x24,#28
197	add	x23,x23,x17			// h+=Ch(e,f,g)
198	eor	x17,x24,x24,ror#5
199	add	x23,x23,x16			// h+=Sigma1(e)
200	and	x28,x28,x19			// (b^c)&=(a^b)
201	add	x27,x27,x23			// d+=h
202	eor	x28,x28,x25			// Maj(a,b,c)
203	eor	x17,x10,x17,ror#34	// Sigma0(a)
204	add	x23,x23,x28			// h+=Maj(a,b,c)
205	ldr	x28,[x30],#8		// *K++, x19 in next round
206	//add	x23,x23,x17			// h+=Sigma0(a)
207#ifndef	__ARMEB__
208	rev	x8,x8			// 5
209#endif
210	ldp	x9,x10,[x1],#2*8
211	add	x23,x23,x17			// h+=Sigma0(a)
212	ror	x16,x27,#14
213	add	x22,x22,x28			// h+=K[i]
214	eor	x11,x27,x27,ror#23
215	and	x17,x20,x27
216	bic	x28,x21,x27
217	add	x22,x22,x8			// h+=X[i]
218	orr	x17,x17,x28			// Ch(e,f,g)
219	eor	x28,x23,x24			// a^b, b^c in next round
220	eor	x16,x16,x11,ror#18	// Sigma1(e)
221	ror	x11,x23,#28
222	add	x22,x22,x17			// h+=Ch(e,f,g)
223	eor	x17,x23,x23,ror#5
224	add	x22,x22,x16			// h+=Sigma1(e)
225	and	x19,x19,x28			// (b^c)&=(a^b)
226	add	x26,x26,x22			// d+=h
227	eor	x19,x19,x24			// Maj(a,b,c)
228	eor	x17,x11,x17,ror#34	// Sigma0(a)
229	add	x22,x22,x19			// h+=Maj(a,b,c)
230	ldr	x19,[x30],#8		// *K++, x28 in next round
231	//add	x22,x22,x17			// h+=Sigma0(a)
232#ifndef	__ARMEB__
233	rev	x9,x9			// 6
234#endif
235	add	x22,x22,x17			// h+=Sigma0(a)
236	ror	x16,x26,#14
237	add	x21,x21,x19			// h+=K[i]
238	eor	x12,x26,x26,ror#23
239	and	x17,x27,x26
240	bic	x19,x20,x26
241	add	x21,x21,x9			// h+=X[i]
242	orr	x17,x17,x19			// Ch(e,f,g)
243	eor	x19,x22,x23			// a^b, b^c in next round
244	eor	x16,x16,x12,ror#18	// Sigma1(e)
245	ror	x12,x22,#28
246	add	x21,x21,x17			// h+=Ch(e,f,g)
247	eor	x17,x22,x22,ror#5
248	add	x21,x21,x16			// h+=Sigma1(e)
249	and	x28,x28,x19			// (b^c)&=(a^b)
250	add	x25,x25,x21			// d+=h
251	eor	x28,x28,x23			// Maj(a,b,c)
252	eor	x17,x12,x17,ror#34	// Sigma0(a)
253	add	x21,x21,x28			// h+=Maj(a,b,c)
254	ldr	x28,[x30],#8		// *K++, x19 in next round
255	//add	x21,x21,x17			// h+=Sigma0(a)
256#ifndef	__ARMEB__
257	rev	x10,x10			// 7
258#endif
259	ldp	x11,x12,[x1],#2*8
260	add	x21,x21,x17			// h+=Sigma0(a)
261	ror	x16,x25,#14
262	add	x20,x20,x28			// h+=K[i]
263	eor	x13,x25,x25,ror#23
264	and	x17,x26,x25
265	bic	x28,x27,x25
266	add	x20,x20,x10			// h+=X[i]
267	orr	x17,x17,x28			// Ch(e,f,g)
268	eor	x28,x21,x22			// a^b, b^c in next round
269	eor	x16,x16,x13,ror#18	// Sigma1(e)
270	ror	x13,x21,#28
271	add	x20,x20,x17			// h+=Ch(e,f,g)
272	eor	x17,x21,x21,ror#5
273	add	x20,x20,x16			// h+=Sigma1(e)
274	and	x19,x19,x28			// (b^c)&=(a^b)
275	add	x24,x24,x20			// d+=h
276	eor	x19,x19,x22			// Maj(a,b,c)
277	eor	x17,x13,x17,ror#34	// Sigma0(a)
278	add	x20,x20,x19			// h+=Maj(a,b,c)
279	ldr	x19,[x30],#8		// *K++, x28 in next round
280	//add	x20,x20,x17			// h+=Sigma0(a)
281#ifndef	__ARMEB__
282	rev	x11,x11			// 8
283#endif
284	add	x20,x20,x17			// h+=Sigma0(a)
285	ror	x16,x24,#14
286	add	x27,x27,x19			// h+=K[i]
287	eor	x14,x24,x24,ror#23
288	and	x17,x25,x24
289	bic	x19,x26,x24
290	add	x27,x27,x11			// h+=X[i]
291	orr	x17,x17,x19			// Ch(e,f,g)
292	eor	x19,x20,x21			// a^b, b^c in next round
293	eor	x16,x16,x14,ror#18	// Sigma1(e)
294	ror	x14,x20,#28
295	add	x27,x27,x17			// h+=Ch(e,f,g)
296	eor	x17,x20,x20,ror#5
297	add	x27,x27,x16			// h+=Sigma1(e)
298	and	x28,x28,x19			// (b^c)&=(a^b)
299	add	x23,x23,x27			// d+=h
300	eor	x28,x28,x21			// Maj(a,b,c)
301	eor	x17,x14,x17,ror#34	// Sigma0(a)
302	add	x27,x27,x28			// h+=Maj(a,b,c)
303	ldr	x28,[x30],#8		// *K++, x19 in next round
304	//add	x27,x27,x17			// h+=Sigma0(a)
305#ifndef	__ARMEB__
306	rev	x12,x12			// 9
307#endif
308	ldp	x13,x14,[x1],#2*8
309	add	x27,x27,x17			// h+=Sigma0(a)
310	ror	x16,x23,#14
311	add	x26,x26,x28			// h+=K[i]
312	eor	x15,x23,x23,ror#23
313	and	x17,x24,x23
314	bic	x28,x25,x23
315	add	x26,x26,x12			// h+=X[i]
316	orr	x17,x17,x28			// Ch(e,f,g)
317	eor	x28,x27,x20			// a^b, b^c in next round
318	eor	x16,x16,x15,ror#18	// Sigma1(e)
319	ror	x15,x27,#28
320	add	x26,x26,x17			// h+=Ch(e,f,g)
321	eor	x17,x27,x27,ror#5
322	add	x26,x26,x16			// h+=Sigma1(e)
323	and	x19,x19,x28			// (b^c)&=(a^b)
324	add	x22,x22,x26			// d+=h
325	eor	x19,x19,x20			// Maj(a,b,c)
326	eor	x17,x15,x17,ror#34	// Sigma0(a)
327	add	x26,x26,x19			// h+=Maj(a,b,c)
328	ldr	x19,[x30],#8		// *K++, x28 in next round
329	//add	x26,x26,x17			// h+=Sigma0(a)
330#ifndef	__ARMEB__
331	rev	x13,x13			// 10
332#endif
333	add	x26,x26,x17			// h+=Sigma0(a)
334	ror	x16,x22,#14
335	add	x25,x25,x19			// h+=K[i]
336	eor	x0,x22,x22,ror#23
337	and	x17,x23,x22
338	bic	x19,x24,x22
339	add	x25,x25,x13			// h+=X[i]
340	orr	x17,x17,x19			// Ch(e,f,g)
341	eor	x19,x26,x27			// a^b, b^c in next round
342	eor	x16,x16,x0,ror#18	// Sigma1(e)
343	ror	x0,x26,#28
344	add	x25,x25,x17			// h+=Ch(e,f,g)
345	eor	x17,x26,x26,ror#5
346	add	x25,x25,x16			// h+=Sigma1(e)
347	and	x28,x28,x19			// (b^c)&=(a^b)
348	add	x21,x21,x25			// d+=h
349	eor	x28,x28,x27			// Maj(a,b,c)
350	eor	x17,x0,x17,ror#34	// Sigma0(a)
351	add	x25,x25,x28			// h+=Maj(a,b,c)
352	ldr	x28,[x30],#8		// *K++, x19 in next round
353	//add	x25,x25,x17			// h+=Sigma0(a)
354#ifndef	__ARMEB__
355	rev	x14,x14			// 11
356#endif
357	ldp	x15,x0,[x1],#2*8	// x0 now carries a message word
358	add	x25,x25,x17			// h+=Sigma0(a)
359	str	x6,[sp,#24]	// park x6; it is used as a temporary below
360	ror	x16,x21,#14
361	add	x24,x24,x28			// h+=K[i]
362	eor	x6,x21,x21,ror#23
363	and	x17,x22,x21
364	bic	x28,x23,x21
365	add	x24,x24,x14			// h+=X[i]
366	orr	x17,x17,x28			// Ch(e,f,g)
367	eor	x28,x25,x26			// a^b, b^c in next round
368	eor	x16,x16,x6,ror#18	// Sigma1(e)
369	ror	x6,x25,#28
370	add	x24,x24,x17			// h+=Ch(e,f,g)
371	eor	x17,x25,x25,ror#5
372	add	x24,x24,x16			// h+=Sigma1(e)
373	and	x19,x19,x28			// (b^c)&=(a^b)
374	add	x20,x20,x24			// d+=h
375	eor	x19,x19,x26			// Maj(a,b,c)
376	eor	x17,x6,x17,ror#34	// Sigma0(a)
377	add	x24,x24,x19			// h+=Maj(a,b,c)
378	ldr	x19,[x30],#8		// *K++, x28 in next round
379	//add	x24,x24,x17			// h+=Sigma0(a)
380#ifndef	__ARMEB__
381	rev	x15,x15			// 12
382#endif
383	add	x24,x24,x17			// h+=Sigma0(a)
384	str	x7,[sp,#0]	// park x7; it is used as a temporary below
385	ror	x16,x20,#14
386	add	x23,x23,x19			// h+=K[i]
387	eor	x7,x20,x20,ror#23
388	and	x17,x21,x20
389	bic	x19,x22,x20
390	add	x23,x23,x15			// h+=X[i]
391	orr	x17,x17,x19			// Ch(e,f,g)
392	eor	x19,x24,x25			// a^b, b^c in next round
393	eor	x16,x16,x7,ror#18	// Sigma1(e)
394	ror	x7,x24,#28
395	add	x23,x23,x17			// h+=Ch(e,f,g)
396	eor	x17,x24,x24,ror#5
397	add	x23,x23,x16			// h+=Sigma1(e)
398	and	x28,x28,x19			// (b^c)&=(a^b)
399	add	x27,x27,x23			// d+=h
400	eor	x28,x28,x25			// Maj(a,b,c)
401	eor	x17,x7,x17,ror#34	// Sigma0(a)
402	add	x23,x23,x28			// h+=Maj(a,b,c)
403	ldr	x28,[x30],#8		// *K++, x19 in next round
404	//add	x23,x23,x17			// h+=Sigma0(a)
405#ifndef	__ARMEB__
406	rev	x0,x0			// 13
407#endif
408	ldp	x1,x2,[x1]	// last two words replace the input and end pointers
409	add	x23,x23,x17			// h+=Sigma0(a)
410	str	x8,[sp,#8]	// park x8; it is used as a temporary below
411	ror	x16,x27,#14
412	add	x22,x22,x28			// h+=K[i]
413	eor	x8,x27,x27,ror#23
414	and	x17,x20,x27
415	bic	x28,x21,x27
416	add	x22,x22,x0			// h+=X[i]
417	orr	x17,x17,x28			// Ch(e,f,g)
418	eor	x28,x23,x24			// a^b, b^c in next round
419	eor	x16,x16,x8,ror#18	// Sigma1(e)
420	ror	x8,x23,#28
421	add	x22,x22,x17			// h+=Ch(e,f,g)
422	eor	x17,x23,x23,ror#5
423	add	x22,x22,x16			// h+=Sigma1(e)
424	and	x19,x19,x28			// (b^c)&=(a^b)
425	add	x26,x26,x22			// d+=h
426	eor	x19,x19,x24			// Maj(a,b,c)
427	eor	x17,x8,x17,ror#34	// Sigma0(a)
428	add	x22,x22,x19			// h+=Maj(a,b,c)
429	ldr	x19,[x30],#8		// *K++, x28 in next round
430	//add	x22,x22,x17			// h+=Sigma0(a)
431#ifndef	__ARMEB__
432	rev	x1,x1			// 14
433#endif
434	ldr	x6,[sp,#24]	// bring back the word parked from x6
435	add	x22,x22,x17			// h+=Sigma0(a)
436	str	x9,[sp,#16]	// park x9; it is used as a temporary below
437	ror	x16,x26,#14
438	add	x21,x21,x19			// h+=K[i]
439	eor	x9,x26,x26,ror#23
440	and	x17,x27,x26
441	bic	x19,x20,x26
442	add	x21,x21,x1			// h+=X[i]
443	orr	x17,x17,x19			// Ch(e,f,g)
444	eor	x19,x22,x23			// a^b, b^c in next round
445	eor	x16,x16,x9,ror#18	// Sigma1(e)
446	ror	x9,x22,#28
447	add	x21,x21,x17			// h+=Ch(e,f,g)
448	eor	x17,x22,x22,ror#5
449	add	x21,x21,x16			// h+=Sigma1(e)
450	and	x28,x28,x19			// (b^c)&=(a^b)
451	add	x25,x25,x21			// d+=h
452	eor	x28,x28,x23			// Maj(a,b,c)
453	eor	x17,x9,x17,ror#34	// Sigma0(a)
454	add	x21,x21,x28			// h+=Maj(a,b,c)
455	ldr	x28,[x30],#8		// *K++, x19 in next round
456	//add	x21,x21,x17			// h+=Sigma0(a)
// Round 15 also starts the message schedule: x3 = x3 + x12 + sigma0(x4) +
// sigma1(x1), which is the word consumed by the first round of Loop_16_xx.
// Here Sigma1/Sigma0 are built from three separate rotates of e/a, and the
// Sigma0 add is done in place rather than deferred.
457#ifndef	__ARMEB__
458	rev	x2,x2			// 15
459#endif
460	ldr	x7,[sp,#0]	// bring back the word parked from x7
461	add	x21,x21,x17			// h+=Sigma0(a)
462	str	x10,[sp,#24]	// park x10; it is used as a temporary below
463	ror	x16,x25,#14
464	add	x20,x20,x28			// h+=K[i]
465	ror	x9,x4,#1
466	and	x17,x26,x25
467	ror	x8,x1,#19
468	bic	x28,x27,x25
469	ror	x10,x21,#28
470	add	x20,x20,x2			// h+=X[i]
471	eor	x16,x16,x25,ror#18
472	eor	x9,x9,x4,ror#8
473	orr	x17,x17,x28			// Ch(e,f,g)
474	eor	x28,x21,x22			// a^b, b^c in next round
475	eor	x16,x16,x25,ror#41	// Sigma1(e)
476	eor	x10,x10,x21,ror#34
477	add	x20,x20,x17			// h+=Ch(e,f,g)
478	and	x19,x19,x28			// (b^c)&=(a^b)
479	eor	x8,x8,x1,ror#61
480	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
481	add	x20,x20,x16			// h+=Sigma1(e)
482	eor	x19,x19,x22			// Maj(a,b,c)
483	eor	x17,x10,x21,ror#39	// Sigma0(a)
484	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
485	add	x3,x3,x12
486	add	x24,x24,x20			// d+=h
487	add	x20,x20,x19			// h+=Maj(a,b,c)
488	ldr	x19,[x30],#8		// *K++, x28 in next round
489	add	x3,x3,x9
490	add	x20,x20,x17			// h+=Sigma0(a)
491	add	x3,x3,x8
// Rounds 16-79, sixteen rounds per pass through this loop.
// Every round consumes the schedule word prepared by the previous round and
// prepares the next one in place:
//   X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])   (indices modulo 16)
// with sigma0(x) = ror(x,1)^ror(x,8)^lsr(x,7) and
//      sigma1(x) = ror(x,19)^ror(x,61)^lsr(x,6).
// For example the first round below builds x4 from x13, x5 and x2.
// The sigma temporaries reuse message-word registers, so each round reloads
// one word from the scratch slots [sp,#0]..[sp,#24] and parks another there.
// x20..x27 are the working variables with their roles rotated every round;
// x19/x28 alternate as round constant and a^b / Maj term; x16/x17 are
// temporaries.
// The pass repeats until the round-constant load at its end returns zero,
// i.e. the terminator quad that follows the constants in LK512.
492Loop_16_xx:
493	ldr	x8,[sp,#8]
494	str	x11,[sp,#0]
495	ror	x16,x24,#14
496	add	x27,x27,x19			// h+=K[i]
497	ror	x10,x5,#1
498	and	x17,x25,x24
499	ror	x9,x2,#19
500	bic	x19,x26,x24
501	ror	x11,x20,#28
502	add	x27,x27,x3			// h+=X[i]
503	eor	x16,x16,x24,ror#18
504	eor	x10,x10,x5,ror#8
505	orr	x17,x17,x19			// Ch(e,f,g)
506	eor	x19,x20,x21			// a^b, b^c in next round
507	eor	x16,x16,x24,ror#41	// Sigma1(e)
508	eor	x11,x11,x20,ror#34
509	add	x27,x27,x17			// h+=Ch(e,f,g)
510	and	x28,x28,x19			// (b^c)&=(a^b)
511	eor	x9,x9,x2,ror#61
512	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
513	add	x27,x27,x16			// h+=Sigma1(e)
514	eor	x28,x28,x21			// Maj(a,b,c)
515	eor	x17,x11,x20,ror#39	// Sigma0(a)
516	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
517	add	x4,x4,x13
518	add	x23,x23,x27			// d+=h
519	add	x27,x27,x28			// h+=Maj(a,b,c)
520	ldr	x28,[x30],#8		// *K++, x19 in next round
521	add	x4,x4,x10
522	add	x27,x27,x17			// h+=Sigma0(a)
523	add	x4,x4,x9
524	ldr	x9,[sp,#16]
525	str	x12,[sp,#8]
526	ror	x16,x23,#14
527	add	x26,x26,x28			// h+=K[i]
528	ror	x11,x6,#1
529	and	x17,x24,x23
530	ror	x10,x3,#19
531	bic	x28,x25,x23
532	ror	x12,x27,#28
533	add	x26,x26,x4			// h+=X[i]
534	eor	x16,x16,x23,ror#18
535	eor	x11,x11,x6,ror#8
536	orr	x17,x17,x28			// Ch(e,f,g)
537	eor	x28,x27,x20			// a^b, b^c in next round
538	eor	x16,x16,x23,ror#41	// Sigma1(e)
539	eor	x12,x12,x27,ror#34
540	add	x26,x26,x17			// h+=Ch(e,f,g)
541	and	x19,x19,x28			// (b^c)&=(a^b)
542	eor	x10,x10,x3,ror#61
543	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
544	add	x26,x26,x16			// h+=Sigma1(e)
545	eor	x19,x19,x20			// Maj(a,b,c)
546	eor	x17,x12,x27,ror#39	// Sigma0(a)
547	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
548	add	x5,x5,x14
549	add	x22,x22,x26			// d+=h
550	add	x26,x26,x19			// h+=Maj(a,b,c)
551	ldr	x19,[x30],#8		// *K++, x28 in next round
552	add	x5,x5,x11
553	add	x26,x26,x17			// h+=Sigma0(a)
554	add	x5,x5,x10
555	ldr	x10,[sp,#24]
556	str	x13,[sp,#16]
557	ror	x16,x22,#14
558	add	x25,x25,x19			// h+=K[i]
559	ror	x12,x7,#1
560	and	x17,x23,x22
561	ror	x11,x4,#19
562	bic	x19,x24,x22
563	ror	x13,x26,#28
564	add	x25,x25,x5			// h+=X[i]
565	eor	x16,x16,x22,ror#18
566	eor	x12,x12,x7,ror#8
567	orr	x17,x17,x19			// Ch(e,f,g)
568	eor	x19,x26,x27			// a^b, b^c in next round
569	eor	x16,x16,x22,ror#41	// Sigma1(e)
570	eor	x13,x13,x26,ror#34
571	add	x25,x25,x17			// h+=Ch(e,f,g)
572	and	x28,x28,x19			// (b^c)&=(a^b)
573	eor	x11,x11,x4,ror#61
574	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
575	add	x25,x25,x16			// h+=Sigma1(e)
576	eor	x28,x28,x27			// Maj(a,b,c)
577	eor	x17,x13,x26,ror#39	// Sigma0(a)
578	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
579	add	x6,x6,x15
580	add	x21,x21,x25			// d+=h
581	add	x25,x25,x28			// h+=Maj(a,b,c)
582	ldr	x28,[x30],#8		// *K++, x19 in next round
583	add	x6,x6,x12
584	add	x25,x25,x17			// h+=Sigma0(a)
585	add	x6,x6,x11
586	ldr	x11,[sp,#0]
587	str	x14,[sp,#24]
588	ror	x16,x21,#14
589	add	x24,x24,x28			// h+=K[i]
590	ror	x13,x8,#1
591	and	x17,x22,x21
592	ror	x12,x5,#19
593	bic	x28,x23,x21
594	ror	x14,x25,#28
595	add	x24,x24,x6			// h+=X[i]
596	eor	x16,x16,x21,ror#18
597	eor	x13,x13,x8,ror#8
598	orr	x17,x17,x28			// Ch(e,f,g)
599	eor	x28,x25,x26			// a^b, b^c in next round
600	eor	x16,x16,x21,ror#41	// Sigma1(e)
601	eor	x14,x14,x25,ror#34
602	add	x24,x24,x17			// h+=Ch(e,f,g)
603	and	x19,x19,x28			// (b^c)&=(a^b)
604	eor	x12,x12,x5,ror#61
605	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
606	add	x24,x24,x16			// h+=Sigma1(e)
607	eor	x19,x19,x26			// Maj(a,b,c)
608	eor	x17,x14,x25,ror#39	// Sigma0(a)
609	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
610	add	x7,x7,x0
611	add	x20,x20,x24			// d+=h
612	add	x24,x24,x19			// h+=Maj(a,b,c)
613	ldr	x19,[x30],#8		// *K++, x28 in next round
614	add	x7,x7,x13
615	add	x24,x24,x17			// h+=Sigma0(a)
616	add	x7,x7,x12
617	ldr	x12,[sp,#8]
618	str	x15,[sp,#0]
619	ror	x16,x20,#14
620	add	x23,x23,x19			// h+=K[i]
621	ror	x14,x9,#1
622	and	x17,x21,x20
623	ror	x13,x6,#19
624	bic	x19,x22,x20
625	ror	x15,x24,#28
626	add	x23,x23,x7			// h+=X[i]
627	eor	x16,x16,x20,ror#18
628	eor	x14,x14,x9,ror#8
629	orr	x17,x17,x19			// Ch(e,f,g)
630	eor	x19,x24,x25			// a^b, b^c in next round
631	eor	x16,x16,x20,ror#41	// Sigma1(e)
632	eor	x15,x15,x24,ror#34
633	add	x23,x23,x17			// h+=Ch(e,f,g)
634	and	x28,x28,x19			// (b^c)&=(a^b)
635	eor	x13,x13,x6,ror#61
636	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
637	add	x23,x23,x16			// h+=Sigma1(e)
638	eor	x28,x28,x25			// Maj(a,b,c)
639	eor	x17,x15,x24,ror#39	// Sigma0(a)
640	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
641	add	x8,x8,x1
642	add	x27,x27,x23			// d+=h
643	add	x23,x23,x28			// h+=Maj(a,b,c)
644	ldr	x28,[x30],#8		// *K++, x19 in next round
645	add	x8,x8,x14
646	add	x23,x23,x17			// h+=Sigma0(a)
647	add	x8,x8,x13
648	ldr	x13,[sp,#16]
649	str	x0,[sp,#8]
650	ror	x16,x27,#14
651	add	x22,x22,x28			// h+=K[i]
652	ror	x15,x10,#1
653	and	x17,x20,x27
654	ror	x14,x7,#19
655	bic	x28,x21,x27
656	ror	x0,x23,#28
657	add	x22,x22,x8			// h+=X[i]
658	eor	x16,x16,x27,ror#18
659	eor	x15,x15,x10,ror#8
660	orr	x17,x17,x28			// Ch(e,f,g)
661	eor	x28,x23,x24			// a^b, b^c in next round
662	eor	x16,x16,x27,ror#41	// Sigma1(e)
663	eor	x0,x0,x23,ror#34
664	add	x22,x22,x17			// h+=Ch(e,f,g)
665	and	x19,x19,x28			// (b^c)&=(a^b)
666	eor	x14,x14,x7,ror#61
667	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
668	add	x22,x22,x16			// h+=Sigma1(e)
669	eor	x19,x19,x24			// Maj(a,b,c)
670	eor	x17,x0,x23,ror#39	// Sigma0(a)
671	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
672	add	x9,x9,x2
673	add	x26,x26,x22			// d+=h
674	add	x22,x22,x19			// h+=Maj(a,b,c)
675	ldr	x19,[x30],#8		// *K++, x28 in next round
676	add	x9,x9,x15
677	add	x22,x22,x17			// h+=Sigma0(a)
678	add	x9,x9,x14
679	ldr	x14,[sp,#24]
680	str	x1,[sp,#16]
681	ror	x16,x26,#14
682	add	x21,x21,x19			// h+=K[i]
683	ror	x0,x11,#1
684	and	x17,x27,x26
685	ror	x15,x8,#19
686	bic	x19,x20,x26
687	ror	x1,x22,#28
688	add	x21,x21,x9			// h+=X[i]
689	eor	x16,x16,x26,ror#18
690	eor	x0,x0,x11,ror#8
691	orr	x17,x17,x19			// Ch(e,f,g)
692	eor	x19,x22,x23			// a^b, b^c in next round
693	eor	x16,x16,x26,ror#41	// Sigma1(e)
694	eor	x1,x1,x22,ror#34
695	add	x21,x21,x17			// h+=Ch(e,f,g)
696	and	x28,x28,x19			// (b^c)&=(a^b)
697	eor	x15,x15,x8,ror#61
698	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
699	add	x21,x21,x16			// h+=Sigma1(e)
700	eor	x28,x28,x23			// Maj(a,b,c)
701	eor	x17,x1,x22,ror#39	// Sigma0(a)
702	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
703	add	x10,x10,x3
704	add	x25,x25,x21			// d+=h
705	add	x21,x21,x28			// h+=Maj(a,b,c)
706	ldr	x28,[x30],#8		// *K++, x19 in next round
707	add	x10,x10,x0
708	add	x21,x21,x17			// h+=Sigma0(a)
709	add	x10,x10,x15
710	ldr	x15,[sp,#0]
711	str	x2,[sp,#24]
712	ror	x16,x25,#14
713	add	x20,x20,x28			// h+=K[i]
714	ror	x1,x12,#1
715	and	x17,x26,x25
716	ror	x0,x9,#19
717	bic	x28,x27,x25
718	ror	x2,x21,#28
719	add	x20,x20,x10			// h+=X[i]
720	eor	x16,x16,x25,ror#18
721	eor	x1,x1,x12,ror#8
722	orr	x17,x17,x28			// Ch(e,f,g)
723	eor	x28,x21,x22			// a^b, b^c in next round
724	eor	x16,x16,x25,ror#41	// Sigma1(e)
725	eor	x2,x2,x21,ror#34
726	add	x20,x20,x17			// h+=Ch(e,f,g)
727	and	x19,x19,x28			// (b^c)&=(a^b)
728	eor	x0,x0,x9,ror#61
729	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
730	add	x20,x20,x16			// h+=Sigma1(e)
731	eor	x19,x19,x22			// Maj(a,b,c)
732	eor	x17,x2,x21,ror#39	// Sigma0(a)
733	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
734	add	x11,x11,x4
735	add	x24,x24,x20			// d+=h
736	add	x20,x20,x19			// h+=Maj(a,b,c)
737	ldr	x19,[x30],#8		// *K++, x28 in next round
738	add	x11,x11,x1
739	add	x20,x20,x17			// h+=Sigma0(a)
740	add	x11,x11,x0
741	ldr	x0,[sp,#8]
742	str	x3,[sp,#0]
743	ror	x16,x24,#14
744	add	x27,x27,x19			// h+=K[i]
745	ror	x2,x13,#1
746	and	x17,x25,x24
747	ror	x1,x10,#19
748	bic	x19,x26,x24
749	ror	x3,x20,#28
750	add	x27,x27,x11			// h+=X[i]
751	eor	x16,x16,x24,ror#18
752	eor	x2,x2,x13,ror#8
753	orr	x17,x17,x19			// Ch(e,f,g)
754	eor	x19,x20,x21			// a^b, b^c in next round
755	eor	x16,x16,x24,ror#41	// Sigma1(e)
756	eor	x3,x3,x20,ror#34
757	add	x27,x27,x17			// h+=Ch(e,f,g)
758	and	x28,x28,x19			// (b^c)&=(a^b)
759	eor	x1,x1,x10,ror#61
760	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
761	add	x27,x27,x16			// h+=Sigma1(e)
762	eor	x28,x28,x21			// Maj(a,b,c)
763	eor	x17,x3,x20,ror#39	// Sigma0(a)
764	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
765	add	x12,x12,x5
766	add	x23,x23,x27			// d+=h
767	add	x27,x27,x28			// h+=Maj(a,b,c)
768	ldr	x28,[x30],#8		// *K++, x19 in next round
769	add	x12,x12,x2
770	add	x27,x27,x17			// h+=Sigma0(a)
771	add	x12,x12,x1
772	ldr	x1,[sp,#16]
773	str	x4,[sp,#8]
774	ror	x16,x23,#14
775	add	x26,x26,x28			// h+=K[i]
776	ror	x3,x14,#1
777	and	x17,x24,x23
778	ror	x2,x11,#19
779	bic	x28,x25,x23
780	ror	x4,x27,#28
781	add	x26,x26,x12			// h+=X[i]
782	eor	x16,x16,x23,ror#18
783	eor	x3,x3,x14,ror#8
784	orr	x17,x17,x28			// Ch(e,f,g)
785	eor	x28,x27,x20			// a^b, b^c in next round
786	eor	x16,x16,x23,ror#41	// Sigma1(e)
787	eor	x4,x4,x27,ror#34
788	add	x26,x26,x17			// h+=Ch(e,f,g)
789	and	x19,x19,x28			// (b^c)&=(a^b)
790	eor	x2,x2,x11,ror#61
791	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
792	add	x26,x26,x16			// h+=Sigma1(e)
793	eor	x19,x19,x20			// Maj(a,b,c)
794	eor	x17,x4,x27,ror#39	// Sigma0(a)
795	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
796	add	x13,x13,x6
797	add	x22,x22,x26			// d+=h
798	add	x26,x26,x19			// h+=Maj(a,b,c)
799	ldr	x19,[x30],#8		// *K++, x28 in next round
800	add	x13,x13,x3
801	add	x26,x26,x17			// h+=Sigma0(a)
802	add	x13,x13,x2
803	ldr	x2,[sp,#24]
804	str	x5,[sp,#16]
805	ror	x16,x22,#14
806	add	x25,x25,x19			// h+=K[i]
807	ror	x4,x15,#1
808	and	x17,x23,x22
809	ror	x3,x12,#19
810	bic	x19,x24,x22
811	ror	x5,x26,#28
812	add	x25,x25,x13			// h+=X[i]
813	eor	x16,x16,x22,ror#18
814	eor	x4,x4,x15,ror#8
815	orr	x17,x17,x19			// Ch(e,f,g)
816	eor	x19,x26,x27			// a^b, b^c in next round
817	eor	x16,x16,x22,ror#41	// Sigma1(e)
818	eor	x5,x5,x26,ror#34
819	add	x25,x25,x17			// h+=Ch(e,f,g)
820	and	x28,x28,x19			// (b^c)&=(a^b)
821	eor	x3,x3,x12,ror#61
822	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
823	add	x25,x25,x16			// h+=Sigma1(e)
824	eor	x28,x28,x27			// Maj(a,b,c)
825	eor	x17,x5,x26,ror#39	// Sigma0(a)
826	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
827	add	x14,x14,x7
828	add	x21,x21,x25			// d+=h
829	add	x25,x25,x28			// h+=Maj(a,b,c)
830	ldr	x28,[x30],#8		// *K++, x19 in next round
831	add	x14,x14,x4
832	add	x25,x25,x17			// h+=Sigma0(a)
833	add	x14,x14,x3
834	ldr	x3,[sp,#0]
835	str	x6,[sp,#24]
836	ror	x16,x21,#14
837	add	x24,x24,x28			// h+=K[i]
838	ror	x5,x0,#1
839	and	x17,x22,x21
840	ror	x4,x13,#19
841	bic	x28,x23,x21
842	ror	x6,x25,#28
843	add	x24,x24,x14			// h+=X[i]
844	eor	x16,x16,x21,ror#18
845	eor	x5,x5,x0,ror#8
846	orr	x17,x17,x28			// Ch(e,f,g)
847	eor	x28,x25,x26			// a^b, b^c in next round
848	eor	x16,x16,x21,ror#41	// Sigma1(e)
849	eor	x6,x6,x25,ror#34
850	add	x24,x24,x17			// h+=Ch(e,f,g)
851	and	x19,x19,x28			// (b^c)&=(a^b)
852	eor	x4,x4,x13,ror#61
853	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
854	add	x24,x24,x16			// h+=Sigma1(e)
855	eor	x19,x19,x26			// Maj(a,b,c)
856	eor	x17,x6,x25,ror#39	// Sigma0(a)
857	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
858	add	x15,x15,x8
859	add	x20,x20,x24			// d+=h
860	add	x24,x24,x19			// h+=Maj(a,b,c)
861	ldr	x19,[x30],#8		// *K++, x28 in next round
862	add	x15,x15,x5
863	add	x24,x24,x17			// h+=Sigma0(a)
864	add	x15,x15,x4
865	ldr	x4,[sp,#8]
866	str	x7,[sp,#0]
867	ror	x16,x20,#14
868	add	x23,x23,x19			// h+=K[i]
869	ror	x6,x1,#1
870	and	x17,x21,x20
871	ror	x5,x14,#19
872	bic	x19,x22,x20
873	ror	x7,x24,#28
874	add	x23,x23,x15			// h+=X[i]
875	eor	x16,x16,x20,ror#18
876	eor	x6,x6,x1,ror#8
877	orr	x17,x17,x19			// Ch(e,f,g)
878	eor	x19,x24,x25			// a^b, b^c in next round
879	eor	x16,x16,x20,ror#41	// Sigma1(e)
880	eor	x7,x7,x24,ror#34
881	add	x23,x23,x17			// h+=Ch(e,f,g)
882	and	x28,x28,x19			// (b^c)&=(a^b)
883	eor	x5,x5,x14,ror#61
884	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
885	add	x23,x23,x16			// h+=Sigma1(e)
886	eor	x28,x28,x25			// Maj(a,b,c)
887	eor	x17,x7,x24,ror#39	// Sigma0(a)
888	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
889	add	x0,x0,x9
890	add	x27,x27,x23			// d+=h
891	add	x23,x23,x28			// h+=Maj(a,b,c)
892	ldr	x28,[x30],#8		// *K++, x19 in next round
893	add	x0,x0,x6
894	add	x23,x23,x17			// h+=Sigma0(a)
895	add	x0,x0,x5
896	ldr	x5,[sp,#16]
897	str	x8,[sp,#8]
898	ror	x16,x27,#14
899	add	x22,x22,x28			// h+=K[i]
900	ror	x7,x2,#1
901	and	x17,x20,x27
902	ror	x6,x15,#19
903	bic	x28,x21,x27
904	ror	x8,x23,#28
905	add	x22,x22,x0			// h+=X[i]
906	eor	x16,x16,x27,ror#18
907	eor	x7,x7,x2,ror#8
908	orr	x17,x17,x28			// Ch(e,f,g)
909	eor	x28,x23,x24			// a^b, b^c in next round
910	eor	x16,x16,x27,ror#41	// Sigma1(e)
911	eor	x8,x8,x23,ror#34
912	add	x22,x22,x17			// h+=Ch(e,f,g)
913	and	x19,x19,x28			// (b^c)&=(a^b)
914	eor	x6,x6,x15,ror#61
915	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
916	add	x22,x22,x16			// h+=Sigma1(e)
917	eor	x19,x19,x24			// Maj(a,b,c)
918	eor	x17,x8,x23,ror#39	// Sigma0(a)
919	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
920	add	x1,x1,x10
921	add	x26,x26,x22			// d+=h
922	add	x22,x22,x19			// h+=Maj(a,b,c)
923	ldr	x19,[x30],#8		// *K++, x28 in next round
924	add	x1,x1,x7
925	add	x22,x22,x17			// h+=Sigma0(a)
926	add	x1,x1,x6
927	ldr	x6,[sp,#24]
928	str	x9,[sp,#16]
929	ror	x16,x26,#14
930	add	x21,x21,x19			// h+=K[i]
931	ror	x8,x3,#1
932	and	x17,x27,x26
933	ror	x7,x0,#19
934	bic	x19,x20,x26
935	ror	x9,x22,#28
936	add	x21,x21,x1			// h+=X[i]
937	eor	x16,x16,x26,ror#18
938	eor	x8,x8,x3,ror#8
939	orr	x17,x17,x19			// Ch(e,f,g)
940	eor	x19,x22,x23			// a^b, b^c in next round
941	eor	x16,x16,x26,ror#41	// Sigma1(e)
942	eor	x9,x9,x22,ror#34
943	add	x21,x21,x17			// h+=Ch(e,f,g)
944	and	x28,x28,x19			// (b^c)&=(a^b)
945	eor	x7,x7,x0,ror#61
946	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
947	add	x21,x21,x16			// h+=Sigma1(e)
948	eor	x28,x28,x23			// Maj(a,b,c)
949	eor	x17,x9,x22,ror#39	// Sigma0(a)
950	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
951	add	x2,x2,x11
952	add	x25,x25,x21			// d+=h
953	add	x21,x21,x28			// h+=Maj(a,b,c)
954	ldr	x28,[x30],#8		// *K++, x19 in next round
955	add	x2,x2,x8
956	add	x21,x21,x17			// h+=Sigma0(a)
957	add	x2,x2,x7
958	ldr	x7,[sp,#0]
959	str	x10,[sp,#24]
960	ror	x16,x25,#14
961	add	x20,x20,x28			// h+=K[i]
962	ror	x9,x4,#1
963	and	x17,x26,x25
964	ror	x8,x1,#19
965	bic	x28,x27,x25
966	ror	x10,x21,#28
967	add	x20,x20,x2			// h+=X[i]
968	eor	x16,x16,x25,ror#18
969	eor	x9,x9,x4,ror#8
970	orr	x17,x17,x28			// Ch(e,f,g)
971	eor	x28,x21,x22			// a^b, b^c in next round
972	eor	x16,x16,x25,ror#41	// Sigma1(e)
973	eor	x10,x10,x21,ror#34
974	add	x20,x20,x17			// h+=Ch(e,f,g)
975	and	x19,x19,x28			// (b^c)&=(a^b)
976	eor	x8,x8,x1,ror#61
977	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
978	add	x20,x20,x16			// h+=Sigma1(e)
979	eor	x19,x19,x22			// Maj(a,b,c)
980	eor	x17,x10,x21,ror#39	// Sigma0(a)
981	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
982	add	x3,x3,x12
983	add	x24,x24,x20			// d+=h
984	add	x20,x20,x19			// h+=Maj(a,b,c)
985	ldr	x19,[x30],#8		// *K++, x28 in next round
986	add	x3,x3,x9
987	add	x20,x20,x17			// h+=Sigma0(a)
988	add	x3,x3,x8
989	cbnz	x19,Loop_16_xx	// x19 is the quad just loaded; zero means the table is exhausted
990
// End of one block: add the working variables x20..x27 into the state words
// at [x0], store the sums back, and either start the next block or return.
// x0, x1 and x2 were overwritten with message words during the rounds, so
// they are reloaded from the frame first.
991	ldp	x0,x2,[x29,#96]	// x0 = state pointer, x2 = end-of-input pointer
992	ldr	x1,[x29,#112]	// x1 = input cursor saved at the top of Loop
// 81 quads were read through x30 (80 constants plus the zero terminator),
// so stepping back 81*8 = 648 bytes returns x30 to the start of LK512.
993	sub	x30,x30,#648		// rewind
994
995	ldp	x3,x4,[x0]
996	ldp	x5,x6,[x0,#2*8]
// The cursor was saved after the first 16 bytes of the block had been
// consumed, so 14*8 more bytes brings it to the start of the next block.
997	add	x1,x1,#14*8			// advance input pointer
998	ldp	x7,x8,[x0,#4*8]
999	add	x20,x20,x3
1000	ldp	x9,x10,[x0,#6*8]
1001	add	x21,x21,x4
1002	add	x22,x22,x5
1003	add	x23,x23,x6
1004	stp	x20,x21,[x0]
1005	add	x24,x24,x7
1006	add	x25,x25,x8
1007	stp	x22,x23,[x0,#2*8]
1008	add	x26,x26,x9
1009	add	x27,x27,x10
1010	cmp	x1,x2	// reached the end-of-input pointer?
1011	stp	x24,x25,[x0,#4*8]
1012	stp	x26,x27,[x0,#6*8]
1013	b.ne	Loop	// no: process the next block (x20..x27 already hold the new state)
1014
// Restore x19-x28 and x29/x30 from the frame, drop the 32-byte scratch area
// and the 128-byte frame, and return.
1015	ldp	x19,x20,[x29,#16]
1016	add	sp,sp,#4*8
1017	ldp	x21,x22,[x29,#32]
1018	ldp	x23,x24,[x29,#48]
1019	ldp	x25,x26,[x29,#64]
1020	ldp	x27,x28,[x29,#80]
1021	ldp	x29,x30,[sp],#128
1022	ret
1023
1024
// LK512: the 80 SHA-512 round constants K[0..79] as 64-bit quads, followed
// by a single zero quad. The round code walks this table with post-indexed
// loads and treats the zero as the end-of-table marker, so the terminator
// must stay directly after the last constant. The rewind in the block
// routine (648 bytes = 81 quads) depends on this exact length.
1025.section	__TEXT,__const
1026.align	6
1027
1028LK512:
1029.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1030.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1031.quad	0x3956c25bf348b538,0x59f111f1b605d019
1032.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1033.quad	0xd807aa98a3030242,0x12835b0145706fbe
1034.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1035.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1036.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1037.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1038.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1039.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1040.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1041.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1042.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1043.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1044.quad	0x06ca6351e003826f,0x142929670a0e6e70
1045.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1046.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1047.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1048.quad	0x81c2c92e47edaee6,0x92722c851482353b
1049.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1050.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1051.quad	0xd192e819d6ef5218,0xd69906245565a910
1052.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1053.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1054.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1055.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1056.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1057.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1058.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1059.quad	0x90befffa23631e28,0xa4506cebde82bde9
1060.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1061.quad	0xca273eceea26619c,0xd186b8c721c0c207
1062.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1063.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1064.quad	0x113f9804bef90dae,0x1b710b35131c471b
1065.quad	0x28db77f523047d84,0x32caab7b40c72493
1066.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1067.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1068.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1069.quad	0	// terminator
1070
// NUL-terminated ASCII identification string; the bytes spell
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>".
// Nothing in the code shown in this file reads it.
1071.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1072.align	2
1073.align	2
// Outside kernel builds, declare _GFp_armcap_P as a common symbol with
// operands 4,4 and mark it private_extern. The routine shown in this file
// does not reference it.
// NOTE(review): presumably this is the CPU-capability word shared with
// other modules - confirm.
1074#ifndef	__KERNEL__
1075.comm	_GFp_armcap_P,4,4
1076.private_extern	_GFp_armcap_P
1077#endif
1078#endif  // !OPENSSL_NO_ASM
1079