1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__aarch64__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
17//
18// Licensed under the OpenSSL license (the "License").  You may not use
19// this file except in compliance with the License.  You can obtain a copy
20// in the file LICENSE in the source distribution or at
21// https://www.openssl.org/source/license.html
22
23// ====================================================================
24// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
25// project. The module is, however, dual licensed under OpenSSL and
26// CRYPTOGAMS licenses depending on where you obtain it. For further
27// details see http://www.openssl.org/~appro/cryptogams/.
28//
29// Permission to use under GPLv2 terms is granted.
30// ====================================================================
31//
32// SHA256/512 for ARMv8.
33//
34// Performance in cycles per processed byte and improvement coefficient
35// over code generated with "default" compiler:
36//
37//		SHA256-hw	SHA256(*)	SHA512
38// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
39// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
40// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
41// Denver	2.01		10.5 (+26%)	6.70 (+8%)
42// X-Gene			20.0 (+100%)	12.8 (+300%(***))
43// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
44//
45// (*)	Software SHA256 results are of lesser relevance, presented
46//	mostly for informational purposes.
47// (**)	The result is a trade-off: it's possible to improve it by
48//	10% (or by 1 cycle per round), but at the cost of 20% loss
49//	on Cortex-A53 (or by 4 cycles per round).
50// (***)	Super-impressive coefficients over gcc-generated code are
51//	indication of some compiler "pathology", most notably code
52//	generated with -mgeneral-regs-only is significantly faster
53//	and the gap is only 40-90%.
54
55#ifndef	__KERNEL__
56# include <openssl/arm_arch.h>
57#endif
58
59.text
60
61
// ---------------------------------------------------------------------
// sha512_block_data_order
//
// Runs the SHA-512 compression function over consecutive 128-byte input
// blocks, updating an eight-word 64-bit state in place.
//
// Register arguments, as consumed by the code below:
//   x0  pointer to the eight 64-bit state words (read at entry, written
//       back after every block)
//   x1  pointer to the input bytes
//   x2  number of 128-byte blocks (scaled by lsl #7 to form the end
//       pointer)
// NOTE(review): the block loop is bottom-tested (cmp x1,x2 / b.ne .Loop),
// so a block count of zero is not handled here -- presumably callers
// always pass at least one block; confirm against the C caller.
//
// Register roles inside the function:
//   x20-x27      working variables a..h; the roles rotate by one
//                register every round (see the per-line comments)
//   x3-x15,x0-x2 the sixteen message-schedule words X[0..15]
//   x19,x28      alternate between holding K[i] and the a^b term that
//                is reused as b^c in the following round
//   x16,x17      scratch for Sigma1/Ch/Sigma0
//   x30          running pointer into .LK512 (the link register is
//                saved in the frame first)
//
// Stack layout (x29 = frame base):
//   [x29,#0]    saved x29,x30
//   [x29,#16]   saved x19-x28 (through #95)
//   [x29,#96]   state pointer (x0), [x29,#104] end-of-input pointer
//   [x29,#112]  input pointer
//   [sp,#0..24] four 8-byte slots below the frame, used to spill X[]
//               words whose registers are borrowed as temporaries
//
// The "h+=Sigma0(a)" addition of each round is deliberately deferred
// into the start of the next round; the commented-out add at the end
// of each round marks where it logically belongs.
//
// AARCH64_SIGN_LINK_REGISTER / AARCH64_VALIDATE_LINK_REGISTER are macros
// not defined in this file -- presumably from <openssl/arm_arch.h>.
// ---------------------------------------------------------------------
62.globl	sha512_block_data_order
63.hidden	sha512_block_data_order
64.type	sha512_block_data_order,%function
65.align	6
66sha512_block_data_order:
67	AARCH64_SIGN_LINK_REGISTER
68	stp	x29,x30,[sp,#-128]!	// allocate 128-byte frame, save fp/lr
69	add	x29,sp,#0		// x29 = frame base
70
71	stp	x19,x20,[sp,#16]	// save callee-saved x19-x28
72	stp	x21,x22,[sp,#32]
73	stp	x23,x24,[sp,#48]
74	stp	x25,x26,[sp,#64]
75	stp	x27,x28,[sp,#80]
76	sub	sp,sp,#4*8		// four 8-byte spill slots for X[] words
77
78	ldp	x20,x21,[x0]				// load context
79	ldp	x22,x23,[x0,#2*8]
80	ldp	x24,x25,[x0,#4*8]
81	add	x2,x1,x2,lsl#7	// end of input
82	ldp	x26,x27,[x0,#6*8]
83	adrp	x30,.LK512		// x30 = &.LK512 (lr already saved above)
84	add	x30,x30,:lo12:.LK512
85	stp	x0,x2,[x29,#96]		// stash state pointer and end pointer
86
// Per-block loop. Rounds 0..15 follow: message words are loaded straight
// from the input and byte-swapped with rev unless __ARMEB__ is defined.
87.Loop:
88	ldp	x3,x4,[x1],#2*8
89	ldr	x19,[x30],#8			// *K++
90	eor	x28,x21,x22				// magic seed
91	str	x1,[x29,#112]		// stash input pointer (already advanced by 16)
92#ifndef	__ARMEB__
93	rev	x3,x3			// 0
94#endif
95	ror	x16,x24,#14
96	add	x27,x27,x19			// h+=K[i]
97	eor	x6,x24,x24,ror#23
98	and	x17,x25,x24
99	bic	x19,x26,x24
100	add	x27,x27,x3			// h+=X[i]
101	orr	x17,x17,x19			// Ch(e,f,g)
102	eor	x19,x20,x21			// a^b, b^c in next round
103	eor	x16,x16,x6,ror#18	// Sigma1(e)
104	ror	x6,x20,#28
105	add	x27,x27,x17			// h+=Ch(e,f,g)
106	eor	x17,x20,x20,ror#5
107	add	x27,x27,x16			// h+=Sigma1(e)
108	and	x28,x28,x19			// (b^c)&=(a^b)
109	add	x23,x23,x27			// d+=h
110	eor	x28,x28,x21			// Maj(a,b,c)
111	eor	x17,x6,x17,ror#34	// Sigma0(a)
112	add	x27,x27,x28			// h+=Maj(a,b,c)
113	ldr	x28,[x30],#8		// *K++, x19 in next round
114	//add	x27,x27,x17			// h+=Sigma0(a)
115#ifndef	__ARMEB__
116	rev	x4,x4			// 1
117#endif
118	ldp	x5,x6,[x1],#2*8
119	add	x27,x27,x17			// h+=Sigma0(a)
120	ror	x16,x23,#14
121	add	x26,x26,x28			// h+=K[i]
122	eor	x7,x23,x23,ror#23
123	and	x17,x24,x23
124	bic	x28,x25,x23
125	add	x26,x26,x4			// h+=X[i]
126	orr	x17,x17,x28			// Ch(e,f,g)
127	eor	x28,x27,x20			// a^b, b^c in next round
128	eor	x16,x16,x7,ror#18	// Sigma1(e)
129	ror	x7,x27,#28
130	add	x26,x26,x17			// h+=Ch(e,f,g)
131	eor	x17,x27,x27,ror#5
132	add	x26,x26,x16			// h+=Sigma1(e)
133	and	x19,x19,x28			// (b^c)&=(a^b)
134	add	x22,x22,x26			// d+=h
135	eor	x19,x19,x20			// Maj(a,b,c)
136	eor	x17,x7,x17,ror#34	// Sigma0(a)
137	add	x26,x26,x19			// h+=Maj(a,b,c)
138	ldr	x19,[x30],#8		// *K++, x28 in next round
139	//add	x26,x26,x17			// h+=Sigma0(a)
140#ifndef	__ARMEB__
141	rev	x5,x5			// 2
142#endif
143	add	x26,x26,x17			// h+=Sigma0(a)
144	ror	x16,x22,#14
145	add	x25,x25,x19			// h+=K[i]
146	eor	x8,x22,x22,ror#23
147	and	x17,x23,x22
148	bic	x19,x24,x22
149	add	x25,x25,x5			// h+=X[i]
150	orr	x17,x17,x19			// Ch(e,f,g)
151	eor	x19,x26,x27			// a^b, b^c in next round
152	eor	x16,x16,x8,ror#18	// Sigma1(e)
153	ror	x8,x26,#28
154	add	x25,x25,x17			// h+=Ch(e,f,g)
155	eor	x17,x26,x26,ror#5
156	add	x25,x25,x16			// h+=Sigma1(e)
157	and	x28,x28,x19			// (b^c)&=(a^b)
158	add	x21,x21,x25			// d+=h
159	eor	x28,x28,x27			// Maj(a,b,c)
160	eor	x17,x8,x17,ror#34	// Sigma0(a)
161	add	x25,x25,x28			// h+=Maj(a,b,c)
162	ldr	x28,[x30],#8		// *K++, x19 in next round
163	//add	x25,x25,x17			// h+=Sigma0(a)
164#ifndef	__ARMEB__
165	rev	x6,x6			// 3
166#endif
167	ldp	x7,x8,[x1],#2*8
168	add	x25,x25,x17			// h+=Sigma0(a)
169	ror	x16,x21,#14
170	add	x24,x24,x28			// h+=K[i]
171	eor	x9,x21,x21,ror#23
172	and	x17,x22,x21
173	bic	x28,x23,x21
174	add	x24,x24,x6			// h+=X[i]
175	orr	x17,x17,x28			// Ch(e,f,g)
176	eor	x28,x25,x26			// a^b, b^c in next round
177	eor	x16,x16,x9,ror#18	// Sigma1(e)
178	ror	x9,x25,#28
179	add	x24,x24,x17			// h+=Ch(e,f,g)
180	eor	x17,x25,x25,ror#5
181	add	x24,x24,x16			// h+=Sigma1(e)
182	and	x19,x19,x28			// (b^c)&=(a^b)
183	add	x20,x20,x24			// d+=h
184	eor	x19,x19,x26			// Maj(a,b,c)
185	eor	x17,x9,x17,ror#34	// Sigma0(a)
186	add	x24,x24,x19			// h+=Maj(a,b,c)
187	ldr	x19,[x30],#8		// *K++, x28 in next round
188	//add	x24,x24,x17			// h+=Sigma0(a)
189#ifndef	__ARMEB__
190	rev	x7,x7			// 4
191#endif
192	add	x24,x24,x17			// h+=Sigma0(a)
193	ror	x16,x20,#14
194	add	x23,x23,x19			// h+=K[i]
195	eor	x10,x20,x20,ror#23
196	and	x17,x21,x20
197	bic	x19,x22,x20
198	add	x23,x23,x7			// h+=X[i]
199	orr	x17,x17,x19			// Ch(e,f,g)
200	eor	x19,x24,x25			// a^b, b^c in next round
201	eor	x16,x16,x10,ror#18	// Sigma1(e)
202	ror	x10,x24,#28
203	add	x23,x23,x17			// h+=Ch(e,f,g)
204	eor	x17,x24,x24,ror#5
205	add	x23,x23,x16			// h+=Sigma1(e)
206	and	x28,x28,x19			// (b^c)&=(a^b)
207	add	x27,x27,x23			// d+=h
208	eor	x28,x28,x25			// Maj(a,b,c)
209	eor	x17,x10,x17,ror#34	// Sigma0(a)
210	add	x23,x23,x28			// h+=Maj(a,b,c)
211	ldr	x28,[x30],#8		// *K++, x19 in next round
212	//add	x23,x23,x17			// h+=Sigma0(a)
213#ifndef	__ARMEB__
214	rev	x8,x8			// 5
215#endif
216	ldp	x9,x10,[x1],#2*8
217	add	x23,x23,x17			// h+=Sigma0(a)
218	ror	x16,x27,#14
219	add	x22,x22,x28			// h+=K[i]
220	eor	x11,x27,x27,ror#23
221	and	x17,x20,x27
222	bic	x28,x21,x27
223	add	x22,x22,x8			// h+=X[i]
224	orr	x17,x17,x28			// Ch(e,f,g)
225	eor	x28,x23,x24			// a^b, b^c in next round
226	eor	x16,x16,x11,ror#18	// Sigma1(e)
227	ror	x11,x23,#28
228	add	x22,x22,x17			// h+=Ch(e,f,g)
229	eor	x17,x23,x23,ror#5
230	add	x22,x22,x16			// h+=Sigma1(e)
231	and	x19,x19,x28			// (b^c)&=(a^b)
232	add	x26,x26,x22			// d+=h
233	eor	x19,x19,x24			// Maj(a,b,c)
234	eor	x17,x11,x17,ror#34	// Sigma0(a)
235	add	x22,x22,x19			// h+=Maj(a,b,c)
236	ldr	x19,[x30],#8		// *K++, x28 in next round
237	//add	x22,x22,x17			// h+=Sigma0(a)
238#ifndef	__ARMEB__
239	rev	x9,x9			// 6
240#endif
241	add	x22,x22,x17			// h+=Sigma0(a)
242	ror	x16,x26,#14
243	add	x21,x21,x19			// h+=K[i]
244	eor	x12,x26,x26,ror#23
245	and	x17,x27,x26
246	bic	x19,x20,x26
247	add	x21,x21,x9			// h+=X[i]
248	orr	x17,x17,x19			// Ch(e,f,g)
249	eor	x19,x22,x23			// a^b, b^c in next round
250	eor	x16,x16,x12,ror#18	// Sigma1(e)
251	ror	x12,x22,#28
252	add	x21,x21,x17			// h+=Ch(e,f,g)
253	eor	x17,x22,x22,ror#5
254	add	x21,x21,x16			// h+=Sigma1(e)
255	and	x28,x28,x19			// (b^c)&=(a^b)
256	add	x25,x25,x21			// d+=h
257	eor	x28,x28,x23			// Maj(a,b,c)
258	eor	x17,x12,x17,ror#34	// Sigma0(a)
259	add	x21,x21,x28			// h+=Maj(a,b,c)
260	ldr	x28,[x30],#8		// *K++, x19 in next round
261	//add	x21,x21,x17			// h+=Sigma0(a)
262#ifndef	__ARMEB__
263	rev	x10,x10			// 7
264#endif
265	ldp	x11,x12,[x1],#2*8
266	add	x21,x21,x17			// h+=Sigma0(a)
267	ror	x16,x25,#14
268	add	x20,x20,x28			// h+=K[i]
269	eor	x13,x25,x25,ror#23
270	and	x17,x26,x25
271	bic	x28,x27,x25
272	add	x20,x20,x10			// h+=X[i]
273	orr	x17,x17,x28			// Ch(e,f,g)
274	eor	x28,x21,x22			// a^b, b^c in next round
275	eor	x16,x16,x13,ror#18	// Sigma1(e)
276	ror	x13,x21,#28
277	add	x20,x20,x17			// h+=Ch(e,f,g)
278	eor	x17,x21,x21,ror#5
279	add	x20,x20,x16			// h+=Sigma1(e)
280	and	x19,x19,x28			// (b^c)&=(a^b)
281	add	x24,x24,x20			// d+=h
282	eor	x19,x19,x22			// Maj(a,b,c)
283	eor	x17,x13,x17,ror#34	// Sigma0(a)
284	add	x20,x20,x19			// h+=Maj(a,b,c)
285	ldr	x19,[x30],#8		// *K++, x28 in next round
286	//add	x20,x20,x17			// h+=Sigma0(a)
287#ifndef	__ARMEB__
288	rev	x11,x11			// 8
289#endif
290	add	x20,x20,x17			// h+=Sigma0(a)
291	ror	x16,x24,#14
292	add	x27,x27,x19			// h+=K[i]
293	eor	x14,x24,x24,ror#23
294	and	x17,x25,x24
295	bic	x19,x26,x24
296	add	x27,x27,x11			// h+=X[i]
297	orr	x17,x17,x19			// Ch(e,f,g)
298	eor	x19,x20,x21			// a^b, b^c in next round
299	eor	x16,x16,x14,ror#18	// Sigma1(e)
300	ror	x14,x20,#28
301	add	x27,x27,x17			// h+=Ch(e,f,g)
302	eor	x17,x20,x20,ror#5
303	add	x27,x27,x16			// h+=Sigma1(e)
304	and	x28,x28,x19			// (b^c)&=(a^b)
305	add	x23,x23,x27			// d+=h
306	eor	x28,x28,x21			// Maj(a,b,c)
307	eor	x17,x14,x17,ror#34	// Sigma0(a)
308	add	x27,x27,x28			// h+=Maj(a,b,c)
309	ldr	x28,[x30],#8		// *K++, x19 in next round
310	//add	x27,x27,x17			// h+=Sigma0(a)
311#ifndef	__ARMEB__
312	rev	x12,x12			// 9
313#endif
314	ldp	x13,x14,[x1],#2*8
315	add	x27,x27,x17			// h+=Sigma0(a)
316	ror	x16,x23,#14
317	add	x26,x26,x28			// h+=K[i]
318	eor	x15,x23,x23,ror#23
319	and	x17,x24,x23
320	bic	x28,x25,x23
321	add	x26,x26,x12			// h+=X[i]
322	orr	x17,x17,x28			// Ch(e,f,g)
323	eor	x28,x27,x20			// a^b, b^c in next round
324	eor	x16,x16,x15,ror#18	// Sigma1(e)
325	ror	x15,x27,#28
326	add	x26,x26,x17			// h+=Ch(e,f,g)
327	eor	x17,x27,x27,ror#5
328	add	x26,x26,x16			// h+=Sigma1(e)
329	and	x19,x19,x28			// (b^c)&=(a^b)
330	add	x22,x22,x26			// d+=h
331	eor	x19,x19,x20			// Maj(a,b,c)
332	eor	x17,x15,x17,ror#34	// Sigma0(a)
333	add	x26,x26,x19			// h+=Maj(a,b,c)
334	ldr	x19,[x30],#8		// *K++, x28 in next round
335	//add	x26,x26,x17			// h+=Sigma0(a)
336#ifndef	__ARMEB__
337	rev	x13,x13			// 10
338#endif
339	add	x26,x26,x17			// h+=Sigma0(a)
340	ror	x16,x22,#14
341	add	x25,x25,x19			// h+=K[i]
342	eor	x0,x22,x22,ror#23
343	and	x17,x23,x22
344	bic	x19,x24,x22
345	add	x25,x25,x13			// h+=X[i]
346	orr	x17,x17,x19			// Ch(e,f,g)
347	eor	x19,x26,x27			// a^b, b^c in next round
348	eor	x16,x16,x0,ror#18	// Sigma1(e)
349	ror	x0,x26,#28
350	add	x25,x25,x17			// h+=Ch(e,f,g)
351	eor	x17,x26,x26,ror#5
352	add	x25,x25,x16			// h+=Sigma1(e)
353	and	x28,x28,x19			// (b^c)&=(a^b)
354	add	x21,x21,x25			// d+=h
355	eor	x28,x28,x27			// Maj(a,b,c)
356	eor	x17,x0,x17,ror#34	// Sigma0(a)
357	add	x25,x25,x28			// h+=Maj(a,b,c)
358	ldr	x28,[x30],#8		// *K++, x19 in next round
359	//add	x25,x25,x17			// h+=Sigma0(a)
360#ifndef	__ARMEB__
361	rev	x14,x14			// 11
362#endif
363	ldp	x15,x0,[x1],#2*8
364	add	x25,x25,x17			// h+=Sigma0(a)
365	str	x6,[sp,#24]		// spill X[3]; x6 is borrowed as a temporary below
366	ror	x16,x21,#14
367	add	x24,x24,x28			// h+=K[i]
368	eor	x6,x21,x21,ror#23
369	and	x17,x22,x21
370	bic	x28,x23,x21
371	add	x24,x24,x14			// h+=X[i]
372	orr	x17,x17,x28			// Ch(e,f,g)
373	eor	x28,x25,x26			// a^b, b^c in next round
374	eor	x16,x16,x6,ror#18	// Sigma1(e)
375	ror	x6,x25,#28
376	add	x24,x24,x17			// h+=Ch(e,f,g)
377	eor	x17,x25,x25,ror#5
378	add	x24,x24,x16			// h+=Sigma1(e)
379	and	x19,x19,x28			// (b^c)&=(a^b)
380	add	x20,x20,x24			// d+=h
381	eor	x19,x19,x26			// Maj(a,b,c)
382	eor	x17,x6,x17,ror#34	// Sigma0(a)
383	add	x24,x24,x19			// h+=Maj(a,b,c)
384	ldr	x19,[x30],#8		// *K++, x28 in next round
385	//add	x24,x24,x17			// h+=Sigma0(a)
386#ifndef	__ARMEB__
387	rev	x15,x15			// 12
388#endif
389	add	x24,x24,x17			// h+=Sigma0(a)
390	str	x7,[sp,#0]		// spill X[4]; x7 is borrowed as a temporary below
391	ror	x16,x20,#14
392	add	x23,x23,x19			// h+=K[i]
393	eor	x7,x20,x20,ror#23
394	and	x17,x21,x20
395	bic	x19,x22,x20
396	add	x23,x23,x15			// h+=X[i]
397	orr	x17,x17,x19			// Ch(e,f,g)
398	eor	x19,x24,x25			// a^b, b^c in next round
399	eor	x16,x16,x7,ror#18	// Sigma1(e)
400	ror	x7,x24,#28
401	add	x23,x23,x17			// h+=Ch(e,f,g)
402	eor	x17,x24,x24,ror#5
403	add	x23,x23,x16			// h+=Sigma1(e)
404	and	x28,x28,x19			// (b^c)&=(a^b)
405	add	x27,x27,x23			// d+=h
406	eor	x28,x28,x25			// Maj(a,b,c)
407	eor	x17,x7,x17,ror#34	// Sigma0(a)
408	add	x23,x23,x28			// h+=Maj(a,b,c)
409	ldr	x28,[x30],#8		// *K++, x19 in next round
410	//add	x23,x23,x17			// h+=Sigma0(a)
411#ifndef	__ARMEB__
412	rev	x0,x0			// 13
413#endif
414	ldp	x1,x2,[x1]		// X[14],X[15]; overwrites x1/x2 (copies live at [x29,#104] and [x29,#112])
415	add	x23,x23,x17			// h+=Sigma0(a)
416	str	x8,[sp,#8]		// spill X[5]; x8 is borrowed as a temporary below
417	ror	x16,x27,#14
418	add	x22,x22,x28			// h+=K[i]
419	eor	x8,x27,x27,ror#23
420	and	x17,x20,x27
421	bic	x28,x21,x27
422	add	x22,x22,x0			// h+=X[i]
423	orr	x17,x17,x28			// Ch(e,f,g)
424	eor	x28,x23,x24			// a^b, b^c in next round
425	eor	x16,x16,x8,ror#18	// Sigma1(e)
426	ror	x8,x23,#28
427	add	x22,x22,x17			// h+=Ch(e,f,g)
428	eor	x17,x23,x23,ror#5
429	add	x22,x22,x16			// h+=Sigma1(e)
430	and	x19,x19,x28			// (b^c)&=(a^b)
431	add	x26,x26,x22			// d+=h
432	eor	x19,x19,x24			// Maj(a,b,c)
433	eor	x17,x8,x17,ror#34	// Sigma0(a)
434	add	x22,x22,x19			// h+=Maj(a,b,c)
435	ldr	x19,[x30],#8		// *K++, x28 in next round
436	//add	x22,x22,x17			// h+=Sigma0(a)
437#ifndef	__ARMEB__
438	rev	x1,x1			// 14
439#endif
440	ldr	x6,[sp,#24]		// reload X[3]
441	add	x22,x22,x17			// h+=Sigma0(a)
442	str	x9,[sp,#16]		// spill X[6]; x9 is borrowed as a temporary below
443	ror	x16,x26,#14
444	add	x21,x21,x19			// h+=K[i]
445	eor	x9,x26,x26,ror#23
446	and	x17,x27,x26
447	bic	x19,x20,x26
448	add	x21,x21,x1			// h+=X[i]
449	orr	x17,x17,x19			// Ch(e,f,g)
450	eor	x19,x22,x23			// a^b, b^c in next round
451	eor	x16,x16,x9,ror#18	// Sigma1(e)
452	ror	x9,x22,#28
453	add	x21,x21,x17			// h+=Ch(e,f,g)
454	eor	x17,x22,x22,ror#5
455	add	x21,x21,x16			// h+=Sigma1(e)
456	and	x28,x28,x19			// (b^c)&=(a^b)
457	add	x25,x25,x21			// d+=h
458	eor	x28,x28,x23			// Maj(a,b,c)
459	eor	x17,x9,x17,ror#34	// Sigma0(a)
460	add	x21,x21,x28			// h+=Maj(a,b,c)
461	ldr	x28,[x30],#8		// *K++, x19 in next round
462	//add	x21,x21,x17			// h+=Sigma0(a)
463#ifndef	__ARMEB__
464	rev	x2,x2			// 15
465#endif
466	ldr	x7,[sp,#0]		// reload X[4]
467	add	x21,x21,x17			// h+=Sigma0(a)
468	str	x10,[sp,#24]		// spill X[7]; x10 is borrowed as a temporary below
469	ror	x16,x25,#14
470	add	x20,x20,x28			// h+=K[i]
471	ror	x9,x4,#1
472	and	x17,x26,x25
473	ror	x8,x1,#19
474	bic	x28,x27,x25
475	ror	x10,x21,#28
476	add	x20,x20,x2			// h+=X[i]
477	eor	x16,x16,x25,ror#18
478	eor	x9,x9,x4,ror#8
479	orr	x17,x17,x28			// Ch(e,f,g)
480	eor	x28,x21,x22			// a^b, b^c in next round
481	eor	x16,x16,x25,ror#41	// Sigma1(e)
482	eor	x10,x10,x21,ror#34
483	add	x20,x20,x17			// h+=Ch(e,f,g)
484	and	x19,x19,x28			// (b^c)&=(a^b)
485	eor	x8,x8,x1,ror#61
486	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
487	add	x20,x20,x16			// h+=Sigma1(e)
488	eor	x19,x19,x22			// Maj(a,b,c)
489	eor	x17,x10,x21,ror#39	// Sigma0(a)
490	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
491	add	x3,x3,x12		// X[0] += X[9] (first scheduled word, used in round 16)
492	add	x24,x24,x20			// d+=h
493	add	x20,x20,x19			// h+=Maj(a,b,c)
494	ldr	x19,[x30],#8		// *K++, x28 in next round
495	add	x3,x3,x9		// X[0] += sigma0(X[1])
496	add	x20,x20,x17			// h+=Sigma0(a)
497	add	x3,x3,x8		// X[0] += sigma1(X[14])
// Rounds 16..79, sixteen rounds per pass. Each round consumes the X word
// prepared by the previous round and prepares the next one in place:
// X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14]), indices mod 16.
// One X word per round is reloaded from, and another spilled to, the
// four [sp,#0..24] slots so its register can serve as a temporary.
498.Loop_16_xx:
499	ldr	x8,[sp,#8]
500	str	x11,[sp,#0]
501	ror	x16,x24,#14
502	add	x27,x27,x19			// h+=K[i]
503	ror	x10,x5,#1
504	and	x17,x25,x24
505	ror	x9,x2,#19
506	bic	x19,x26,x24
507	ror	x11,x20,#28
508	add	x27,x27,x3			// h+=X[i]
509	eor	x16,x16,x24,ror#18
510	eor	x10,x10,x5,ror#8
511	orr	x17,x17,x19			// Ch(e,f,g)
512	eor	x19,x20,x21			// a^b, b^c in next round
513	eor	x16,x16,x24,ror#41	// Sigma1(e)
514	eor	x11,x11,x20,ror#34
515	add	x27,x27,x17			// h+=Ch(e,f,g)
516	and	x28,x28,x19			// (b^c)&=(a^b)
517	eor	x9,x9,x2,ror#61
518	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
519	add	x27,x27,x16			// h+=Sigma1(e)
520	eor	x28,x28,x21			// Maj(a,b,c)
521	eor	x17,x11,x20,ror#39	// Sigma0(a)
522	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
523	add	x4,x4,x13
524	add	x23,x23,x27			// d+=h
525	add	x27,x27,x28			// h+=Maj(a,b,c)
526	ldr	x28,[x30],#8		// *K++, x19 in next round
527	add	x4,x4,x10
528	add	x27,x27,x17			// h+=Sigma0(a)
529	add	x4,x4,x9
530	ldr	x9,[sp,#16]
531	str	x12,[sp,#8]
532	ror	x16,x23,#14
533	add	x26,x26,x28			// h+=K[i]
534	ror	x11,x6,#1
535	and	x17,x24,x23
536	ror	x10,x3,#19
537	bic	x28,x25,x23
538	ror	x12,x27,#28
539	add	x26,x26,x4			// h+=X[i]
540	eor	x16,x16,x23,ror#18
541	eor	x11,x11,x6,ror#8
542	orr	x17,x17,x28			// Ch(e,f,g)
543	eor	x28,x27,x20			// a^b, b^c in next round
544	eor	x16,x16,x23,ror#41	// Sigma1(e)
545	eor	x12,x12,x27,ror#34
546	add	x26,x26,x17			// h+=Ch(e,f,g)
547	and	x19,x19,x28			// (b^c)&=(a^b)
548	eor	x10,x10,x3,ror#61
549	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
550	add	x26,x26,x16			// h+=Sigma1(e)
551	eor	x19,x19,x20			// Maj(a,b,c)
552	eor	x17,x12,x27,ror#39	// Sigma0(a)
553	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
554	add	x5,x5,x14
555	add	x22,x22,x26			// d+=h
556	add	x26,x26,x19			// h+=Maj(a,b,c)
557	ldr	x19,[x30],#8		// *K++, x28 in next round
558	add	x5,x5,x11
559	add	x26,x26,x17			// h+=Sigma0(a)
560	add	x5,x5,x10
561	ldr	x10,[sp,#24]
562	str	x13,[sp,#16]
563	ror	x16,x22,#14
564	add	x25,x25,x19			// h+=K[i]
565	ror	x12,x7,#1
566	and	x17,x23,x22
567	ror	x11,x4,#19
568	bic	x19,x24,x22
569	ror	x13,x26,#28
570	add	x25,x25,x5			// h+=X[i]
571	eor	x16,x16,x22,ror#18
572	eor	x12,x12,x7,ror#8
573	orr	x17,x17,x19			// Ch(e,f,g)
574	eor	x19,x26,x27			// a^b, b^c in next round
575	eor	x16,x16,x22,ror#41	// Sigma1(e)
576	eor	x13,x13,x26,ror#34
577	add	x25,x25,x17			// h+=Ch(e,f,g)
578	and	x28,x28,x19			// (b^c)&=(a^b)
579	eor	x11,x11,x4,ror#61
580	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
581	add	x25,x25,x16			// h+=Sigma1(e)
582	eor	x28,x28,x27			// Maj(a,b,c)
583	eor	x17,x13,x26,ror#39	// Sigma0(a)
584	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
585	add	x6,x6,x15
586	add	x21,x21,x25			// d+=h
587	add	x25,x25,x28			// h+=Maj(a,b,c)
588	ldr	x28,[x30],#8		// *K++, x19 in next round
589	add	x6,x6,x12
590	add	x25,x25,x17			// h+=Sigma0(a)
591	add	x6,x6,x11
592	ldr	x11,[sp,#0]
593	str	x14,[sp,#24]
594	ror	x16,x21,#14
595	add	x24,x24,x28			// h+=K[i]
596	ror	x13,x8,#1
597	and	x17,x22,x21
598	ror	x12,x5,#19
599	bic	x28,x23,x21
600	ror	x14,x25,#28
601	add	x24,x24,x6			// h+=X[i]
602	eor	x16,x16,x21,ror#18
603	eor	x13,x13,x8,ror#8
604	orr	x17,x17,x28			// Ch(e,f,g)
605	eor	x28,x25,x26			// a^b, b^c in next round
606	eor	x16,x16,x21,ror#41	// Sigma1(e)
607	eor	x14,x14,x25,ror#34
608	add	x24,x24,x17			// h+=Ch(e,f,g)
609	and	x19,x19,x28			// (b^c)&=(a^b)
610	eor	x12,x12,x5,ror#61
611	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
612	add	x24,x24,x16			// h+=Sigma1(e)
613	eor	x19,x19,x26			// Maj(a,b,c)
614	eor	x17,x14,x25,ror#39	// Sigma0(a)
615	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
616	add	x7,x7,x0
617	add	x20,x20,x24			// d+=h
618	add	x24,x24,x19			// h+=Maj(a,b,c)
619	ldr	x19,[x30],#8		// *K++, x28 in next round
620	add	x7,x7,x13
621	add	x24,x24,x17			// h+=Sigma0(a)
622	add	x7,x7,x12
623	ldr	x12,[sp,#8]
624	str	x15,[sp,#0]
625	ror	x16,x20,#14
626	add	x23,x23,x19			// h+=K[i]
627	ror	x14,x9,#1
628	and	x17,x21,x20
629	ror	x13,x6,#19
630	bic	x19,x22,x20
631	ror	x15,x24,#28
632	add	x23,x23,x7			// h+=X[i]
633	eor	x16,x16,x20,ror#18
634	eor	x14,x14,x9,ror#8
635	orr	x17,x17,x19			// Ch(e,f,g)
636	eor	x19,x24,x25			// a^b, b^c in next round
637	eor	x16,x16,x20,ror#41	// Sigma1(e)
638	eor	x15,x15,x24,ror#34
639	add	x23,x23,x17			// h+=Ch(e,f,g)
640	and	x28,x28,x19			// (b^c)&=(a^b)
641	eor	x13,x13,x6,ror#61
642	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
643	add	x23,x23,x16			// h+=Sigma1(e)
644	eor	x28,x28,x25			// Maj(a,b,c)
645	eor	x17,x15,x24,ror#39	// Sigma0(a)
646	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
647	add	x8,x8,x1
648	add	x27,x27,x23			// d+=h
649	add	x23,x23,x28			// h+=Maj(a,b,c)
650	ldr	x28,[x30],#8		// *K++, x19 in next round
651	add	x8,x8,x14
652	add	x23,x23,x17			// h+=Sigma0(a)
653	add	x8,x8,x13
654	ldr	x13,[sp,#16]
655	str	x0,[sp,#8]
656	ror	x16,x27,#14
657	add	x22,x22,x28			// h+=K[i]
658	ror	x15,x10,#1
659	and	x17,x20,x27
660	ror	x14,x7,#19
661	bic	x28,x21,x27
662	ror	x0,x23,#28
663	add	x22,x22,x8			// h+=X[i]
664	eor	x16,x16,x27,ror#18
665	eor	x15,x15,x10,ror#8
666	orr	x17,x17,x28			// Ch(e,f,g)
667	eor	x28,x23,x24			// a^b, b^c in next round
668	eor	x16,x16,x27,ror#41	// Sigma1(e)
669	eor	x0,x0,x23,ror#34
670	add	x22,x22,x17			// h+=Ch(e,f,g)
671	and	x19,x19,x28			// (b^c)&=(a^b)
672	eor	x14,x14,x7,ror#61
673	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
674	add	x22,x22,x16			// h+=Sigma1(e)
675	eor	x19,x19,x24			// Maj(a,b,c)
676	eor	x17,x0,x23,ror#39	// Sigma0(a)
677	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
678	add	x9,x9,x2
679	add	x26,x26,x22			// d+=h
680	add	x22,x22,x19			// h+=Maj(a,b,c)
681	ldr	x19,[x30],#8		// *K++, x28 in next round
682	add	x9,x9,x15
683	add	x22,x22,x17			// h+=Sigma0(a)
684	add	x9,x9,x14
685	ldr	x14,[sp,#24]
686	str	x1,[sp,#16]
687	ror	x16,x26,#14
688	add	x21,x21,x19			// h+=K[i]
689	ror	x0,x11,#1
690	and	x17,x27,x26
691	ror	x15,x8,#19
692	bic	x19,x20,x26
693	ror	x1,x22,#28
694	add	x21,x21,x9			// h+=X[i]
695	eor	x16,x16,x26,ror#18
696	eor	x0,x0,x11,ror#8
697	orr	x17,x17,x19			// Ch(e,f,g)
698	eor	x19,x22,x23			// a^b, b^c in next round
699	eor	x16,x16,x26,ror#41	// Sigma1(e)
700	eor	x1,x1,x22,ror#34
701	add	x21,x21,x17			// h+=Ch(e,f,g)
702	and	x28,x28,x19			// (b^c)&=(a^b)
703	eor	x15,x15,x8,ror#61
704	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
705	add	x21,x21,x16			// h+=Sigma1(e)
706	eor	x28,x28,x23			// Maj(a,b,c)
707	eor	x17,x1,x22,ror#39	// Sigma0(a)
708	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
709	add	x10,x10,x3
710	add	x25,x25,x21			// d+=h
711	add	x21,x21,x28			// h+=Maj(a,b,c)
712	ldr	x28,[x30],#8		// *K++, x19 in next round
713	add	x10,x10,x0
714	add	x21,x21,x17			// h+=Sigma0(a)
715	add	x10,x10,x15
716	ldr	x15,[sp,#0]
717	str	x2,[sp,#24]
718	ror	x16,x25,#14
719	add	x20,x20,x28			// h+=K[i]
720	ror	x1,x12,#1
721	and	x17,x26,x25
722	ror	x0,x9,#19
723	bic	x28,x27,x25
724	ror	x2,x21,#28
725	add	x20,x20,x10			// h+=X[i]
726	eor	x16,x16,x25,ror#18
727	eor	x1,x1,x12,ror#8
728	orr	x17,x17,x28			// Ch(e,f,g)
729	eor	x28,x21,x22			// a^b, b^c in next round
730	eor	x16,x16,x25,ror#41	// Sigma1(e)
731	eor	x2,x2,x21,ror#34
732	add	x20,x20,x17			// h+=Ch(e,f,g)
733	and	x19,x19,x28			// (b^c)&=(a^b)
734	eor	x0,x0,x9,ror#61
735	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
736	add	x20,x20,x16			// h+=Sigma1(e)
737	eor	x19,x19,x22			// Maj(a,b,c)
738	eor	x17,x2,x21,ror#39	// Sigma0(a)
739	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
740	add	x11,x11,x4
741	add	x24,x24,x20			// d+=h
742	add	x20,x20,x19			// h+=Maj(a,b,c)
743	ldr	x19,[x30],#8		// *K++, x28 in next round
744	add	x11,x11,x1
745	add	x20,x20,x17			// h+=Sigma0(a)
746	add	x11,x11,x0
747	ldr	x0,[sp,#8]
748	str	x3,[sp,#0]
749	ror	x16,x24,#14
750	add	x27,x27,x19			// h+=K[i]
751	ror	x2,x13,#1
752	and	x17,x25,x24
753	ror	x1,x10,#19
754	bic	x19,x26,x24
755	ror	x3,x20,#28
756	add	x27,x27,x11			// h+=X[i]
757	eor	x16,x16,x24,ror#18
758	eor	x2,x2,x13,ror#8
759	orr	x17,x17,x19			// Ch(e,f,g)
760	eor	x19,x20,x21			// a^b, b^c in next round
761	eor	x16,x16,x24,ror#41	// Sigma1(e)
762	eor	x3,x3,x20,ror#34
763	add	x27,x27,x17			// h+=Ch(e,f,g)
764	and	x28,x28,x19			// (b^c)&=(a^b)
765	eor	x1,x1,x10,ror#61
766	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
767	add	x27,x27,x16			// h+=Sigma1(e)
768	eor	x28,x28,x21			// Maj(a,b,c)
769	eor	x17,x3,x20,ror#39	// Sigma0(a)
770	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
771	add	x12,x12,x5
772	add	x23,x23,x27			// d+=h
773	add	x27,x27,x28			// h+=Maj(a,b,c)
774	ldr	x28,[x30],#8		// *K++, x19 in next round
775	add	x12,x12,x2
776	add	x27,x27,x17			// h+=Sigma0(a)
777	add	x12,x12,x1
778	ldr	x1,[sp,#16]
779	str	x4,[sp,#8]
780	ror	x16,x23,#14
781	add	x26,x26,x28			// h+=K[i]
782	ror	x3,x14,#1
783	and	x17,x24,x23
784	ror	x2,x11,#19
785	bic	x28,x25,x23
786	ror	x4,x27,#28
787	add	x26,x26,x12			// h+=X[i]
788	eor	x16,x16,x23,ror#18
789	eor	x3,x3,x14,ror#8
790	orr	x17,x17,x28			// Ch(e,f,g)
791	eor	x28,x27,x20			// a^b, b^c in next round
792	eor	x16,x16,x23,ror#41	// Sigma1(e)
793	eor	x4,x4,x27,ror#34
794	add	x26,x26,x17			// h+=Ch(e,f,g)
795	and	x19,x19,x28			// (b^c)&=(a^b)
796	eor	x2,x2,x11,ror#61
797	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
798	add	x26,x26,x16			// h+=Sigma1(e)
799	eor	x19,x19,x20			// Maj(a,b,c)
800	eor	x17,x4,x27,ror#39	// Sigma0(a)
801	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
802	add	x13,x13,x6
803	add	x22,x22,x26			// d+=h
804	add	x26,x26,x19			// h+=Maj(a,b,c)
805	ldr	x19,[x30],#8		// *K++, x28 in next round
806	add	x13,x13,x3
807	add	x26,x26,x17			// h+=Sigma0(a)
808	add	x13,x13,x2
809	ldr	x2,[sp,#24]
810	str	x5,[sp,#16]
811	ror	x16,x22,#14
812	add	x25,x25,x19			// h+=K[i]
813	ror	x4,x15,#1
814	and	x17,x23,x22
815	ror	x3,x12,#19
816	bic	x19,x24,x22
817	ror	x5,x26,#28
818	add	x25,x25,x13			// h+=X[i]
819	eor	x16,x16,x22,ror#18
820	eor	x4,x4,x15,ror#8
821	orr	x17,x17,x19			// Ch(e,f,g)
822	eor	x19,x26,x27			// a^b, b^c in next round
823	eor	x16,x16,x22,ror#41	// Sigma1(e)
824	eor	x5,x5,x26,ror#34
825	add	x25,x25,x17			// h+=Ch(e,f,g)
826	and	x28,x28,x19			// (b^c)&=(a^b)
827	eor	x3,x3,x12,ror#61
828	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
829	add	x25,x25,x16			// h+=Sigma1(e)
830	eor	x28,x28,x27			// Maj(a,b,c)
831	eor	x17,x5,x26,ror#39	// Sigma0(a)
832	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
833	add	x14,x14,x7
834	add	x21,x21,x25			// d+=h
835	add	x25,x25,x28			// h+=Maj(a,b,c)
836	ldr	x28,[x30],#8		// *K++, x19 in next round
837	add	x14,x14,x4
838	add	x25,x25,x17			// h+=Sigma0(a)
839	add	x14,x14,x3
840	ldr	x3,[sp,#0]
841	str	x6,[sp,#24]
842	ror	x16,x21,#14
843	add	x24,x24,x28			// h+=K[i]
844	ror	x5,x0,#1
845	and	x17,x22,x21
846	ror	x4,x13,#19
847	bic	x28,x23,x21
848	ror	x6,x25,#28
849	add	x24,x24,x14			// h+=X[i]
850	eor	x16,x16,x21,ror#18
851	eor	x5,x5,x0,ror#8
852	orr	x17,x17,x28			// Ch(e,f,g)
853	eor	x28,x25,x26			// a^b, b^c in next round
854	eor	x16,x16,x21,ror#41	// Sigma1(e)
855	eor	x6,x6,x25,ror#34
856	add	x24,x24,x17			// h+=Ch(e,f,g)
857	and	x19,x19,x28			// (b^c)&=(a^b)
858	eor	x4,x4,x13,ror#61
859	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
860	add	x24,x24,x16			// h+=Sigma1(e)
861	eor	x19,x19,x26			// Maj(a,b,c)
862	eor	x17,x6,x25,ror#39	// Sigma0(a)
863	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
864	add	x15,x15,x8
865	add	x20,x20,x24			// d+=h
866	add	x24,x24,x19			// h+=Maj(a,b,c)
867	ldr	x19,[x30],#8		// *K++, x28 in next round
868	add	x15,x15,x5
869	add	x24,x24,x17			// h+=Sigma0(a)
870	add	x15,x15,x4
871	ldr	x4,[sp,#8]
872	str	x7,[sp,#0]
873	ror	x16,x20,#14
874	add	x23,x23,x19			// h+=K[i]
875	ror	x6,x1,#1
876	and	x17,x21,x20
877	ror	x5,x14,#19
878	bic	x19,x22,x20
879	ror	x7,x24,#28
880	add	x23,x23,x15			// h+=X[i]
881	eor	x16,x16,x20,ror#18
882	eor	x6,x6,x1,ror#8
883	orr	x17,x17,x19			// Ch(e,f,g)
884	eor	x19,x24,x25			// a^b, b^c in next round
885	eor	x16,x16,x20,ror#41	// Sigma1(e)
886	eor	x7,x7,x24,ror#34
887	add	x23,x23,x17			// h+=Ch(e,f,g)
888	and	x28,x28,x19			// (b^c)&=(a^b)
889	eor	x5,x5,x14,ror#61
890	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
891	add	x23,x23,x16			// h+=Sigma1(e)
892	eor	x28,x28,x25			// Maj(a,b,c)
893	eor	x17,x7,x24,ror#39	// Sigma0(a)
894	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
895	add	x0,x0,x9
896	add	x27,x27,x23			// d+=h
897	add	x23,x23,x28			// h+=Maj(a,b,c)
898	ldr	x28,[x30],#8		// *K++, x19 in next round
899	add	x0,x0,x6
900	add	x23,x23,x17			// h+=Sigma0(a)
901	add	x0,x0,x5
902	ldr	x5,[sp,#16]
903	str	x8,[sp,#8]
904	ror	x16,x27,#14
905	add	x22,x22,x28			// h+=K[i]
906	ror	x7,x2,#1
907	and	x17,x20,x27
908	ror	x6,x15,#19
909	bic	x28,x21,x27
910	ror	x8,x23,#28
911	add	x22,x22,x0			// h+=X[i]
912	eor	x16,x16,x27,ror#18
913	eor	x7,x7,x2,ror#8
914	orr	x17,x17,x28			// Ch(e,f,g)
915	eor	x28,x23,x24			// a^b, b^c in next round
916	eor	x16,x16,x27,ror#41	// Sigma1(e)
917	eor	x8,x8,x23,ror#34
918	add	x22,x22,x17			// h+=Ch(e,f,g)
919	and	x19,x19,x28			// (b^c)&=(a^b)
920	eor	x6,x6,x15,ror#61
921	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
922	add	x22,x22,x16			// h+=Sigma1(e)
923	eor	x19,x19,x24			// Maj(a,b,c)
924	eor	x17,x8,x23,ror#39	// Sigma0(a)
925	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
926	add	x1,x1,x10
927	add	x26,x26,x22			// d+=h
928	add	x22,x22,x19			// h+=Maj(a,b,c)
929	ldr	x19,[x30],#8		// *K++, x28 in next round
930	add	x1,x1,x7
931	add	x22,x22,x17			// h+=Sigma0(a)
932	add	x1,x1,x6
933	ldr	x6,[sp,#24]
934	str	x9,[sp,#16]
935	ror	x16,x26,#14
936	add	x21,x21,x19			// h+=K[i]
937	ror	x8,x3,#1
938	and	x17,x27,x26
939	ror	x7,x0,#19
940	bic	x19,x20,x26
941	ror	x9,x22,#28
942	add	x21,x21,x1			// h+=X[i]
943	eor	x16,x16,x26,ror#18
944	eor	x8,x8,x3,ror#8
945	orr	x17,x17,x19			// Ch(e,f,g)
946	eor	x19,x22,x23			// a^b, b^c in next round
947	eor	x16,x16,x26,ror#41	// Sigma1(e)
948	eor	x9,x9,x22,ror#34
949	add	x21,x21,x17			// h+=Ch(e,f,g)
950	and	x28,x28,x19			// (b^c)&=(a^b)
951	eor	x7,x7,x0,ror#61
952	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
953	add	x21,x21,x16			// h+=Sigma1(e)
954	eor	x28,x28,x23			// Maj(a,b,c)
955	eor	x17,x9,x22,ror#39	// Sigma0(a)
956	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
957	add	x2,x2,x11
958	add	x25,x25,x21			// d+=h
959	add	x21,x21,x28			// h+=Maj(a,b,c)
960	ldr	x28,[x30],#8		// *K++, x19 in next round
961	add	x2,x2,x8
962	add	x21,x21,x17			// h+=Sigma0(a)
963	add	x2,x2,x7
964	ldr	x7,[sp,#0]
965	str	x10,[sp,#24]
966	ror	x16,x25,#14
967	add	x20,x20,x28			// h+=K[i]
968	ror	x9,x4,#1
969	and	x17,x26,x25
970	ror	x8,x1,#19
971	bic	x28,x27,x25
972	ror	x10,x21,#28
973	add	x20,x20,x2			// h+=X[i]
974	eor	x16,x16,x25,ror#18
975	eor	x9,x9,x4,ror#8
976	orr	x17,x17,x28			// Ch(e,f,g)
977	eor	x28,x21,x22			// a^b, b^c in next round
978	eor	x16,x16,x25,ror#41	// Sigma1(e)
979	eor	x10,x10,x21,ror#34
980	add	x20,x20,x17			// h+=Ch(e,f,g)
981	and	x19,x19,x28			// (b^c)&=(a^b)
982	eor	x8,x8,x1,ror#61
983	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
984	add	x20,x20,x16			// h+=Sigma1(e)
985	eor	x19,x19,x22			// Maj(a,b,c)
986	eor	x17,x10,x21,ror#39	// Sigma0(a)
987	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
988	add	x3,x3,x12
989	add	x24,x24,x20			// d+=h
990	add	x20,x20,x19			// h+=Maj(a,b,c)
991	ldr	x19,[x30],#8		// *K++, x28 in next round
992	add	x3,x3,x9
993	add	x20,x20,x17			// h+=Sigma0(a)
994	add	x3,x3,x8
995	cbnz	x19,.Loop_16_xx		// the K table ends with a zero quad; loop until it has been loaded
996
// Block finished: reload the saved pointers, add the working variables
// into the stored state, and continue with the next block if any remain.
997	ldp	x0,x2,[x29,#96]		// x0 = state pointer, x2 = end of input
998	ldr	x1,[x29,#112]		// x1 = input pointer saved at the top of .Loop
999	sub	x30,x30,#648		// rewind
1000
1001	ldp	x3,x4,[x0]
1002	ldp	x5,x6,[x0,#2*8]
1003	add	x1,x1,#14*8			// advance input pointer
1004	ldp	x7,x8,[x0,#4*8]
1005	add	x20,x20,x3
1006	ldp	x9,x10,[x0,#6*8]
1007	add	x21,x21,x4
1008	add	x22,x22,x5
1009	add	x23,x23,x6
1010	stp	x20,x21,[x0]
1011	add	x24,x24,x7
1012	add	x25,x25,x8
1013	stp	x22,x23,[x0,#2*8]
1014	add	x26,x26,x9
1015	add	x27,x27,x10
1016	cmp	x1,x2			// reached end of input?
1017	stp	x24,x25,[x0,#4*8]
1018	stp	x26,x27,[x0,#6*8]
1019	b.ne	.Loop
1020
1021	ldp	x19,x20,[x29,#16]	// restore callee-saved x19-x28
1022	add	sp,sp,#4*8		// release the four spill slots
1023	ldp	x21,x22,[x29,#32]
1024	ldp	x23,x24,[x29,#48]
1025	ldp	x25,x26,[x29,#64]
1026	ldp	x27,x28,[x29,#80]
1027	ldp	x29,x30,[sp],#128	// restore fp/lr and pop the 128-byte frame
1028	AARCH64_VALIDATE_LINK_REGISTER
1029	ret
1030.size	sha512_block_data_order,.-sha512_block_data_order
1031
// .LK512: table of 80 64-bit round constants (40 lines of two quads)
// followed by one zero quad. sha512_block_data_order walks this table
// with post-incremented loads and uses the zero quad as its end-of-table
// sentinel (cbnz in .Loop_16_xx), then rewinds by 648 bytes = 81 quads.
// Do not add, remove or reorder entries without updating that rewind.
1032.section	.rodata
1033.align	6
1034.type	.LK512,%object
1035.LK512:
1036.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1037.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1038.quad	0x3956c25bf348b538,0x59f111f1b605d019
1039.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1040.quad	0xd807aa98a3030242,0x12835b0145706fbe
1041.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1042.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1043.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1044.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1045.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1046.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1047.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1048.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1049.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1050.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1051.quad	0x06ca6351e003826f,0x142929670a0e6e70
1052.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1053.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1054.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1055.quad	0x81c2c92e47edaee6,0x92722c851482353b
1056.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1057.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1058.quad	0xd192e819d6ef5218,0xd69906245565a910
1059.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1060.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1061.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1062.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1063.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1064.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1065.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1066.quad	0x90befffa23631e28,0xa4506cebde82bde9
1067.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1068.quad	0xca273eceea26619c,0xd186b8c721c0c207
1069.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1070.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1071.quad	0x113f9804bef90dae,0x1b710b35131c471b
1072.quad	0x28db77f523047d84,0x32caab7b40c72493
1073.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1074.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1075.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1076.quad	0	// terminator
1077.size	.LK512,.-.LK512
// NUL-terminated ASCII credit string, emitted after the sized table:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
1078.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// The second .align 2 repeats the first and has no further effect --
// presumably an artifact of how this file is generated.
1079.align	2
1080.align	2
1081#ifndef	__KERNEL__
// Declares OPENSSL_armcap_P as a 4-byte, 4-byte-aligned common symbol
// with hidden visibility. Nothing in the visible part of this file
// reads it.
1082.comm	OPENSSL_armcap_P,4,4
1083.hidden	OPENSSL_armcap_P
1084#endif
1085#endif
1086#endif  // !OPENSSL_NO_ASM
// Empty .note.GNU-stack section, emitted unconditionally (outside the
// OPENSSL_NO_ASM / __aarch64__ guards). GNU toolchains conventionally
// treat its presence as "this object does not need an executable stack".
1087.section	.note.GNU-stack,"",%progbits
1088