1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
13//
14// Licensed under the OpenSSL license (the "License").  You may not use
15// this file except in compliance with the License.  You can obtain a copy
16// in the file LICENSE in the source distribution or at
17// https://www.openssl.org/source/license.html
18
19// ====================================================================
20// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
21// project. The module is, however, dual licensed under OpenSSL and
22// CRYPTOGAMS licenses depending on where you obtain it. For further
23// details see http://www.openssl.org/~appro/cryptogams/.
24//
25// Permission to use under GPLv2 terms is granted.
26// ====================================================================
27//
28// SHA256/512 for ARMv8.
29//
30// Performance in cycles per processed byte and improvement coefficient
31// over code generated with "default" compiler:
32//
33//		SHA256-hw	SHA256(*)	SHA512
34// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
35// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
36// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
37// Denver	2.01		10.5 (+26%)	6.70 (+8%)
38// X-Gene			20.0 (+100%)	12.8 (+300%(***))
39// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
40//
41// (*)	Software SHA256 results are of lesser relevance, presented
42//	mostly for informational purposes.
43// (**)	The result is a trade-off: it's possible to improve it by
44//	10% (or by 1 cycle per round), but at the cost of 20% loss
45//	on Cortex-A53 (or by 4 cycles per round).
46// (***)	Super-impressive coefficients over gcc-generated code are an
47//	indication of some compiler "pathology"; most notably, code
48//	generated with -mgeneral-regs-only is significantly faster
49//	and the gap is only 40-90%.
50
51#ifndef	__KERNEL__
52# include <GFp/arm_arch.h>
53#endif
54
55.text
56
57
// ---------------------------------------------------------------------
// _GFp_sha512_block_data_order
//
// Register inputs, as used by the code below:
//   x0  pointer to eight 64-bit state words; they are loaded here and
//       written back after every block
//   x1  input pointer
//   x2  block count; it is scaled by 128 (lsl#7) and added to x1 to form
//       the end-of-input pointer
// The block loop tests its exit condition only after a block has been
// processed, so x2 == 0 is not handled -- NOTE(review): confirm that
// callers never pass a zero count.
//
// Frame (128 bytes, addressed through x29) plus 32 bytes of scratch:
//   [x29,#0]        saved x29, x30
//   [x29,#16..#95]  saved x19-x28
//   [x29,#96]       x0 (state pointer)
//   [x29,#104]      end-of-input pointer
//   [x29,#112]      input pointer, stored by Loop
//   [sp,#0..#31]    four slots for message-schedule words spilled by
//                   the rounds
//
// Register roles in the rounds: x20-x27 hold the working variables a..h,
// x30 walks the LK512 table, x3-x15,x0,x1,x2 hold X[0..15], and
// x16,x17,x19,x28 serve as temporaries.
// ---------------------------------------------------------------------
// _GFp_armcap_P is declared here but not referenced by the code visible
// in this file.
58.private_extern	_GFp_armcap_P
59.globl	_GFp_sha512_block_data_order
60.private_extern	_GFp_sha512_block_data_order
61
62.align	6
63_GFp_sha512_block_data_order:
// AARCH64_SIGN_LINK_REGISTER is a macro defined outside this file
// (presumably in <GFp/arm_arch.h> -- confirm).
64	AARCH64_SIGN_LINK_REGISTER
// Allocate the 128-byte frame and make x29 point at it.
65	stp	x29,x30,[sp,#-128]!
66	add	x29,sp,#0
67
// Save x19-x28; all of them are overwritten by the rounds.
68	stp	x19,x20,[sp,#16]
69	stp	x21,x22,[sp,#32]
70	stp	x23,x24,[sp,#48]
71	stp	x25,x26,[sp,#64]
72	stp	x27,x28,[sp,#80]
// Reserve four 8-byte scratch slots below the frame.
73	sub	sp,sp,#4*8
74
75	ldp	x20,x21,[x0]				// load context
76	ldp	x22,x23,[x0,#2*8]
77	ldp	x24,x25,[x0,#4*8]
78	add	x2,x1,x2,lsl#7	// end of input
79	ldp	x26,x27,[x0,#6*8]
// x30 = address of LK512; the return address it held was saved above.
80	adrp	x30,LK512@PAGE
81	add	x30,x30,LK512@PAGEOFF
// Keep the state pointer and the end pointer in the frame: x0 and x2 are
// reused for message words inside the rounds.
82	stp	x0,x2,[x29,#96]
83
// ---------------------------------------------------------------------
// Loop: entered once per 128-byte input block. This section performs
// rounds 0-15 and loads the sixteen message words X[0..15] into
// x3-x15,x0,x1,x2, byte-swapping each with rev unless __ARMEB__ is
// defined.
//
// The working variables are never moved: the roles a..h rotate through
// x20-x27 by one register per round (period 8). x19 and x28 alternate
// between "K[i], then Ch temporary, then a^b" and "b^c, then Maj(a,b,c)".
// The last step of a round, h+=Sigma0(a), is issued a few instructions
// into the following round; the commented-out add shows where it
// logically belongs.
//
// Each round borrows the register of a later X word as a Sigma
// temporary. From round 11 on that register already holds a live word,
// so the word is parked in one of the four slots at [sp] first.
// ---------------------------------------------------------------------
// ---- round 0: a=x20 e=x24 h=x27, X[0] in x3 ----
84Loop:
85	ldp	x3,x4,[x1],#2*8
86	ldr	x19,[x30],#8			// *K++
87	eor	x28,x21,x22				// magic seed
// Save the input pointer (already advanced by 2*8); x1 is later
// overwritten with X[14].
88	str	x1,[x29,#112]
89#ifndef	__ARMEB__
90	rev	x3,x3			// 0
91#endif
92	ror	x16,x24,#14
93	add	x27,x27,x19			// h+=K[i]
94	eor	x6,x24,x24,ror#23
95	and	x17,x25,x24
96	bic	x19,x26,x24
97	add	x27,x27,x3			// h+=X[i]
98	orr	x17,x17,x19			// Ch(e,f,g)
99	eor	x19,x20,x21			// a^b, b^c in next round
100	eor	x16,x16,x6,ror#18	// Sigma1(e)
101	ror	x6,x20,#28
102	add	x27,x27,x17			// h+=Ch(e,f,g)
103	eor	x17,x20,x20,ror#5
104	add	x27,x27,x16			// h+=Sigma1(e)
105	and	x28,x28,x19			// (b^c)&=(a^b)
106	add	x23,x23,x27			// d+=h
107	eor	x28,x28,x21			// Maj(a,b,c)
108	eor	x17,x6,x17,ror#34	// Sigma0(a)
109	add	x27,x27,x28			// h+=Maj(a,b,c)
110	ldr	x28,[x30],#8		// *K++, x19 in next round
111	//add	x27,x27,x17			// h+=Sigma0(a)
// ---- round 1: a=x27 e=x23 h=x26, X[1] in x4 ----
112#ifndef	__ARMEB__
113	rev	x4,x4			// 1
114#endif
115	ldp	x5,x6,[x1],#2*8
116	add	x27,x27,x17			// h+=Sigma0(a)
117	ror	x16,x23,#14
118	add	x26,x26,x28			// h+=K[i]
119	eor	x7,x23,x23,ror#23
120	and	x17,x24,x23
121	bic	x28,x25,x23
122	add	x26,x26,x4			// h+=X[i]
123	orr	x17,x17,x28			// Ch(e,f,g)
124	eor	x28,x27,x20			// a^b, b^c in next round
125	eor	x16,x16,x7,ror#18	// Sigma1(e)
126	ror	x7,x27,#28
127	add	x26,x26,x17			// h+=Ch(e,f,g)
128	eor	x17,x27,x27,ror#5
129	add	x26,x26,x16			// h+=Sigma1(e)
130	and	x19,x19,x28			// (b^c)&=(a^b)
131	add	x22,x22,x26			// d+=h
132	eor	x19,x19,x20			// Maj(a,b,c)
133	eor	x17,x7,x17,ror#34	// Sigma0(a)
134	add	x26,x26,x19			// h+=Maj(a,b,c)
135	ldr	x19,[x30],#8		// *K++, x28 in next round
136	//add	x26,x26,x17			// h+=Sigma0(a)
// ---- round 2: a=x26 e=x22 h=x25, X[2] in x5 ----
137#ifndef	__ARMEB__
138	rev	x5,x5			// 2
139#endif
140	add	x26,x26,x17			// h+=Sigma0(a)
141	ror	x16,x22,#14
142	add	x25,x25,x19			// h+=K[i]
143	eor	x8,x22,x22,ror#23
144	and	x17,x23,x22
145	bic	x19,x24,x22
146	add	x25,x25,x5			// h+=X[i]
147	orr	x17,x17,x19			// Ch(e,f,g)
148	eor	x19,x26,x27			// a^b, b^c in next round
149	eor	x16,x16,x8,ror#18	// Sigma1(e)
150	ror	x8,x26,#28
151	add	x25,x25,x17			// h+=Ch(e,f,g)
152	eor	x17,x26,x26,ror#5
153	add	x25,x25,x16			// h+=Sigma1(e)
154	and	x28,x28,x19			// (b^c)&=(a^b)
155	add	x21,x21,x25			// d+=h
156	eor	x28,x28,x27			// Maj(a,b,c)
157	eor	x17,x8,x17,ror#34	// Sigma0(a)
158	add	x25,x25,x28			// h+=Maj(a,b,c)
159	ldr	x28,[x30],#8		// *K++, x19 in next round
160	//add	x25,x25,x17			// h+=Sigma0(a)
// ---- round 3: a=x25 e=x21 h=x24, X[3] in x6 ----
161#ifndef	__ARMEB__
162	rev	x6,x6			// 3
163#endif
164	ldp	x7,x8,[x1],#2*8
165	add	x25,x25,x17			// h+=Sigma0(a)
166	ror	x16,x21,#14
167	add	x24,x24,x28			// h+=K[i]
168	eor	x9,x21,x21,ror#23
169	and	x17,x22,x21
170	bic	x28,x23,x21
171	add	x24,x24,x6			// h+=X[i]
172	orr	x17,x17,x28			// Ch(e,f,g)
173	eor	x28,x25,x26			// a^b, b^c in next round
174	eor	x16,x16,x9,ror#18	// Sigma1(e)
175	ror	x9,x25,#28
176	add	x24,x24,x17			// h+=Ch(e,f,g)
177	eor	x17,x25,x25,ror#5
178	add	x24,x24,x16			// h+=Sigma1(e)
179	and	x19,x19,x28			// (b^c)&=(a^b)
180	add	x20,x20,x24			// d+=h
181	eor	x19,x19,x26			// Maj(a,b,c)
182	eor	x17,x9,x17,ror#34	// Sigma0(a)
183	add	x24,x24,x19			// h+=Maj(a,b,c)
184	ldr	x19,[x30],#8		// *K++, x28 in next round
185	//add	x24,x24,x17			// h+=Sigma0(a)
// ---- round 4: a=x24 e=x20 h=x23, X[4] in x7 ----
186#ifndef	__ARMEB__
187	rev	x7,x7			// 4
188#endif
189	add	x24,x24,x17			// h+=Sigma0(a)
190	ror	x16,x20,#14
191	add	x23,x23,x19			// h+=K[i]
192	eor	x10,x20,x20,ror#23
193	and	x17,x21,x20
194	bic	x19,x22,x20
195	add	x23,x23,x7			// h+=X[i]
196	orr	x17,x17,x19			// Ch(e,f,g)
197	eor	x19,x24,x25			// a^b, b^c in next round
198	eor	x16,x16,x10,ror#18	// Sigma1(e)
199	ror	x10,x24,#28
200	add	x23,x23,x17			// h+=Ch(e,f,g)
201	eor	x17,x24,x24,ror#5
202	add	x23,x23,x16			// h+=Sigma1(e)
203	and	x28,x28,x19			// (b^c)&=(a^b)
204	add	x27,x27,x23			// d+=h
205	eor	x28,x28,x25			// Maj(a,b,c)
206	eor	x17,x10,x17,ror#34	// Sigma0(a)
207	add	x23,x23,x28			// h+=Maj(a,b,c)
208	ldr	x28,[x30],#8		// *K++, x19 in next round
209	//add	x23,x23,x17			// h+=Sigma0(a)
// ---- round 5: a=x23 e=x27 h=x22, X[5] in x8 ----
210#ifndef	__ARMEB__
211	rev	x8,x8			// 5
212#endif
213	ldp	x9,x10,[x1],#2*8
214	add	x23,x23,x17			// h+=Sigma0(a)
215	ror	x16,x27,#14
216	add	x22,x22,x28			// h+=K[i]
217	eor	x11,x27,x27,ror#23
218	and	x17,x20,x27
219	bic	x28,x21,x27
220	add	x22,x22,x8			// h+=X[i]
221	orr	x17,x17,x28			// Ch(e,f,g)
222	eor	x28,x23,x24			// a^b, b^c in next round
223	eor	x16,x16,x11,ror#18	// Sigma1(e)
224	ror	x11,x23,#28
225	add	x22,x22,x17			// h+=Ch(e,f,g)
226	eor	x17,x23,x23,ror#5
227	add	x22,x22,x16			// h+=Sigma1(e)
228	and	x19,x19,x28			// (b^c)&=(a^b)
229	add	x26,x26,x22			// d+=h
230	eor	x19,x19,x24			// Maj(a,b,c)
231	eor	x17,x11,x17,ror#34	// Sigma0(a)
232	add	x22,x22,x19			// h+=Maj(a,b,c)
233	ldr	x19,[x30],#8		// *K++, x28 in next round
234	//add	x22,x22,x17			// h+=Sigma0(a)
// ---- round 6: a=x22 e=x26 h=x21, X[6] in x9 ----
235#ifndef	__ARMEB__
236	rev	x9,x9			// 6
237#endif
238	add	x22,x22,x17			// h+=Sigma0(a)
239	ror	x16,x26,#14
240	add	x21,x21,x19			// h+=K[i]
241	eor	x12,x26,x26,ror#23
242	and	x17,x27,x26
243	bic	x19,x20,x26
244	add	x21,x21,x9			// h+=X[i]
245	orr	x17,x17,x19			// Ch(e,f,g)
246	eor	x19,x22,x23			// a^b, b^c in next round
247	eor	x16,x16,x12,ror#18	// Sigma1(e)
248	ror	x12,x22,#28
249	add	x21,x21,x17			// h+=Ch(e,f,g)
250	eor	x17,x22,x22,ror#5
251	add	x21,x21,x16			// h+=Sigma1(e)
252	and	x28,x28,x19			// (b^c)&=(a^b)
253	add	x25,x25,x21			// d+=h
254	eor	x28,x28,x23			// Maj(a,b,c)
255	eor	x17,x12,x17,ror#34	// Sigma0(a)
256	add	x21,x21,x28			// h+=Maj(a,b,c)
257	ldr	x28,[x30],#8		// *K++, x19 in next round
258	//add	x21,x21,x17			// h+=Sigma0(a)
// ---- round 7: a=x21 e=x25 h=x20, X[7] in x10 ----
259#ifndef	__ARMEB__
260	rev	x10,x10			// 7
261#endif
262	ldp	x11,x12,[x1],#2*8
263	add	x21,x21,x17			// h+=Sigma0(a)
264	ror	x16,x25,#14
265	add	x20,x20,x28			// h+=K[i]
266	eor	x13,x25,x25,ror#23
267	and	x17,x26,x25
268	bic	x28,x27,x25
269	add	x20,x20,x10			// h+=X[i]
270	orr	x17,x17,x28			// Ch(e,f,g)
271	eor	x28,x21,x22			// a^b, b^c in next round
272	eor	x16,x16,x13,ror#18	// Sigma1(e)
273	ror	x13,x21,#28
274	add	x20,x20,x17			// h+=Ch(e,f,g)
275	eor	x17,x21,x21,ror#5
276	add	x20,x20,x16			// h+=Sigma1(e)
277	and	x19,x19,x28			// (b^c)&=(a^b)
278	add	x24,x24,x20			// d+=h
279	eor	x19,x19,x22			// Maj(a,b,c)
280	eor	x17,x13,x17,ror#34	// Sigma0(a)
281	add	x20,x20,x19			// h+=Maj(a,b,c)
282	ldr	x19,[x30],#8		// *K++, x28 in next round
283	//add	x20,x20,x17			// h+=Sigma0(a)
// ---- round 8: a=x20 e=x24 h=x27, X[8] in x11 ----
284#ifndef	__ARMEB__
285	rev	x11,x11			// 8
286#endif
287	add	x20,x20,x17			// h+=Sigma0(a)
288	ror	x16,x24,#14
289	add	x27,x27,x19			// h+=K[i]
290	eor	x14,x24,x24,ror#23
291	and	x17,x25,x24
292	bic	x19,x26,x24
293	add	x27,x27,x11			// h+=X[i]
294	orr	x17,x17,x19			// Ch(e,f,g)
295	eor	x19,x20,x21			// a^b, b^c in next round
296	eor	x16,x16,x14,ror#18	// Sigma1(e)
297	ror	x14,x20,#28
298	add	x27,x27,x17			// h+=Ch(e,f,g)
299	eor	x17,x20,x20,ror#5
300	add	x27,x27,x16			// h+=Sigma1(e)
301	and	x28,x28,x19			// (b^c)&=(a^b)
302	add	x23,x23,x27			// d+=h
303	eor	x28,x28,x21			// Maj(a,b,c)
304	eor	x17,x14,x17,ror#34	// Sigma0(a)
305	add	x27,x27,x28			// h+=Maj(a,b,c)
306	ldr	x28,[x30],#8		// *K++, x19 in next round
307	//add	x27,x27,x17			// h+=Sigma0(a)
// ---- round 9: a=x27 e=x23 h=x26, X[9] in x12 ----
308#ifndef	__ARMEB__
309	rev	x12,x12			// 9
310#endif
311	ldp	x13,x14,[x1],#2*8
312	add	x27,x27,x17			// h+=Sigma0(a)
313	ror	x16,x23,#14
314	add	x26,x26,x28			// h+=K[i]
315	eor	x15,x23,x23,ror#23
316	and	x17,x24,x23
317	bic	x28,x25,x23
318	add	x26,x26,x12			// h+=X[i]
319	orr	x17,x17,x28			// Ch(e,f,g)
320	eor	x28,x27,x20			// a^b, b^c in next round
321	eor	x16,x16,x15,ror#18	// Sigma1(e)
322	ror	x15,x27,#28
323	add	x26,x26,x17			// h+=Ch(e,f,g)
324	eor	x17,x27,x27,ror#5
325	add	x26,x26,x16			// h+=Sigma1(e)
326	and	x19,x19,x28			// (b^c)&=(a^b)
327	add	x22,x22,x26			// d+=h
328	eor	x19,x19,x20			// Maj(a,b,c)
329	eor	x17,x15,x17,ror#34	// Sigma0(a)
330	add	x26,x26,x19			// h+=Maj(a,b,c)
331	ldr	x19,[x30],#8		// *K++, x28 in next round
332	//add	x26,x26,x17			// h+=Sigma0(a)
// ---- round 10: a=x26 e=x22 h=x25, X[10] in x13 ----
333#ifndef	__ARMEB__
334	rev	x13,x13			// 10
335#endif
336	add	x26,x26,x17			// h+=Sigma0(a)
337	ror	x16,x22,#14
338	add	x25,x25,x19			// h+=K[i]
339	eor	x0,x22,x22,ror#23
340	and	x17,x23,x22
341	bic	x19,x24,x22
342	add	x25,x25,x13			// h+=X[i]
343	orr	x17,x17,x19			// Ch(e,f,g)
344	eor	x19,x26,x27			// a^b, b^c in next round
345	eor	x16,x16,x0,ror#18	// Sigma1(e)
346	ror	x0,x26,#28
347	add	x25,x25,x17			// h+=Ch(e,f,g)
348	eor	x17,x26,x26,ror#5
349	add	x25,x25,x16			// h+=Sigma1(e)
350	and	x28,x28,x19			// (b^c)&=(a^b)
351	add	x21,x21,x25			// d+=h
352	eor	x28,x28,x27			// Maj(a,b,c)
353	eor	x17,x0,x17,ror#34	// Sigma0(a)
354	add	x25,x25,x28			// h+=Maj(a,b,c)
355	ldr	x28,[x30],#8		// *K++, x19 in next round
356	//add	x25,x25,x17			// h+=Sigma0(a)
// ---- round 11: a=x25 e=x21 h=x24, X[11] in x14 ----
357#ifndef	__ARMEB__
358	rev	x14,x14			// 11
359#endif
360	ldp	x15,x0,[x1],#2*8
361	add	x25,x25,x17			// h+=Sigma0(a)
// x6 is the temporary for this round: park X[3] in slot 3.
362	str	x6,[sp,#24]
363	ror	x16,x21,#14
364	add	x24,x24,x28			// h+=K[i]
365	eor	x6,x21,x21,ror#23
366	and	x17,x22,x21
367	bic	x28,x23,x21
368	add	x24,x24,x14			// h+=X[i]
369	orr	x17,x17,x28			// Ch(e,f,g)
370	eor	x28,x25,x26			// a^b, b^c in next round
371	eor	x16,x16,x6,ror#18	// Sigma1(e)
372	ror	x6,x25,#28
373	add	x24,x24,x17			// h+=Ch(e,f,g)
374	eor	x17,x25,x25,ror#5
375	add	x24,x24,x16			// h+=Sigma1(e)
376	and	x19,x19,x28			// (b^c)&=(a^b)
377	add	x20,x20,x24			// d+=h
378	eor	x19,x19,x26			// Maj(a,b,c)
379	eor	x17,x6,x17,ror#34	// Sigma0(a)
380	add	x24,x24,x19			// h+=Maj(a,b,c)
381	ldr	x19,[x30],#8		// *K++, x28 in next round
382	//add	x24,x24,x17			// h+=Sigma0(a)
// ---- round 12: a=x24 e=x20 h=x23, X[12] in x15 ----
383#ifndef	__ARMEB__
384	rev	x15,x15			// 12
385#endif
386	add	x24,x24,x17			// h+=Sigma0(a)
// x7 is the temporary for this round: park X[4] in slot 0.
387	str	x7,[sp,#0]
388	ror	x16,x20,#14
389	add	x23,x23,x19			// h+=K[i]
390	eor	x7,x20,x20,ror#23
391	and	x17,x21,x20
392	bic	x19,x22,x20
393	add	x23,x23,x15			// h+=X[i]
394	orr	x17,x17,x19			// Ch(e,f,g)
395	eor	x19,x24,x25			// a^b, b^c in next round
396	eor	x16,x16,x7,ror#18	// Sigma1(e)
397	ror	x7,x24,#28
398	add	x23,x23,x17			// h+=Ch(e,f,g)
399	eor	x17,x24,x24,ror#5
400	add	x23,x23,x16			// h+=Sigma1(e)
401	and	x28,x28,x19			// (b^c)&=(a^b)
402	add	x27,x27,x23			// d+=h
403	eor	x28,x28,x25			// Maj(a,b,c)
404	eor	x17,x7,x17,ror#34	// Sigma0(a)
405	add	x23,x23,x28			// h+=Maj(a,b,c)
406	ldr	x28,[x30],#8		// *K++, x19 in next round
407	//add	x23,x23,x17			// h+=Sigma0(a)
// ---- round 13: a=x23 e=x27 h=x22, X[13] in x0 ----
408#ifndef	__ARMEB__
409	rev	x0,x0			// 13
410#endif
// Last two input words, loaded without writeback: x1 itself receives
// X[14], and the input pointer lives in [x29,#112].
411	ldp	x1,x2,[x1]
412	add	x23,x23,x17			// h+=Sigma0(a)
// x8 is the temporary for this round: park X[5] in slot 1.
413	str	x8,[sp,#8]
414	ror	x16,x27,#14
415	add	x22,x22,x28			// h+=K[i]
416	eor	x8,x27,x27,ror#23
417	and	x17,x20,x27
418	bic	x28,x21,x27
419	add	x22,x22,x0			// h+=X[i]
420	orr	x17,x17,x28			// Ch(e,f,g)
421	eor	x28,x23,x24			// a^b, b^c in next round
422	eor	x16,x16,x8,ror#18	// Sigma1(e)
423	ror	x8,x23,#28
424	add	x22,x22,x17			// h+=Ch(e,f,g)
425	eor	x17,x23,x23,ror#5
426	add	x22,x22,x16			// h+=Sigma1(e)
427	and	x19,x19,x28			// (b^c)&=(a^b)
428	add	x26,x26,x22			// d+=h
429	eor	x19,x19,x24			// Maj(a,b,c)
430	eor	x17,x8,x17,ror#34	// Sigma0(a)
431	add	x22,x22,x19			// h+=Maj(a,b,c)
432	ldr	x19,[x30],#8		// *K++, x28 in next round
433	//add	x22,x22,x17			// h+=Sigma0(a)
// ---- round 14: a=x22 e=x26 h=x21, X[14] in x1 ----
434#ifndef	__ARMEB__
435	rev	x1,x1			// 14
436#endif
// Bring X[3] back from slot 3 ...
437	ldr	x6,[sp,#24]
438	add	x22,x22,x17			// h+=Sigma0(a)
// ... and park X[6], because x9 is the temporary for this round.
439	str	x9,[sp,#16]
440	ror	x16,x26,#14
441	add	x21,x21,x19			// h+=K[i]
442	eor	x9,x26,x26,ror#23
443	and	x17,x27,x26
444	bic	x19,x20,x26
445	add	x21,x21,x1			// h+=X[i]
446	orr	x17,x17,x19			// Ch(e,f,g)
447	eor	x19,x22,x23			// a^b, b^c in next round
448	eor	x16,x16,x9,ror#18	// Sigma1(e)
449	ror	x9,x22,#28
450	add	x21,x21,x17			// h+=Ch(e,f,g)
451	eor	x17,x22,x22,ror#5
452	add	x21,x21,x16			// h+=Sigma1(e)
453	and	x28,x28,x19			// (b^c)&=(a^b)
454	add	x25,x25,x21			// d+=h
455	eor	x28,x28,x23			// Maj(a,b,c)
456	eor	x17,x9,x17,ror#34	// Sigma0(a)
457	add	x21,x21,x28			// h+=Maj(a,b,c)
458	ldr	x28,[x30],#8		// *K++, x19 in next round
459	//add	x21,x21,x17			// h+=Sigma0(a)
// ---- round 15: a=x21 e=x25 h=x20, X[15] in x2 ----
// This round also starts the message schedule. Interleaved with the
// round it computes the word for round 16 in place:
//   x3 = X[0] + X[9] + sigma0(X[1]) + sigma1(X[14])
// with x9 = sigma0 term, x8 = sigma1 term and x10 as the Sigma0(a)
// temporary.
460#ifndef	__ARMEB__
461	rev	x2,x2			// 15
462#endif
// Bring X[4] back from slot 0 and park X[7] in slot 3 (x10 is a
// temporary below).
463	ldr	x7,[sp,#0]
464	add	x21,x21,x17			// h+=Sigma0(a)
465	str	x10,[sp,#24]
466	ror	x16,x25,#14
467	add	x20,x20,x28			// h+=K[i]
468	ror	x9,x4,#1
469	and	x17,x26,x25
470	ror	x8,x1,#19
471	bic	x28,x27,x25
472	ror	x10,x21,#28
473	add	x20,x20,x2			// h+=X[i]
474	eor	x16,x16,x25,ror#18
475	eor	x9,x9,x4,ror#8
476	orr	x17,x17,x28			// Ch(e,f,g)
477	eor	x28,x21,x22			// a^b, b^c in next round
478	eor	x16,x16,x25,ror#41	// Sigma1(e)
479	eor	x10,x10,x21,ror#34
480	add	x20,x20,x17			// h+=Ch(e,f,g)
481	and	x19,x19,x28			// (b^c)&=(a^b)
482	eor	x8,x8,x1,ror#61
483	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
484	add	x20,x20,x16			// h+=Sigma1(e)
485	eor	x19,x19,x22			// Maj(a,b,c)
486	eor	x17,x10,x21,ror#39	// Sigma0(a)
487	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
488	add	x3,x3,x12
489	add	x24,x24,x20			// d+=h
490	add	x20,x20,x19			// h+=Maj(a,b,c)
491	ldr	x19,[x30],#8		// *K++, x28 in next round
492	add	x3,x3,x9
493	add	x20,x20,x17			// h+=Sigma0(a)
494	add	x3,x3,x8
// ---------------------------------------------------------------------
// Loop_16_xx: the rounds after the first sixteen, sixteen rounds per
// pass. Round j of a pass (j = 0..15) adds the current schedule word
// X[j] to h and, interleaved with the round, computes the word for the
// following round in place (indices mod 16):
//   X[j+1] += sigma0(X[j+2]) + sigma1(X[j+15]) + X[j+10]
//
// Three registers of the X file serve as temporaries in every round
// (the sigma0 term, the sigma1 term and Sigma0(a)), so the words that
// belong in them live in the four slots at [sp,#0..#24]. Each round
// starts by reloading one word from its slot and parking another.
//
// Unlike in rounds 0-14, h+=Sigma0(a) is completed inside the same
// round here. The pass is repeated while the K value fetched for the
// next round is non-zero; the zero quad that ends LK512 stops the loop.
// ---------------------------------------------------------------------
// ---- j=0: a=x20 e=x24 h=x27; adds x3, rebuilds x4 ----
495Loop_16_xx:
496	ldr	x8,[sp,#8]
497	str	x11,[sp,#0]
498	ror	x16,x24,#14
499	add	x27,x27,x19			// h+=K[i]
500	ror	x10,x5,#1
501	and	x17,x25,x24
502	ror	x9,x2,#19
503	bic	x19,x26,x24
504	ror	x11,x20,#28
505	add	x27,x27,x3			// h+=X[i]
506	eor	x16,x16,x24,ror#18
507	eor	x10,x10,x5,ror#8
508	orr	x17,x17,x19			// Ch(e,f,g)
509	eor	x19,x20,x21			// a^b, b^c in next round
510	eor	x16,x16,x24,ror#41	// Sigma1(e)
511	eor	x11,x11,x20,ror#34
512	add	x27,x27,x17			// h+=Ch(e,f,g)
513	and	x28,x28,x19			// (b^c)&=(a^b)
514	eor	x9,x9,x2,ror#61
515	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
516	add	x27,x27,x16			// h+=Sigma1(e)
517	eor	x28,x28,x21			// Maj(a,b,c)
518	eor	x17,x11,x20,ror#39	// Sigma0(a)
519	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
520	add	x4,x4,x13
521	add	x23,x23,x27			// d+=h
522	add	x27,x27,x28			// h+=Maj(a,b,c)
523	ldr	x28,[x30],#8		// *K++, x19 in next round
524	add	x4,x4,x10
525	add	x27,x27,x17			// h+=Sigma0(a)
526	add	x4,x4,x9
// ---- j=1: a=x27 e=x23 h=x26; adds x4, rebuilds x5 ----
527	ldr	x9,[sp,#16]
528	str	x12,[sp,#8]
529	ror	x16,x23,#14
530	add	x26,x26,x28			// h+=K[i]
531	ror	x11,x6,#1
532	and	x17,x24,x23
533	ror	x10,x3,#19
534	bic	x28,x25,x23
535	ror	x12,x27,#28
536	add	x26,x26,x4			// h+=X[i]
537	eor	x16,x16,x23,ror#18
538	eor	x11,x11,x6,ror#8
539	orr	x17,x17,x28			// Ch(e,f,g)
540	eor	x28,x27,x20			// a^b, b^c in next round
541	eor	x16,x16,x23,ror#41	// Sigma1(e)
542	eor	x12,x12,x27,ror#34
543	add	x26,x26,x17			// h+=Ch(e,f,g)
544	and	x19,x19,x28			// (b^c)&=(a^b)
545	eor	x10,x10,x3,ror#61
546	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
547	add	x26,x26,x16			// h+=Sigma1(e)
548	eor	x19,x19,x20			// Maj(a,b,c)
549	eor	x17,x12,x27,ror#39	// Sigma0(a)
550	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
551	add	x5,x5,x14
552	add	x22,x22,x26			// d+=h
553	add	x26,x26,x19			// h+=Maj(a,b,c)
554	ldr	x19,[x30],#8		// *K++, x28 in next round
555	add	x5,x5,x11
556	add	x26,x26,x17			// h+=Sigma0(a)
557	add	x5,x5,x10
// ---- j=2: a=x26 e=x22 h=x25; adds x5, rebuilds x6 ----
558	ldr	x10,[sp,#24]
559	str	x13,[sp,#16]
560	ror	x16,x22,#14
561	add	x25,x25,x19			// h+=K[i]
562	ror	x12,x7,#1
563	and	x17,x23,x22
564	ror	x11,x4,#19
565	bic	x19,x24,x22
566	ror	x13,x26,#28
567	add	x25,x25,x5			// h+=X[i]
568	eor	x16,x16,x22,ror#18
569	eor	x12,x12,x7,ror#8
570	orr	x17,x17,x19			// Ch(e,f,g)
571	eor	x19,x26,x27			// a^b, b^c in next round
572	eor	x16,x16,x22,ror#41	// Sigma1(e)
573	eor	x13,x13,x26,ror#34
574	add	x25,x25,x17			// h+=Ch(e,f,g)
575	and	x28,x28,x19			// (b^c)&=(a^b)
576	eor	x11,x11,x4,ror#61
577	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
578	add	x25,x25,x16			// h+=Sigma1(e)
579	eor	x28,x28,x27			// Maj(a,b,c)
580	eor	x17,x13,x26,ror#39	// Sigma0(a)
581	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
582	add	x6,x6,x15
583	add	x21,x21,x25			// d+=h
584	add	x25,x25,x28			// h+=Maj(a,b,c)
585	ldr	x28,[x30],#8		// *K++, x19 in next round
586	add	x6,x6,x12
587	add	x25,x25,x17			// h+=Sigma0(a)
588	add	x6,x6,x11
// ---- j=3: a=x25 e=x21 h=x24; adds x6, rebuilds x7 ----
589	ldr	x11,[sp,#0]
590	str	x14,[sp,#24]
591	ror	x16,x21,#14
592	add	x24,x24,x28			// h+=K[i]
593	ror	x13,x8,#1
594	and	x17,x22,x21
595	ror	x12,x5,#19
596	bic	x28,x23,x21
597	ror	x14,x25,#28
598	add	x24,x24,x6			// h+=X[i]
599	eor	x16,x16,x21,ror#18
600	eor	x13,x13,x8,ror#8
601	orr	x17,x17,x28			// Ch(e,f,g)
602	eor	x28,x25,x26			// a^b, b^c in next round
603	eor	x16,x16,x21,ror#41	// Sigma1(e)
604	eor	x14,x14,x25,ror#34
605	add	x24,x24,x17			// h+=Ch(e,f,g)
606	and	x19,x19,x28			// (b^c)&=(a^b)
607	eor	x12,x12,x5,ror#61
608	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
609	add	x24,x24,x16			// h+=Sigma1(e)
610	eor	x19,x19,x26			// Maj(a,b,c)
611	eor	x17,x14,x25,ror#39	// Sigma0(a)
612	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
613	add	x7,x7,x0
614	add	x20,x20,x24			// d+=h
615	add	x24,x24,x19			// h+=Maj(a,b,c)
616	ldr	x19,[x30],#8		// *K++, x28 in next round
617	add	x7,x7,x13
618	add	x24,x24,x17			// h+=Sigma0(a)
619	add	x7,x7,x12
// ---- j=4: a=x24 e=x20 h=x23; adds x7, rebuilds x8 ----
620	ldr	x12,[sp,#8]
621	str	x15,[sp,#0]
622	ror	x16,x20,#14
623	add	x23,x23,x19			// h+=K[i]
624	ror	x14,x9,#1
625	and	x17,x21,x20
626	ror	x13,x6,#19
627	bic	x19,x22,x20
628	ror	x15,x24,#28
629	add	x23,x23,x7			// h+=X[i]
630	eor	x16,x16,x20,ror#18
631	eor	x14,x14,x9,ror#8
632	orr	x17,x17,x19			// Ch(e,f,g)
633	eor	x19,x24,x25			// a^b, b^c in next round
634	eor	x16,x16,x20,ror#41	// Sigma1(e)
635	eor	x15,x15,x24,ror#34
636	add	x23,x23,x17			// h+=Ch(e,f,g)
637	and	x28,x28,x19			// (b^c)&=(a^b)
638	eor	x13,x13,x6,ror#61
639	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
640	add	x23,x23,x16			// h+=Sigma1(e)
641	eor	x28,x28,x25			// Maj(a,b,c)
642	eor	x17,x15,x24,ror#39	// Sigma0(a)
643	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
644	add	x8,x8,x1
645	add	x27,x27,x23			// d+=h
646	add	x23,x23,x28			// h+=Maj(a,b,c)
647	ldr	x28,[x30],#8		// *K++, x19 in next round
648	add	x8,x8,x14
649	add	x23,x23,x17			// h+=Sigma0(a)
650	add	x8,x8,x13
// ---- j=5: a=x23 e=x27 h=x22; adds x8, rebuilds x9 ----
651	ldr	x13,[sp,#16]
652	str	x0,[sp,#8]
653	ror	x16,x27,#14
654	add	x22,x22,x28			// h+=K[i]
655	ror	x15,x10,#1
656	and	x17,x20,x27
657	ror	x14,x7,#19
658	bic	x28,x21,x27
659	ror	x0,x23,#28
660	add	x22,x22,x8			// h+=X[i]
661	eor	x16,x16,x27,ror#18
662	eor	x15,x15,x10,ror#8
663	orr	x17,x17,x28			// Ch(e,f,g)
664	eor	x28,x23,x24			// a^b, b^c in next round
665	eor	x16,x16,x27,ror#41	// Sigma1(e)
666	eor	x0,x0,x23,ror#34
667	add	x22,x22,x17			// h+=Ch(e,f,g)
668	and	x19,x19,x28			// (b^c)&=(a^b)
669	eor	x14,x14,x7,ror#61
670	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
671	add	x22,x22,x16			// h+=Sigma1(e)
672	eor	x19,x19,x24			// Maj(a,b,c)
673	eor	x17,x0,x23,ror#39	// Sigma0(a)
674	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
675	add	x9,x9,x2
676	add	x26,x26,x22			// d+=h
677	add	x22,x22,x19			// h+=Maj(a,b,c)
678	ldr	x19,[x30],#8		// *K++, x28 in next round
679	add	x9,x9,x15
680	add	x22,x22,x17			// h+=Sigma0(a)
681	add	x9,x9,x14
// ---- j=6: a=x22 e=x26 h=x21; adds x9, rebuilds x10 ----
682	ldr	x14,[sp,#24]
683	str	x1,[sp,#16]
684	ror	x16,x26,#14
685	add	x21,x21,x19			// h+=K[i]
686	ror	x0,x11,#1
687	and	x17,x27,x26
688	ror	x15,x8,#19
689	bic	x19,x20,x26
690	ror	x1,x22,#28
691	add	x21,x21,x9			// h+=X[i]
692	eor	x16,x16,x26,ror#18
693	eor	x0,x0,x11,ror#8
694	orr	x17,x17,x19			// Ch(e,f,g)
695	eor	x19,x22,x23			// a^b, b^c in next round
696	eor	x16,x16,x26,ror#41	// Sigma1(e)
697	eor	x1,x1,x22,ror#34
698	add	x21,x21,x17			// h+=Ch(e,f,g)
699	and	x28,x28,x19			// (b^c)&=(a^b)
700	eor	x15,x15,x8,ror#61
701	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
702	add	x21,x21,x16			// h+=Sigma1(e)
703	eor	x28,x28,x23			// Maj(a,b,c)
704	eor	x17,x1,x22,ror#39	// Sigma0(a)
705	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
706	add	x10,x10,x3
707	add	x25,x25,x21			// d+=h
708	add	x21,x21,x28			// h+=Maj(a,b,c)
709	ldr	x28,[x30],#8		// *K++, x19 in next round
710	add	x10,x10,x0
711	add	x21,x21,x17			// h+=Sigma0(a)
712	add	x10,x10,x15
// ---- j=7: a=x21 e=x25 h=x20; adds x10, rebuilds x11 ----
713	ldr	x15,[sp,#0]
714	str	x2,[sp,#24]
715	ror	x16,x25,#14
716	add	x20,x20,x28			// h+=K[i]
717	ror	x1,x12,#1
718	and	x17,x26,x25
719	ror	x0,x9,#19
720	bic	x28,x27,x25
721	ror	x2,x21,#28
722	add	x20,x20,x10			// h+=X[i]
723	eor	x16,x16,x25,ror#18
724	eor	x1,x1,x12,ror#8
725	orr	x17,x17,x28			// Ch(e,f,g)
726	eor	x28,x21,x22			// a^b, b^c in next round
727	eor	x16,x16,x25,ror#41	// Sigma1(e)
728	eor	x2,x2,x21,ror#34
729	add	x20,x20,x17			// h+=Ch(e,f,g)
730	and	x19,x19,x28			// (b^c)&=(a^b)
731	eor	x0,x0,x9,ror#61
732	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
733	add	x20,x20,x16			// h+=Sigma1(e)
734	eor	x19,x19,x22			// Maj(a,b,c)
735	eor	x17,x2,x21,ror#39	// Sigma0(a)
736	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
737	add	x11,x11,x4
738	add	x24,x24,x20			// d+=h
739	add	x20,x20,x19			// h+=Maj(a,b,c)
740	ldr	x19,[x30],#8		// *K++, x28 in next round
741	add	x11,x11,x1
742	add	x20,x20,x17			// h+=Sigma0(a)
743	add	x11,x11,x0
// ---- j=8: a=x20 e=x24 h=x27; adds x11, rebuilds x12 ----
744	ldr	x0,[sp,#8]
745	str	x3,[sp,#0]
746	ror	x16,x24,#14
747	add	x27,x27,x19			// h+=K[i]
748	ror	x2,x13,#1
749	and	x17,x25,x24
750	ror	x1,x10,#19
751	bic	x19,x26,x24
752	ror	x3,x20,#28
753	add	x27,x27,x11			// h+=X[i]
754	eor	x16,x16,x24,ror#18
755	eor	x2,x2,x13,ror#8
756	orr	x17,x17,x19			// Ch(e,f,g)
757	eor	x19,x20,x21			// a^b, b^c in next round
758	eor	x16,x16,x24,ror#41	// Sigma1(e)
759	eor	x3,x3,x20,ror#34
760	add	x27,x27,x17			// h+=Ch(e,f,g)
761	and	x28,x28,x19			// (b^c)&=(a^b)
762	eor	x1,x1,x10,ror#61
763	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
764	add	x27,x27,x16			// h+=Sigma1(e)
765	eor	x28,x28,x21			// Maj(a,b,c)
766	eor	x17,x3,x20,ror#39	// Sigma0(a)
767	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
768	add	x12,x12,x5
769	add	x23,x23,x27			// d+=h
770	add	x27,x27,x28			// h+=Maj(a,b,c)
771	ldr	x28,[x30],#8		// *K++, x19 in next round
772	add	x12,x12,x2
773	add	x27,x27,x17			// h+=Sigma0(a)
774	add	x12,x12,x1
// ---- j=9: a=x27 e=x23 h=x26; adds x12, rebuilds x13 ----
775	ldr	x1,[sp,#16]
776	str	x4,[sp,#8]
777	ror	x16,x23,#14
778	add	x26,x26,x28			// h+=K[i]
779	ror	x3,x14,#1
780	and	x17,x24,x23
781	ror	x2,x11,#19
782	bic	x28,x25,x23
783	ror	x4,x27,#28
784	add	x26,x26,x12			// h+=X[i]
785	eor	x16,x16,x23,ror#18
786	eor	x3,x3,x14,ror#8
787	orr	x17,x17,x28			// Ch(e,f,g)
788	eor	x28,x27,x20			// a^b, b^c in next round
789	eor	x16,x16,x23,ror#41	// Sigma1(e)
790	eor	x4,x4,x27,ror#34
791	add	x26,x26,x17			// h+=Ch(e,f,g)
792	and	x19,x19,x28			// (b^c)&=(a^b)
793	eor	x2,x2,x11,ror#61
794	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
795	add	x26,x26,x16			// h+=Sigma1(e)
796	eor	x19,x19,x20			// Maj(a,b,c)
797	eor	x17,x4,x27,ror#39	// Sigma0(a)
798	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
799	add	x13,x13,x6
800	add	x22,x22,x26			// d+=h
801	add	x26,x26,x19			// h+=Maj(a,b,c)
802	ldr	x19,[x30],#8		// *K++, x28 in next round
803	add	x13,x13,x3
804	add	x26,x26,x17			// h+=Sigma0(a)
805	add	x13,x13,x2
// ---- j=10: a=x26 e=x22 h=x25; adds x13, rebuilds x14 ----
806	ldr	x2,[sp,#24]
807	str	x5,[sp,#16]
808	ror	x16,x22,#14
809	add	x25,x25,x19			// h+=K[i]
810	ror	x4,x15,#1
811	and	x17,x23,x22
812	ror	x3,x12,#19
813	bic	x19,x24,x22
814	ror	x5,x26,#28
815	add	x25,x25,x13			// h+=X[i]
816	eor	x16,x16,x22,ror#18
817	eor	x4,x4,x15,ror#8
818	orr	x17,x17,x19			// Ch(e,f,g)
819	eor	x19,x26,x27			// a^b, b^c in next round
820	eor	x16,x16,x22,ror#41	// Sigma1(e)
821	eor	x5,x5,x26,ror#34
822	add	x25,x25,x17			// h+=Ch(e,f,g)
823	and	x28,x28,x19			// (b^c)&=(a^b)
824	eor	x3,x3,x12,ror#61
825	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
826	add	x25,x25,x16			// h+=Sigma1(e)
827	eor	x28,x28,x27			// Maj(a,b,c)
828	eor	x17,x5,x26,ror#39	// Sigma0(a)
829	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
830	add	x14,x14,x7
831	add	x21,x21,x25			// d+=h
832	add	x25,x25,x28			// h+=Maj(a,b,c)
833	ldr	x28,[x30],#8		// *K++, x19 in next round
834	add	x14,x14,x4
835	add	x25,x25,x17			// h+=Sigma0(a)
836	add	x14,x14,x3
// ---- j=11: a=x25 e=x21 h=x24; adds x14, rebuilds x15 ----
837	ldr	x3,[sp,#0]
838	str	x6,[sp,#24]
839	ror	x16,x21,#14
840	add	x24,x24,x28			// h+=K[i]
841	ror	x5,x0,#1
842	and	x17,x22,x21
843	ror	x4,x13,#19
844	bic	x28,x23,x21
845	ror	x6,x25,#28
846	add	x24,x24,x14			// h+=X[i]
847	eor	x16,x16,x21,ror#18
848	eor	x5,x5,x0,ror#8
849	orr	x17,x17,x28			// Ch(e,f,g)
850	eor	x28,x25,x26			// a^b, b^c in next round
851	eor	x16,x16,x21,ror#41	// Sigma1(e)
852	eor	x6,x6,x25,ror#34
853	add	x24,x24,x17			// h+=Ch(e,f,g)
854	and	x19,x19,x28			// (b^c)&=(a^b)
855	eor	x4,x4,x13,ror#61
856	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
857	add	x24,x24,x16			// h+=Sigma1(e)
858	eor	x19,x19,x26			// Maj(a,b,c)
859	eor	x17,x6,x25,ror#39	// Sigma0(a)
860	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
861	add	x15,x15,x8
862	add	x20,x20,x24			// d+=h
863	add	x24,x24,x19			// h+=Maj(a,b,c)
864	ldr	x19,[x30],#8		// *K++, x28 in next round
865	add	x15,x15,x5
866	add	x24,x24,x17			// h+=Sigma0(a)
867	add	x15,x15,x4
// ---- j=12: a=x24 e=x20 h=x23; adds x15, rebuilds x0 ----
868	ldr	x4,[sp,#8]
869	str	x7,[sp,#0]
870	ror	x16,x20,#14
871	add	x23,x23,x19			// h+=K[i]
872	ror	x6,x1,#1
873	and	x17,x21,x20
874	ror	x5,x14,#19
875	bic	x19,x22,x20
876	ror	x7,x24,#28
877	add	x23,x23,x15			// h+=X[i]
878	eor	x16,x16,x20,ror#18
879	eor	x6,x6,x1,ror#8
880	orr	x17,x17,x19			// Ch(e,f,g)
881	eor	x19,x24,x25			// a^b, b^c in next round
882	eor	x16,x16,x20,ror#41	// Sigma1(e)
883	eor	x7,x7,x24,ror#34
884	add	x23,x23,x17			// h+=Ch(e,f,g)
885	and	x28,x28,x19			// (b^c)&=(a^b)
886	eor	x5,x5,x14,ror#61
887	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
888	add	x23,x23,x16			// h+=Sigma1(e)
889	eor	x28,x28,x25			// Maj(a,b,c)
890	eor	x17,x7,x24,ror#39	// Sigma0(a)
891	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
892	add	x0,x0,x9
893	add	x27,x27,x23			// d+=h
894	add	x23,x23,x28			// h+=Maj(a,b,c)
895	ldr	x28,[x30],#8		// *K++, x19 in next round
896	add	x0,x0,x6
897	add	x23,x23,x17			// h+=Sigma0(a)
898	add	x0,x0,x5
// ---- j=13: a=x23 e=x27 h=x22; adds x0, rebuilds x1 ----
899	ldr	x5,[sp,#16]
900	str	x8,[sp,#8]
901	ror	x16,x27,#14
902	add	x22,x22,x28			// h+=K[i]
903	ror	x7,x2,#1
904	and	x17,x20,x27
905	ror	x6,x15,#19
906	bic	x28,x21,x27
907	ror	x8,x23,#28
908	add	x22,x22,x0			// h+=X[i]
909	eor	x16,x16,x27,ror#18
910	eor	x7,x7,x2,ror#8
911	orr	x17,x17,x28			// Ch(e,f,g)
912	eor	x28,x23,x24			// a^b, b^c in next round
913	eor	x16,x16,x27,ror#41	// Sigma1(e)
914	eor	x8,x8,x23,ror#34
915	add	x22,x22,x17			// h+=Ch(e,f,g)
916	and	x19,x19,x28			// (b^c)&=(a^b)
917	eor	x6,x6,x15,ror#61
918	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
919	add	x22,x22,x16			// h+=Sigma1(e)
920	eor	x19,x19,x24			// Maj(a,b,c)
921	eor	x17,x8,x23,ror#39	// Sigma0(a)
922	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
923	add	x1,x1,x10
924	add	x26,x26,x22			// d+=h
925	add	x22,x22,x19			// h+=Maj(a,b,c)
926	ldr	x19,[x30],#8		// *K++, x28 in next round
927	add	x1,x1,x7
928	add	x22,x22,x17			// h+=Sigma0(a)
929	add	x1,x1,x6
// ---- j=14: a=x22 e=x26 h=x21; adds x1, rebuilds x2 ----
930	ldr	x6,[sp,#24]
931	str	x9,[sp,#16]
932	ror	x16,x26,#14
933	add	x21,x21,x19			// h+=K[i]
934	ror	x8,x3,#1
935	and	x17,x27,x26
936	ror	x7,x0,#19
937	bic	x19,x20,x26
938	ror	x9,x22,#28
939	add	x21,x21,x1			// h+=X[i]
940	eor	x16,x16,x26,ror#18
941	eor	x8,x8,x3,ror#8
942	orr	x17,x17,x19			// Ch(e,f,g)
943	eor	x19,x22,x23			// a^b, b^c in next round
944	eor	x16,x16,x26,ror#41	// Sigma1(e)
945	eor	x9,x9,x22,ror#34
946	add	x21,x21,x17			// h+=Ch(e,f,g)
947	and	x28,x28,x19			// (b^c)&=(a^b)
948	eor	x7,x7,x0,ror#61
949	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
950	add	x21,x21,x16			// h+=Sigma1(e)
951	eor	x28,x28,x23			// Maj(a,b,c)
952	eor	x17,x9,x22,ror#39	// Sigma0(a)
953	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
954	add	x2,x2,x11
955	add	x25,x25,x21			// d+=h
956	add	x21,x21,x28			// h+=Maj(a,b,c)
957	ldr	x28,[x30],#8		// *K++, x19 in next round
958	add	x2,x2,x8
959	add	x21,x21,x17			// h+=Sigma0(a)
960	add	x2,x2,x7
// ---- j=15: a=x21 e=x25 h=x20; adds x2, rebuilds x3 ----
961	ldr	x7,[sp,#0]
962	str	x10,[sp,#24]
963	ror	x16,x25,#14
964	add	x20,x20,x28			// h+=K[i]
965	ror	x9,x4,#1
966	and	x17,x26,x25
967	ror	x8,x1,#19
968	bic	x28,x27,x25
969	ror	x10,x21,#28
970	add	x20,x20,x2			// h+=X[i]
971	eor	x16,x16,x25,ror#18
972	eor	x9,x9,x4,ror#8
973	orr	x17,x17,x28			// Ch(e,f,g)
974	eor	x28,x21,x22			// a^b, b^c in next round
975	eor	x16,x16,x25,ror#41	// Sigma1(e)
976	eor	x10,x10,x21,ror#34
977	add	x20,x20,x17			// h+=Ch(e,f,g)
978	and	x19,x19,x28			// (b^c)&=(a^b)
979	eor	x8,x8,x1,ror#61
980	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
981	add	x20,x20,x16			// h+=Sigma1(e)
982	eor	x19,x19,x22			// Maj(a,b,c)
983	eor	x17,x10,x21,ror#39	// Sigma0(a)
984	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
985	add	x3,x3,x12
986	add	x24,x24,x20			// d+=h
987	add	x20,x20,x19			// h+=Maj(a,b,c)
988	ldr	x19,[x30],#8		// *K++, x28 in next round
989	add	x3,x3,x9
990	add	x20,x20,x17			// h+=Sigma0(a)
991	add	x3,x3,x8
// x19 now holds the table entry for the next round; the zero terminator
// after the last constant ends the rounds for this block.
992	cbnz	x19,Loop_16_xx
993
// ---------------------------------------------------------------------
// End of one block: fold the working variables into the state, then
// either start the next block or return.
// ---------------------------------------------------------------------
// Reload the state pointer, the end-of-input pointer and the input
// pointer saved by Loop (x0-x2 held message words during the rounds).
994	ldp	x0,x2,[x29,#96]
995	ldr	x1,[x29,#112]
// 81 quads were read from LK512 (80 constants plus the terminator),
// i.e. 648 bytes; step x30 back to the start of the table.
996	sub	x30,x30,#648		// rewind
997
998	ldp	x3,x4,[x0]
999	ldp	x5,x6,[x0,#2*8]
// The saved pointer was already 2*8 bytes into the block, so 14*8 more
// completes the 128-byte step.
1000	add	x1,x1,#14*8			// advance input pointer
1001	ldp	x7,x8,[x0,#4*8]
// state[i] += working variable i, written back to [x0].
1002	add	x20,x20,x3
1003	ldp	x9,x10,[x0,#6*8]
1004	add	x21,x21,x4
1005	add	x22,x22,x5
1006	add	x23,x23,x6
1007	stp	x20,x21,[x0]
1008	add	x24,x24,x7
1009	add	x25,x25,x8
1010	stp	x22,x23,[x0,#2*8]
1011	add	x26,x26,x9
1012	add	x27,x27,x10
// The stp instructions between this cmp and the branch do not modify
// the flags.
1013	cmp	x1,x2
1014	stp	x24,x25,[x0,#4*8]
1015	stp	x26,x27,[x0,#6*8]
// More input left: x20-x27 already hold the updated state for the next
// block.
1016	b.ne	Loop
1017
// Epilogue: restore x19-x28 through x29, release the 32-byte scratch
// area, then pop the frame (which also restores the return address
// into x30).
1018	ldp	x19,x20,[x29,#16]
1019	add	sp,sp,#4*8
1020	ldp	x21,x22,[x29,#32]
1021	ldp	x23,x24,[x29,#48]
1022	ldp	x25,x26,[x29,#64]
1023	ldp	x27,x28,[x29,#80]
1024	ldp	x29,x30,[sp],#128
// AARCH64_VALIDATE_LINK_REGISTER is a macro defined outside this file
// (presumably in <GFp/arm_arch.h> -- confirm).
1025	AARCH64_VALIDATE_LINK_REGISTER
1026	ret
1027
1028
// ---------------------------------------------------------------------
// LK512: the 80 SHA-512 round constants K[0..79], read sequentially
// through x30 by the round code, followed by one zero quad. The zero is
// not a constant: the round loop stops when it fetches it, and the
// function then rewinds x30 by 81*8 = 648 bytes.
// ---------------------------------------------------------------------
1029.section	__TEXT,__const
1030.align	6
1031
1032LK512:
1033.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1034.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1035.quad	0x3956c25bf348b538,0x59f111f1b605d019
1036.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1037.quad	0xd807aa98a3030242,0x12835b0145706fbe
1038.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1039.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1040.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1041.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1042.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1043.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1044.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1045.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1046.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1047.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1048.quad	0x06ca6351e003826f,0x142929670a0e6e70
1049.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1050.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1051.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1052.quad	0x81c2c92e47edaee6,0x92722c851482353b
1053.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1054.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1055.quad	0xd192e819d6ef5218,0xd69906245565a910
1056.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1057.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1058.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1059.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1060.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1061.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
1062.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
1063.quad	0x90befffa23631e28,0xa4506cebde82bde9
1064.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
1065.quad	0xca273eceea26619c,0xd186b8c721c0c207
1066.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1067.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
1068.quad	0x113f9804bef90dae,0x1b710b35131c471b
1069.quad	0x28db77f523047d84,0x32caab7b40c72493
1070.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1071.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1072.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
1073.quad	0	// terminator
1074
// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
// It is not referenced by the code in this file.
1075.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1076.align	2
1077.align	2
1078#endif  // !OPENSSL_NO_ASM
1079