1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
25 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
26 *
27 * This is converted assembly: SSE2 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
29 *
30 * Should work on FreeBSD, Linux and macOS
31 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
32 */
33
34#if defined(__aarch64__)
35
36/* make gcc <= 9 happy */
/*
 * CFI_NEGATE_RA_STATE is placed right after the return-address signing
 * hint (hint #25) in every non-leaf function of this file.
 *
 * It expands to the .cfi_negate_ra_state directive when LD_VERSION is
 * not defined or is at least 233010000, and to nothing otherwise, so
 * that toolchains which do not understand the directive can still
 * assemble the file.
 *
 * NOTE(review): LD_VERSION is not defined in this file; it is presumably
 * provided by the build system and 233010000 presumably encodes
 * binutils 2.33.1 -- confirm against the build configuration.
 */
37#if !defined(LD_VERSION) || LD_VERSION >= 233010000
38#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
39#else
40#define CFI_NEGATE_RA_STATE
41#endif
42
/*
 * ELF note in .note.gnu.property describing the object file.
 * Layout: note header (namesz, descsz, type), the owner name "GNU",
 * then one property record of 16 bytes (type, datasz, data, padding).
 * The property value 3 sets the two lowest feature bits, which the
 * AArch64 ELF ABI assigns to BTI and PAC; this matches the
 * hint #34 / hint #25 / hint #29 instructions used by the functions below.
 */
43	.text
44	.section	.note.gnu.property,"a",@note
45	.p2align	3
	/* namesz: length of "GNU" including the terminating NUL */
46	.word	4
	/* descsz: the four .word entries following the name (16 bytes) */
47	.word	16
	/* note type 5 (NT_GNU_PROPERTY_TYPE_0 in the ELF ABI) */
48	.word	5
49	.asciz	"GNU"
	/* property type 0xc0000000 (GNU_PROPERTY_AARCH64_FEATURE_1_AND) */
50	.word	3221225472
	/* property data size in bytes */
51	.word	4
	/* feature bits: bit 0 (BTI) | bit 1 (PAC) */
52	.word	3
	/* padding up to the 8-byte alignment of the property record */
53	.word	0
54.Lsec_end0:
	/* switch back to the code section for the functions that follow */
55	.text
/*
 * zfs_blake3_compress_in_place_sse2
 *
 * Runs compress_pre on a 64-byte scratch state held on the stack and
 * writes the XOR of its first and second 32-byte halves back over the
 * 32 bytes addressed by x0.
 *
 * In (AAPCS64 integer argument registers):
 *   x0 = pointer to 32 bytes that are read as input and overwritten
 *        with the result
 *   x1 = pointer to the 64-byte message block (forwarded as x2)
 *   w2 = forwarded as w3 (compress_pre keeps only the low 8 bits)
 *   x3 = forwarded as x4 (64-bit value)
 *   w4 = forwarded as w5 (compress_pre keeps only the low 8 bits)
 * NOTE(review): upstream BLAKE3 presumably names these cv, block,
 * block_len, counter and flags -- the C prototype is not visible here.
 *
 * Stack frame (96 bytes):
 *   [sp, #0  .. #63]  scratch state filled in by compress_pre
 *   [sp, #64 .. #79]  saved x29, x30
 *   [sp, #80 .. #87]  saved x19
 */
56	.globl	zfs_blake3_compress_in_place_sse2
57	.p2align	2
58	.type	zfs_blake3_compress_in_place_sse2,@function
59zfs_blake3_compress_in_place_sse2:
60	.cfi_startproc
	/* hint #25 is PACIASP: sign the return address in x30 using sp */
61	hint	#25
62	CFI_NEGATE_RA_STATE
63	sub	sp, sp, #96
64	stp	x29, x30, [sp, #64]
65	add	x29, sp, #64
66	str	x19, [sp, #80]
	/* CFA = x29 + 32, i.e. the value of sp on entry */
67	.cfi_def_cfa w29, 32
68	.cfi_offset w19, -16
69	.cfi_offset w30, -24
70	.cfi_offset w29, -32
	/* keep the in/out pointer in callee-saved x19 across the call */
71	mov	x19, x0
	/*
	 * Shift every argument up by one register to make room for the
	 * scratch pointer in x0.  The moves run from the highest register
	 * downwards so no source is overwritten before it has been copied.
	 */
72	mov	w5, w4
73	mov	x4, x3
74	mov	w3, w2
75	mov	x2, x1
76	mov	x0, sp
77	mov	x1, x19
78	bl	compress_pre
	/* q0,q1 = first 32 bytes of the scratch state, q2,q3 = last 32 bytes */
79	ldp	q0, q1, [sp]
80	ldp	q2, q3, [sp, #32]
	/* result = first half XOR second half */
81	eor	v0.16b, v2.16b, v0.16b
82	eor	v1.16b, v3.16b, v1.16b
83	ldp	x29, x30, [sp, #64]
	/* overwrite the caller's 32 bytes with the result */
84	stp	q0, q1, [x19]
85	ldr	x19, [sp, #80]
86	add	sp, sp, #96
	/* hint #29 is AUTIASP: authenticate the return address before ret */
87	hint	#29
88	ret
89.Lfunc_end0:
90	.size	zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
91	.cfi_endproc
92
/*
 * Constant pool for compress_pre: one 16-byte vector holding the 32-bit
 * words 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A (written below as
 * two signed 64-bit decimals).  It is used as the initial third row of
 * the state; these are the first four BLAKE3 IV words.
 */
93	.section	.rodata.cst16,"aM",@progbits,16
94	.p2align	4
95.LCPI1_0:
96	.xword	-4942790177982912921
97	.xword	-6534734903820487822
98	.text
/*
 * compress_pre (file-local helper, no .globl)
 *
 * Builds a 4x4 state of 32-bit words, one row per vector register, and
 * runs seven rounds over it.  Each round is two half-steps (14 in total),
 * each using the rotation sequence 16, 12, 8, 7.
 * The message-word reordering for the following round is computed by the
 * uzp/zip/ext/trn/rev64/lane-mov instructions, which the compiler has
 * interleaved with the add/xor/rotate arithmetic; the instruction order
 * is therefore significant and must not be rearranged by hand.
 *
 * In:
 *   x0 = pointer to a 64-byte output buffer
 *   x1 = pointer to 32 bytes of input state (rows 0 and 1)
 *   x2 = pointer to the 64-byte message block
 *   w3 = value whose low 8 bits become word 2 of row 3
 *   x4 = 64-bit value that becomes words 0 and 1 of row 3
 *   w5 = value whose low 8 bits become word 3 of row 3
 * Out:
 *   [x0, #0 .. #63] = rows 0..3 of the state after the last round
 *   (the callers XOR rows 0,1 with rows 2,3 themselves)
 *
 * Leaf function: no stack frame.  Scratch registers are x8, v0-v7 and
 * v16-v20; the callee-saved v8-v15 are not touched.
 */
99	.p2align	2
100	.type	compress_pre,@function
101compress_pre:
102	.cfi_startproc
	/* hint #34 is BTI c: valid landing pad for calls */
103	hint	#34
	/* ---- build the initial state ---- */
	/* v1.s[0] = w3 */
104	fmov	s1, w3
	/* d0 = byte mask 0xff in each of the two 32-bit lanes */
105	movi	d0, #0x0000ff000000ff
	/* v2 = row 0 (first 16 bytes at x1) */
106	ldr	q2, [x1]
	/* low half of v3 = x4 */
107	fmov	d3, x4
108	adrp	x8, .LCPI1_0
	/* v1.s[1] = w5 */
109	mov	v1.s[1], w5
110	str	q2, [x0]
	/* v4 = row 2 (constant vector .LCPI1_0) */
111	ldr	q4, [x8, :lo12:.LCPI1_0]
	/* x8 = address of the second half of the message block */
112	add	x8, x2, #32
	/* v5 = row 1 (second 16 bytes at x1) */
113	ldr	q5, [x1, #16]
	/* keep only the low byte of w3 and w5 */
114	and	v0.8b, v1.8b, v0.8b
115	stp	q5, q4, [x0, #16]
	/* v3 = row 3 = { x4 low, x4 high, w3 & 0xff, w5 & 0xff } */
116	mov	v3.d[1], v0.d[0]
117	str	q3, [x0, #48]
	/* ---- first round, first half-step ---- */
	/* message words 0..7; uzp1/uzp2 split them into even and odd words */
118	ldp	q0, q6, [x2]
119	uzp1	v1.4s, v0.4s, v6.4s
120	uzp2	v0.4s, v0.4s, v6.4s
	/* row0 += even words; row0 += row1; row3 ^= row0 */
121	add	v2.4s, v2.4s, v1.4s
122	uzp1	v18.4s, v1.4s, v1.4s
123	add	v2.4s, v2.4s, v5.4s
124	eor	v3.16b, v2.16b, v3.16b
	/* the odd words are added early; row1 is added at the next step */
125	add	v2.4s, v2.4s, v0.4s
	/* rev32 on halfwords = rotate each 32-bit lane by 16 */
126	rev32	v3.8h, v3.8h
	/* row2 += row3; row1 ^= row2 */
127	add	v4.4s, v3.4s, v4.4s
128	eor	v5.16b, v4.16b, v5.16b
	/* ushr 12 | shl 20 = rotate right by 12 */
129	ushr	v6.4s, v5.4s, #12
130	shl	v5.4s, v5.4s, #20
131	orr	v5.16b, v5.16b, v6.16b
132	add	v2.4s, v2.4s, v5.4s
133	eor	v3.16b, v2.16b, v3.16b
	/* ushr 8 | shl 24 = rotate right by 8 */
134	ushr	v6.4s, v3.4s, #8
135	shl	v3.4s, v3.4s, #24
136	orr	v3.16b, v3.16b, v6.16b
	/* message words 8..15, de-interleaved: v6 = even, v7 = odd words */
137	ld2	{ v6.4s, v7.4s }, [x8]
138	add	v4.4s, v3.4s, v4.4s
	/*
	 * The ext instructions on v3, v4 and v2 rotate the lanes of rows
	 * 3, 2 and 0 so that the second half-step works on the diagonals.
	 */
139	ext	v3.16b, v3.16b, v3.16b, #8
140	add	v2.4s, v2.4s, v6.4s
141	eor	v5.16b, v4.16b, v5.16b
142	ext	v4.16b, v4.16b, v4.16b, #4
143	ext	v6.16b, v6.16b, v6.16b, #12
144	ext	v2.16b, v2.16b, v2.16b, #12
	/* ushr 7 | shl 25 = rotate right by 7 */
145	ushr	v16.4s, v5.4s, #7
146	shl	v5.4s, v5.4s, #25
147	orr	v5.16b, v5.16b, v16.16b
148	ext	v16.16b, v7.16b, v7.16b, #12
	/* ---- first round, second half-step, then the remaining rounds ---- */
149	add	v2.4s, v2.4s, v5.4s
150	mov	v7.16b, v16.16b
151	eor	v3.16b, v3.16b, v2.16b
152	add	v2.4s, v2.4s, v16.4s
153	mov	v7.s[1], v6.s[2]
154	rev32	v3.8h, v3.8h
155	add	v4.4s, v4.4s, v3.4s
156	eor	v5.16b, v4.16b, v5.16b
157	ushr	v17.4s, v5.4s, #12
158	shl	v5.4s, v5.4s, #20
159	orr	v5.16b, v5.16b, v17.16b
160	add	v2.4s, v2.4s, v5.4s
161	eor	v3.16b, v2.16b, v3.16b
162	ushr	v17.4s, v3.4s, #8
163	shl	v3.4s, v3.4s, #24
164	orr	v3.16b, v3.16b, v17.16b
165	ext	v17.16b, v18.16b, v1.16b, #8
166	add	v4.4s, v3.4s, v4.4s
167	uzp2	v17.4s, v17.4s, v0.4s
168	ext	v3.16b, v3.16b, v3.16b, #8
169	eor	v5.16b, v4.16b, v5.16b
170	add	v2.4s, v2.4s, v17.4s
171	ext	v4.16b, v4.16b, v4.16b, #12
172	ushr	v18.4s, v5.4s, #7
173	shl	v5.4s, v5.4s, #25
174	ext	v2.16b, v2.16b, v2.16b, #4
175	orr	v5.16b, v5.16b, v18.16b
176	ext	v18.16b, v1.16b, v1.16b, #12
177	add	v2.4s, v2.4s, v5.4s
178	ext	v1.16b, v1.16b, v18.16b, #12
179	zip1	v18.2d, v16.2d, v0.2d
180	zip2	v0.4s, v0.4s, v16.4s
181	eor	v3.16b, v3.16b, v2.16b
182	rev64	v1.4s, v1.4s
183	mov	v18.s[3], v6.s[3]
184	zip1	v16.4s, v0.4s, v6.4s
185	rev32	v3.8h, v3.8h
186	trn2	v1.4s, v1.4s, v7.4s
187	zip1	v0.4s, v6.4s, v0.4s
188	add	v4.4s, v4.4s, v3.4s
189	add	v2.4s, v2.4s, v1.4s
190	ext	v6.16b, v0.16b, v16.16b, #8
191	eor	v5.16b, v4.16b, v5.16b
192	ushr	v7.4s, v5.4s, #12
193	shl	v5.4s, v5.4s, #20
194	orr	v5.16b, v5.16b, v7.16b
195	add	v7.4s, v2.4s, v5.4s
196	eor	v2.16b, v7.16b, v3.16b
197	ext	v7.16b, v7.16b, v7.16b, #12
198	ushr	v3.4s, v2.4s, #8
199	shl	v2.4s, v2.4s, #24
200	orr	v3.16b, v2.16b, v3.16b
201	ext	v2.16b, v18.16b, v18.16b, #12
202	add	v4.4s, v3.4s, v4.4s
203	uzp1	v2.4s, v18.4s, v2.4s
204	ext	v3.16b, v3.16b, v3.16b, #8
205	eor	v5.16b, v4.16b, v5.16b
206	add	v7.4s, v7.4s, v2.4s
207	ext	v4.16b, v4.16b, v4.16b, #4
208	ushr	v18.4s, v5.4s, #7
209	shl	v5.4s, v5.4s, #25
210	orr	v5.16b, v5.16b, v18.16b
211	add	v7.4s, v7.4s, v5.4s
212	eor	v3.16b, v3.16b, v7.16b
213	add	v7.4s, v7.4s, v6.4s
214	rev32	v3.8h, v3.8h
215	add	v4.4s, v4.4s, v3.4s
216	eor	v5.16b, v4.16b, v5.16b
217	ushr	v0.4s, v5.4s, #12
218	shl	v5.4s, v5.4s, #20
219	orr	v0.16b, v5.16b, v0.16b
220	add	v5.4s, v7.4s, v0.4s
221	ext	v7.16b, v17.16b, v17.16b, #4
222	eor	v3.16b, v5.16b, v3.16b
223	uzp1	v17.4s, v7.4s, v7.4s
224	ushr	v16.4s, v3.4s, #8
225	shl	v3.4s, v3.4s, #24
226	orr	v3.16b, v3.16b, v16.16b
227	ext	v16.16b, v17.16b, v7.16b, #8
228	add	v4.4s, v3.4s, v4.4s
229	uzp2	v16.4s, v16.4s, v1.4s
230	ext	v3.16b, v3.16b, v3.16b, #8
231	eor	v0.16b, v4.16b, v0.16b
232	add	v5.4s, v5.4s, v16.4s
233	ext	v4.16b, v4.16b, v4.16b, #12
234	ushr	v17.4s, v0.4s, #7
235	shl	v0.4s, v0.4s, #25
236	ext	v5.16b, v5.16b, v5.16b, #4
237	orr	v0.16b, v0.16b, v17.16b
238	ext	v17.16b, v7.16b, v7.16b, #12
239	add	v5.4s, v5.4s, v0.4s
240	ext	v7.16b, v7.16b, v17.16b, #12
241	mov	v17.16b, v6.16b
242	eor	v3.16b, v3.16b, v5.16b
243	rev64	v7.4s, v7.4s
244	mov	v17.s[1], v2.s[2]
245	rev32	v3.8h, v3.8h
246	add	v4.4s, v4.4s, v3.4s
247	eor	v18.16b, v4.16b, v0.16b
248	trn2	v0.4s, v7.4s, v17.4s
249	ushr	v7.4s, v18.4s, #12
250	shl	v17.4s, v18.4s, #20
251	add	v5.4s, v5.4s, v0.4s
252	zip1	v18.2d, v6.2d, v1.2d
253	zip2	v1.4s, v1.4s, v6.4s
254	orr	v7.16b, v17.16b, v7.16b
255	mov	v18.s[3], v2.s[3]
256	zip1	v6.4s, v1.4s, v2.4s
257	add	v5.4s, v5.4s, v7.4s
258	zip1	v1.4s, v2.4s, v1.4s
259	eor	v3.16b, v5.16b, v3.16b
260	ext	v5.16b, v5.16b, v5.16b, #12
261	ext	v6.16b, v1.16b, v6.16b, #8
262	ushr	v17.4s, v3.4s, #8
263	shl	v3.4s, v3.4s, #24
264	orr	v17.16b, v3.16b, v17.16b
265	ext	v3.16b, v18.16b, v18.16b, #12
266	add	v4.4s, v17.4s, v4.4s
267	uzp1	v3.4s, v18.4s, v3.4s
268	ext	v17.16b, v17.16b, v17.16b, #8
269	eor	v7.16b, v4.16b, v7.16b
270	add	v5.4s, v5.4s, v3.4s
271	ext	v4.16b, v4.16b, v4.16b, #4
272	ushr	v18.4s, v7.4s, #7
273	shl	v7.4s, v7.4s, #25
274	orr	v7.16b, v7.16b, v18.16b
275	add	v5.4s, v5.4s, v7.4s
276	eor	v17.16b, v17.16b, v5.16b
277	add	v5.4s, v5.4s, v6.4s
278	rev32	v17.8h, v17.8h
279	add	v4.4s, v4.4s, v17.4s
280	eor	v2.16b, v4.16b, v7.16b
281	ext	v7.16b, v16.16b, v16.16b, #4
282	ushr	v1.4s, v2.4s, #12
283	shl	v2.4s, v2.4s, #20
284	orr	v1.16b, v2.16b, v1.16b
285	add	v2.4s, v5.4s, v1.4s
286	eor	v5.16b, v2.16b, v17.16b
287	uzp1	v17.4s, v7.4s, v7.4s
288	ushr	v16.4s, v5.4s, #8
289	shl	v5.4s, v5.4s, #24
290	orr	v5.16b, v5.16b, v16.16b
291	ext	v16.16b, v17.16b, v7.16b, #8
292	add	v4.4s, v5.4s, v4.4s
293	uzp2	v16.4s, v16.4s, v0.4s
294	ext	v5.16b, v5.16b, v5.16b, #8
295	eor	v1.16b, v4.16b, v1.16b
296	add	v2.4s, v2.4s, v16.4s
297	ext	v4.16b, v4.16b, v4.16b, #12
298	ushr	v17.4s, v1.4s, #7
299	shl	v1.4s, v1.4s, #25
300	ext	v2.16b, v2.16b, v2.16b, #4
301	orr	v1.16b, v1.16b, v17.16b
302	ext	v17.16b, v7.16b, v7.16b, #12
303	add	v2.4s, v2.4s, v1.4s
304	ext	v7.16b, v7.16b, v17.16b, #12
305	mov	v17.16b, v6.16b
306	eor	v5.16b, v5.16b, v2.16b
307	rev64	v7.4s, v7.4s
308	mov	v17.s[1], v3.s[2]
309	rev32	v5.8h, v5.8h
310	add	v4.4s, v4.4s, v5.4s
311	eor	v18.16b, v4.16b, v1.16b
312	trn2	v1.4s, v7.4s, v17.4s
313	ushr	v7.4s, v18.4s, #12
314	shl	v17.4s, v18.4s, #20
315	add	v2.4s, v2.4s, v1.4s
316	zip1	v18.2d, v6.2d, v0.2d
317	zip2	v0.4s, v0.4s, v6.4s
318	orr	v7.16b, v17.16b, v7.16b
319	mov	v18.s[3], v3.s[3]
320	add	v2.4s, v2.4s, v7.4s
321	eor	v5.16b, v2.16b, v5.16b
322	ext	v2.16b, v2.16b, v2.16b, #12
323	ushr	v17.4s, v5.4s, #8
324	shl	v5.4s, v5.4s, #24
325	orr	v5.16b, v5.16b, v17.16b
326	add	v17.4s, v5.4s, v4.4s
327	ext	v4.16b, v18.16b, v18.16b, #12
328	ext	v5.16b, v5.16b, v5.16b, #8
329	eor	v7.16b, v17.16b, v7.16b
330	uzp1	v4.4s, v18.4s, v4.4s
331	ext	v17.16b, v17.16b, v17.16b, #4
332	ushr	v18.4s, v7.4s, #7
333	shl	v7.4s, v7.4s, #25
334	add	v2.4s, v2.4s, v4.4s
335	orr	v7.16b, v7.16b, v18.16b
336	add	v2.4s, v2.4s, v7.4s
337	eor	v5.16b, v5.16b, v2.16b
338	rev32	v5.8h, v5.8h
339	add	v6.4s, v17.4s, v5.4s
340	zip1	v17.4s, v0.4s, v3.4s
341	zip1	v0.4s, v3.4s, v0.4s
342	eor	v3.16b, v6.16b, v7.16b
343	ext	v0.16b, v0.16b, v17.16b, #8
344	ushr	v7.4s, v3.4s, #12
345	shl	v3.4s, v3.4s, #20
346	add	v2.4s, v2.4s, v0.4s
347	orr	v3.16b, v3.16b, v7.16b
348	ext	v7.16b, v16.16b, v16.16b, #4
349	add	v2.4s, v2.4s, v3.4s
350	uzp1	v17.4s, v7.4s, v7.4s
351	eor	v5.16b, v2.16b, v5.16b
352	ushr	v16.4s, v5.4s, #8
353	shl	v5.4s, v5.4s, #24
354	orr	v5.16b, v5.16b, v16.16b
355	ext	v16.16b, v17.16b, v7.16b, #8
356	add	v6.4s, v5.4s, v6.4s
357	uzp2	v16.4s, v16.4s, v1.4s
358	ext	v5.16b, v5.16b, v5.16b, #8
359	eor	v3.16b, v6.16b, v3.16b
360	add	v2.4s, v2.4s, v16.4s
361	ext	v6.16b, v6.16b, v6.16b, #12
362	ushr	v17.4s, v3.4s, #7
363	shl	v3.4s, v3.4s, #25
364	ext	v2.16b, v2.16b, v2.16b, #4
365	orr	v3.16b, v3.16b, v17.16b
366	add	v17.4s, v2.4s, v3.4s
367	eor	v2.16b, v5.16b, v17.16b
368	ext	v5.16b, v7.16b, v7.16b, #12
369	rev32	v18.8h, v2.8h
370	ext	v2.16b, v7.16b, v5.16b, #12
371	mov	v5.16b, v0.16b
372	add	v6.4s, v6.4s, v18.4s
373	rev64	v2.4s, v2.4s
374	mov	v5.s[1], v4.s[2]
375	eor	v3.16b, v6.16b, v3.16b
376	trn2	v2.4s, v2.4s, v5.4s
377	ushr	v5.4s, v3.4s, #12
378	shl	v3.4s, v3.4s, #20
379	add	v7.4s, v17.4s, v2.4s
380	orr	v3.16b, v3.16b, v5.16b
381	add	v5.4s, v7.4s, v3.4s
382	eor	v7.16b, v5.16b, v18.16b
383	zip1	v18.2d, v0.2d, v1.2d
384	ext	v5.16b, v5.16b, v5.16b, #12
385	zip2	v0.4s, v1.4s, v0.4s
386	ushr	v17.4s, v7.4s, #8
387	shl	v7.4s, v7.4s, #24
388	mov	v18.s[3], v4.s[3]
389	orr	v7.16b, v7.16b, v17.16b
390	ext	v17.16b, v18.16b, v18.16b, #12
391	add	v6.4s, v7.4s, v6.4s
392	ext	v7.16b, v7.16b, v7.16b, #8
393	eor	v19.16b, v6.16b, v3.16b
394	uzp1	v3.4s, v18.4s, v17.4s
395	ext	v6.16b, v6.16b, v6.16b, #4
396	ushr	v17.4s, v19.4s, #7
397	shl	v18.4s, v19.4s, #25
398	add	v5.4s, v5.4s, v3.4s
399	orr	v17.16b, v18.16b, v17.16b
400	add	v5.4s, v5.4s, v17.4s
401	eor	v7.16b, v7.16b, v5.16b
402	rev32	v7.8h, v7.8h
403	add	v1.4s, v6.4s, v7.4s
404	zip1	v6.4s, v0.4s, v4.4s
405	zip1	v0.4s, v4.4s, v0.4s
406	eor	v4.16b, v1.16b, v17.16b
407	ext	v6.16b, v0.16b, v6.16b, #8
408	ushr	v0.4s, v4.4s, #12
409	shl	v4.4s, v4.4s, #20
410	add	v5.4s, v5.4s, v6.4s
411	zip1	v20.2d, v6.2d, v2.2d
412	orr	v0.16b, v4.16b, v0.16b
413	mov	v20.s[3], v3.s[3]
414	add	v4.4s, v5.4s, v0.4s
415	eor	v5.16b, v4.16b, v7.16b
416	ext	v7.16b, v16.16b, v16.16b, #4
417	ushr	v16.4s, v5.4s, #8
418	shl	v5.4s, v5.4s, #24
419	uzp1	v17.4s, v7.4s, v7.4s
420	orr	v5.16b, v5.16b, v16.16b
421	ext	v16.16b, v17.16b, v7.16b, #8
422	add	v1.4s, v5.4s, v1.4s
423	uzp2	v16.4s, v16.4s, v2.4s
424	zip2	v2.4s, v2.4s, v6.4s
425	eor	v0.16b, v1.16b, v0.16b
426	add	v4.4s, v4.4s, v16.4s
427	ext	v1.16b, v1.16b, v1.16b, #12
428	ext	v16.16b, v16.16b, v16.16b, #4
429	ushr	v17.4s, v0.4s, #7
430	shl	v0.4s, v0.4s, #25
431	ext	v4.16b, v4.16b, v4.16b, #4
432	orr	v17.16b, v0.16b, v17.16b
433	ext	v0.16b, v5.16b, v5.16b, #8
434	ext	v5.16b, v7.16b, v7.16b, #12
435	add	v4.4s, v4.4s, v17.4s
436	eor	v0.16b, v0.16b, v4.16b
437	rev32	v18.8h, v0.8h
438	ext	v0.16b, v7.16b, v5.16b, #12
439	mov	v5.16b, v6.16b
440	add	v7.4s, v1.4s, v18.4s
441	rev64	v1.4s, v0.4s
442	mov	v5.s[1], v3.s[2]
443	eor	v17.16b, v7.16b, v17.16b
444	trn2	v1.4s, v1.4s, v5.4s
445	ushr	v19.4s, v17.4s, #12
446	shl	v17.4s, v17.4s, #20
447	add	v4.4s, v4.4s, v1.4s
448	orr	v17.16b, v17.16b, v19.16b
449	add	v19.4s, v4.4s, v17.4s
450	eor	v4.16b, v19.16b, v18.16b
451	ext	v19.16b, v19.16b, v19.16b, #12
452	ushr	v18.4s, v4.4s, #8
453	shl	v4.4s, v4.4s, #24
454	orr	v18.16b, v4.16b, v18.16b
455	ext	v4.16b, v20.16b, v20.16b, #12
456	add	v7.4s, v18.4s, v7.4s
457	uzp1	v4.4s, v20.4s, v4.4s
458	ext	v18.16b, v18.16b, v18.16b, #8
459	eor	v17.16b, v7.16b, v17.16b
460	add	v19.4s, v19.4s, v4.4s
461	ext	v7.16b, v7.16b, v7.16b, #4
462	ushr	v20.4s, v17.4s, #7
463	shl	v17.4s, v17.4s, #25
464	orr	v17.16b, v17.16b, v20.16b
465	add	v19.4s, v19.4s, v17.4s
466	eor	v18.16b, v18.16b, v19.16b
467	rev32	v18.8h, v18.8h
468	add	v6.4s, v7.4s, v18.4s
469	zip1	v7.4s, v2.4s, v3.4s
470	zip1	v2.4s, v3.4s, v2.4s
471	eor	v3.16b, v6.16b, v17.16b
472	ext	v2.16b, v2.16b, v7.16b, #8
473	ushr	v7.4s, v3.4s, #12
474	shl	v3.4s, v3.4s, #20
475	add	v17.4s, v19.4s, v2.4s
476	zip1	v1.2d, v2.2d, v1.2d
477	zip2	v0.4s, v0.4s, v2.4s
478	orr	v3.16b, v3.16b, v7.16b
479	mov	v1.s[3], v4.s[3]
480	add	v7.4s, v17.4s, v3.4s
481	eor	v17.16b, v7.16b, v18.16b
482	ext	v7.16b, v7.16b, v7.16b, #4
483	ushr	v18.4s, v17.4s, #8
484	shl	v17.4s, v17.4s, #24
485	orr	v17.16b, v17.16b, v18.16b
486	ext	v18.16b, v16.16b, v16.16b, #8
487	add	v6.4s, v17.4s, v6.4s
488	uzp2	v5.4s, v18.4s, v5.4s
489	eor	v3.16b, v6.16b, v3.16b
490	ext	v5.16b, v5.16b, v18.16b, #4
491	ext	v6.16b, v6.16b, v6.16b, #12
492	ushr	v18.4s, v3.4s, #7
493	shl	v3.4s, v3.4s, #25
494	add	v5.4s, v7.4s, v5.4s
495	ext	v7.16b, v17.16b, v17.16b, #8
496	ext	v17.16b, v16.16b, v16.16b, #12
497	orr	v3.16b, v3.16b, v18.16b
498	ext	v16.16b, v16.16b, v17.16b, #12
499	add	v5.4s, v3.4s, v5.4s
500	mov	v17.16b, v2.16b
501	rev64	v16.4s, v16.4s
502	eor	v7.16b, v7.16b, v5.16b
503	mov	v17.s[1], v4.s[2]
504	rev32	v7.8h, v7.8h
505	trn2	v16.4s, v16.4s, v17.4s
506	add	v6.4s, v6.4s, v7.4s
507	add	v5.4s, v5.4s, v16.4s
508	eor	v3.16b, v6.16b, v3.16b
509	ushr	v17.4s, v3.4s, #12
510	shl	v3.4s, v3.4s, #20
511	orr	v3.16b, v3.16b, v17.16b
512	add	v5.4s, v5.4s, v3.4s
513	eor	v7.16b, v5.16b, v7.16b
514	ext	v5.16b, v5.16b, v5.16b, #12
515	ushr	v16.4s, v7.4s, #8
516	shl	v7.4s, v7.4s, #24
517	orr	v7.16b, v7.16b, v16.16b
518	ext	v16.16b, v1.16b, v1.16b, #12
519	add	v6.4s, v7.4s, v6.4s
520	uzp1	v1.4s, v1.4s, v16.4s
521	eor	v3.16b, v6.16b, v3.16b
522	add	v1.4s, v5.4s, v1.4s
523	ext	v5.16b, v7.16b, v7.16b, #8
524	ext	v6.16b, v6.16b, v6.16b, #4
525	ushr	v16.4s, v3.4s, #7
526	shl	v3.4s, v3.4s, #25
527	orr	v3.16b, v3.16b, v16.16b
528	add	v1.4s, v1.4s, v3.4s
529	eor	v5.16b, v5.16b, v1.16b
530	rev32	v5.8h, v5.8h
531	add	v2.4s, v6.4s, v5.4s
532	zip1	v6.4s, v0.4s, v4.4s
533	zip1	v0.4s, v4.4s, v0.4s
534	eor	v3.16b, v2.16b, v3.16b
535	ext	v0.16b, v0.16b, v6.16b, #8
536	ushr	v4.4s, v3.4s, #12
537	shl	v3.4s, v3.4s, #20
538	add	v0.4s, v1.4s, v0.4s
539	orr	v1.16b, v3.16b, v4.16b
540	add	v0.4s, v0.4s, v1.4s
541	eor	v3.16b, v0.16b, v5.16b
	/*
	 * ---- end of the last round ----
	 * The ext instructions on v0, v3 and v2 rotate rows 0, 3 and 2 back
	 * out of the diagonal arrangement; row 1 gets its final rotate by 7.
	 */
542	ext	v0.16b, v0.16b, v0.16b, #4
543	ushr	v4.4s, v3.4s, #8
544	shl	v3.4s, v3.4s, #24
545	orr	v3.16b, v3.16b, v4.16b
546	add	v2.4s, v3.4s, v2.4s
547	ext	v3.16b, v3.16b, v3.16b, #8
548	eor	v1.16b, v2.16b, v1.16b
549	ext	v2.16b, v2.16b, v2.16b, #12
550	ushr	v4.4s, v1.4s, #7
551	shl	v1.4s, v1.4s, #25
	/* write rows 2,3 to bytes 32..63 and rows 0,1 to bytes 0..31 */
552	stp	q2, q3, [x0, #32]
553	orr	v1.16b, v1.16b, v4.16b
554	stp	q0, q1, [x0]
555	ret
556.Lfunc_end1:
557	.size	compress_pre, .Lfunc_end1-compress_pre
558	.cfi_endproc
559
/*
 * zfs_blake3_compress_xof_sse2
 *
 * Runs compress_pre on a 64-byte scratch state held on the stack and
 * writes a 64-byte result to the buffer addressed by x5.  The 32 bytes
 * at x0 are only read.
 *
 * In (AAPCS64 integer argument registers):
 *   x0 = pointer to the 32-byte input state (read only here)
 *   x1 = pointer to the 64-byte message block (forwarded as x2)
 *   w2 = forwarded as w3 (compress_pre keeps only the low 8 bits)
 *   x3 = forwarded as x4 (64-bit value)
 *   w4 = forwarded as w5 (compress_pre keeps only the low 8 bits)
 *   x5 = pointer to the 64-byte output buffer
 * NOTE(review): upstream BLAKE3 presumably names these cv, block,
 * block_len, counter, flags and out -- the C prototype is not visible
 * here.
 *
 * Out (rows refer to the 16-byte rows of the scratch state):
 *   out[ 0..31] = rows 0,1 XOR rows 2,3
 *   out[32..47] = input[ 0..15] XOR row 2
 *   out[48..63] = input[16..31] XOR row 3
 *
 * Stack frame (96 bytes):
 *   [sp, #0  .. #63]  scratch state filled in by compress_pre
 *   [sp, #64 .. #79]  saved x29, x30
 *   [sp, #80 .. #95]  saved x20, x19
 */
560	.globl	zfs_blake3_compress_xof_sse2
561	.p2align	2
562	.type	zfs_blake3_compress_xof_sse2,@function
563zfs_blake3_compress_xof_sse2:
564	.cfi_startproc
	/* hint #25 is PACIASP: sign the return address in x30 using sp */
565	hint	#25
566	CFI_NEGATE_RA_STATE
567	sub	sp, sp, #96
568	stp	x29, x30, [sp, #64]
569	add	x29, sp, #64
570	stp	x20, x19, [sp, #80]
	/* CFA = x29 + 32, i.e. the value of sp on entry */
571	.cfi_def_cfa w29, 32
572	.cfi_offset w19, -8
573	.cfi_offset w20, -16
574	.cfi_offset w30, -24
575	.cfi_offset w29, -32
	/* x20 = input state pointer, x19 = output pointer; both live across the call */
576	mov	x20, x0
577	mov	x19, x5
	/*
	 * Shift the remaining arguments up by one register to make room
	 * for the scratch pointer in x0.  x5 was saved above before w5 is
	 * overwritten; the moves run from the highest register downwards.
	 */
578	mov	w5, w4
579	mov	x4, x3
580	mov	w3, w2
581	mov	x2, x1
582	mov	x0, sp
583	mov	x1, x20
584	bl	compress_pre
	/* q0,q1 = rows 0,1 of the scratch state, q2,q3 = rows 2,3 */
585	ldp	q0, q1, [sp]
586	ldp	q2, q3, [sp, #32]
	/* first 32 output bytes = rows 0,1 XOR rows 2,3 */
587	eor	v0.16b, v2.16b, v0.16b
588	eor	v1.16b, v3.16b, v1.16b
589	ldp	x29, x30, [sp, #64]
590	stp	q0, q1, [x19]
	/* next 16 output bytes = first 16 input bytes XOR row 2 */
591	ldr	q0, [x20]
592	eor	v0.16b, v0.16b, v2.16b
593	str	q0, [x19, #32]
	/* last 16 output bytes = second 16 input bytes XOR row 3 */
594	ldr	q0, [x20, #16]
595	eor	v0.16b, v0.16b, v3.16b
596	str	q0, [x19, #48]
597	ldp	x20, x19, [sp, #80]
598	add	sp, sp, #96
	/* hint #29 is AUTIASP: authenticate the return address before ret */
599	hint	#29
600	ret
601.Lfunc_end2:
602	.size	zfs_blake3_compress_xof_sse2, .Lfunc_end2-zfs_blake3_compress_xof_sse2
603	.cfi_endproc
604
605	.section	.rodata.cst16,"aM",@progbits,16
606	.p2align	4
607.LCPI3_0:
608	.word	0
609	.word	1
610	.word	2
611	.word	3
612	.text
613	.globl	zfs_blake3_hash_many_sse2
614	.p2align	2
615	.type	zfs_blake3_hash_many_sse2,@function
616zfs_blake3_hash_many_sse2:
617	.cfi_startproc
618	hint	#25
619	CFI_NEGATE_RA_STATE
620	stp	d15, d14, [sp, #-160]!
621	stp	d13, d12, [sp, #16]
622	stp	d11, d10, [sp, #32]
623	stp	d9, d8, [sp, #48]
624	stp	x29, x30, [sp, #64]
625	add	x29, sp, #64
626	stp	x28, x27, [sp, #80]
627	stp	x26, x25, [sp, #96]
628	stp	x24, x23, [sp, #112]
629	stp	x22, x21, [sp, #128]
630	stp	x20, x19, [sp, #144]
631	sub	sp, sp, #464
632	.cfi_def_cfa w29, 96
633	.cfi_offset w19, -8
634	.cfi_offset w20, -16
635	.cfi_offset w21, -24
636	.cfi_offset w22, -32
637	.cfi_offset w23, -40
638	.cfi_offset w24, -48
639	.cfi_offset w25, -56
640	.cfi_offset w26, -64
641	.cfi_offset w27, -72
642	.cfi_offset w28, -80
643	.cfi_offset w30, -88
644	.cfi_offset w29, -96
645	.cfi_offset b8, -104
646	.cfi_offset b9, -112
647	.cfi_offset b10, -120
648	.cfi_offset b11, -128
649	.cfi_offset b12, -136
650	.cfi_offset b13, -144
651	.cfi_offset b14, -152
652	.cfi_offset b15, -160
653	mov	w19, w6
654	mov	x20, x4
655	mov	x24, x1
656	ldr	x26, [x29, #104]
657	ldrb	w27, [x29, #96]
658	cmp	x1, #4
659	str	x3, [sp, #40]
660	b.lo	.LBB3_6
661	adrp	x8, .LCPI3_0
662	sbfx	w9, w5, #0, #1
663	mov	w10, #44677
664	mov	w11, #62322
665	movk	w10, #47975, lsl #16
666	movk	w11, #15470, lsl #16
667	ldr	q0, [x8, :lo12:.LCPI3_0]
668	dup	v1.4s, w9
669	mov	w9, #58983
670	orr	w8, w7, w19
671	movk	w9, #27145, lsl #16
672	and	v0.16b, v1.16b, v0.16b
673	dup	v1.4s, w11
674	movi	v24.4s, #64
675	dup	v2.4s, w9
676	mov	w9, #62778
677	movk	w9, #42319, lsl #16
678	str	q0, [sp, #16]
679	orr	v0.4s, #128, lsl #24
680	stp	q2, q1, [sp, #48]
681	str	q0, [sp]
682	dup	v0.4s, w10
683	str	q0, [sp, #80]
684	b	.LBB3_3
685.LBB3_2:
686	zip1	v0.4s, v12.4s, v31.4s
687	add	x10, x20, #4
688	zip1	v1.4s, v29.4s, v30.4s
689	tst	w5, #0x1
690	zip1	v2.4s, v28.4s, v23.4s
691	csel	x20, x10, x20, ne
692	zip1	v3.4s, v13.4s, v25.4s
693	add	x0, x0, #32
694	zip2	v6.4s, v12.4s, v31.4s
695	sub	x24, x24, #4
696	zip1	v4.2d, v0.2d, v1.2d
697	cmp	x24, #3
698	zip2	v7.4s, v29.4s, v30.4s
699	zip1	v5.2d, v2.2d, v3.2d
700	zip2	v0.2d, v0.2d, v1.2d
701	zip2	v1.2d, v2.2d, v3.2d
702	zip2	v2.4s, v28.4s, v23.4s
703	zip2	v3.4s, v13.4s, v25.4s
704	stp	q4, q5, [x26]
705	zip2	v4.2d, v6.2d, v7.2d
706	stp	q0, q1, [x26, #32]
707	zip1	v0.2d, v6.2d, v7.2d
708	zip1	v1.2d, v2.2d, v3.2d
709	zip2	v2.2d, v2.2d, v3.2d
710	stp	q0, q1, [x26, #64]
711	stp	q4, q2, [x26, #96]
712	add	x26, x26, #128
713	b.ls	.LBB3_6
714.LBB3_3:
715	ldr	x14, [sp, #40]
716	mov	x10, x14
717	add	x11, x14, #8
718	add	x12, x14, #12
719	add	x13, x14, #16
720	ld1r	{ v12.4s }, [x10], #4
721	ld1r	{ v29.4s }, [x11]
722	add	x11, x14, #20
723	ld1r	{ v30.4s }, [x12]
724	add	x12, x14, #24
725	ld1r	{ v28.4s }, [x13]
726	ld1r	{ v23.4s }, [x11]
727	add	x11, x14, #28
728	ld1r	{ v13.4s }, [x12]
729	ld1r	{ v31.4s }, [x10]
730	ld1r	{ v25.4s }, [x11]
731	cbz	x2, .LBB3_2
732	ldr	q1, [sp, #16]
733	dup	v0.4s, w20
734	lsr	x12, x20, #32
735	mov	x10, xzr
736	ldp	x13, x14, [x0, #16]
737	add	v1.4s, v0.4s, v1.4s
738	mov	x15, x2
739	movi	v0.4s, #128, lsl #24
740	mov	w4, w8
741	str	q1, [sp, #112]
742	eor	v0.16b, v1.16b, v0.16b
743	ldr	q1, [sp]
744	cmgt	v0.4s, v1.4s, v0.4s
745	dup	v1.4s, w12
746	ldp	x11, x12, [x0]
747	sub	v0.4s, v1.4s, v0.4s
748	str	q0, [sp, #96]
749.LBB3_5:
750	add	x17, x11, x10
751	add	x21, x12, x10
752	add	x16, x13, x10
753	add	x6, x14, x10
754	subs	x15, x15, #1
755	add	x10, x10, #64
756	ldp	q0, q1, [x17]
757	csel	w3, w27, wzr, eq
758	orr	w3, w3, w4
759	mov	w4, w19
760	and	w3, w3, #0xff
761	ldp	q3, q6, [x21]
762	dup	v2.4s, w3
763	zip1	v21.4s, v0.4s, v3.4s
764	zip2	v19.4s, v0.4s, v3.4s
765	ldp	q5, q7, [x16]
766	zip1	v17.4s, v1.4s, v6.4s
767	zip2	v22.4s, v1.4s, v6.4s
768	ldp	q16, q18, [x6]
769	zip1	v4.4s, v5.4s, v16.4s
770	zip2	v0.4s, v5.4s, v16.4s
771	ldp	q26, q27, [x17, #32]
772	zip1	v1.4s, v7.4s, v18.4s
773	zip2	v3.4s, v7.4s, v18.4s
774	zip2	v20.2d, v19.2d, v0.2d
775	mov	v19.d[1], v0.d[0]
776	dup	v18.4s, w9
777	ldp	q8, q9, [x21, #32]
778	stur	q19, [x29, #-208]
779	zip2	v7.4s, v26.4s, v8.4s
780	zip1	v10.4s, v26.4s, v8.4s
781	ldp	q11, q5, [x16, #32]
782	zip2	v26.2d, v17.2d, v1.2d
783	stp	q7, q26, [sp, #192]
784	mov	v17.d[1], v1.d[0]
785	add	v1.4s, v23.4s, v31.4s
786	ldp	q16, q6, [x6, #32]
787	stur	q17, [x29, #-256]
788	add	v1.4s, v1.4s, v19.4s
789	zip1	v8.4s, v11.4s, v16.4s
790	zip2	v7.4s, v11.4s, v16.4s
791	zip1	v11.4s, v27.4s, v9.4s
792	zip2	v9.4s, v27.4s, v9.4s
793	zip2	v27.2d, v21.2d, v4.2d
794	mov	v21.d[1], v4.d[0]
795	str	q7, [sp, #224]
796	add	v4.4s, v28.4s, v12.4s
797	zip1	v15.4s, v5.4s, v6.4s
798	zip2	v14.4s, v5.4s, v6.4s
799	stur	q27, [x29, #-192]
800	zip2	v16.2d, v22.2d, v3.2d
801	stp	q20, q21, [x29, #-240]
802	add	v0.4s, v4.4s, v21.4s
803	ldp	q6, q4, [sp, #96]
804	mov	v22.d[1], v3.d[0]
805	add	v5.4s, v25.4s, v30.4s
806	add	v3.4s, v13.4s, v29.4s
807	eor	v6.16b, v1.16b, v6.16b
808	add	v1.4s, v1.4s, v20.4s
809	str	q22, [sp, #256]
810	eor	v4.16b, v0.16b, v4.16b
811	add	v5.4s, v5.4s, v22.4s
812	add	v3.4s, v3.4s, v17.4s
813	ldr	q17, [sp, #48]
814	rev32	v6.8h, v6.8h
815	rev32	v4.8h, v4.8h
816	eor	v2.16b, v5.16b, v2.16b
817	eor	v7.16b, v3.16b, v24.16b
818	add	v0.4s, v0.4s, v27.4s
819	add	v21.4s, v4.4s, v17.4s
820	rev32	v31.8h, v2.8h
821	ldr	q2, [sp, #80]
822	rev32	v7.8h, v7.8h
823	mov	v27.16b, v16.16b
824	eor	v17.16b, v21.16b, v28.16b
825	add	v29.4s, v6.4s, v2.4s
826	ldr	q2, [sp, #64]
827	add	v24.4s, v31.4s, v18.4s
828	str	q27, [sp, #176]
829	ushr	v19.4s, v17.4s, #12
830	shl	v17.4s, v17.4s, #20
831	add	v30.4s, v7.4s, v2.4s
832	eor	v18.16b, v29.16b, v23.16b
833	orr	v12.16b, v17.16b, v19.16b
834	eor	v17.16b, v30.16b, v13.16b
835	eor	v19.16b, v24.16b, v25.16b
836	ushr	v23.4s, v18.4s, #12
837	shl	v18.4s, v18.4s, #20
838	ushr	v25.4s, v17.4s, #12
839	shl	v17.4s, v17.4s, #20
840	ushr	v28.4s, v19.4s, #12
841	shl	v19.4s, v19.4s, #20
842	orr	v13.16b, v18.16b, v23.16b
843	orr	v25.16b, v17.16b, v25.16b
844	orr	v2.16b, v19.16b, v28.16b
845	add	v28.4s, v0.4s, v12.4s
846	add	v0.4s, v3.4s, v26.4s
847	add	v18.4s, v1.4s, v13.4s
848	add	v3.4s, v5.4s, v16.4s
849	eor	v1.16b, v28.16b, v4.16b
850	add	v17.4s, v0.4s, v25.4s
851	eor	v0.16b, v18.16b, v6.16b
852	add	v19.4s, v3.4s, v2.4s
853	ushr	v16.4s, v1.4s, #8
854	shl	v3.4s, v1.4s, #24
855	eor	v4.16b, v17.16b, v7.16b
856	ushr	v6.4s, v0.4s, #8
857	shl	v1.4s, v0.4s, #24
858	eor	v5.16b, v19.16b, v31.16b
859	ushr	v23.4s, v4.4s, #8
860	shl	v4.4s, v4.4s, #24
861	orr	v7.16b, v3.16b, v16.16b
862	orr	v6.16b, v1.16b, v6.16b
863	ushr	v31.4s, v5.4s, #8
864	shl	v0.4s, v5.4s, #24
865	orr	v5.16b, v4.16b, v23.16b
866	add	v4.4s, v7.4s, v21.4s
867	ldr	q21, [sp, #192]
868	add	v3.4s, v6.4s, v29.4s
869	orr	v31.16b, v0.16b, v31.16b
870	add	v23.4s, v5.4s, v30.4s
871	eor	v0.16b, v4.16b, v12.16b
872	eor	v1.16b, v3.16b, v13.16b
873	add	v16.4s, v31.4s, v24.4s
874	eor	v20.16b, v23.16b, v25.16b
875	ushr	v24.4s, v0.4s, #7
876	shl	v0.4s, v0.4s, #25
877	ushr	v29.4s, v1.4s, #7
878	shl	v1.4s, v1.4s, #25
879	ushr	v30.4s, v20.4s, #7
880	shl	v20.4s, v20.4s, #25
881	orr	v25.16b, v0.16b, v24.16b
882	orr	v0.16b, v1.16b, v29.16b
883	mov	v29.16b, v10.16b
884	orr	v1.16b, v20.16b, v30.16b
885	mov	v20.16b, v10.16b
886	mov	v24.16b, v21.16b
887	ldr	q20, [sp, #224]
888	mov	v29.d[1], v8.d[0]
889	mov	v13.16b, v9.16b
890	zip2	v30.2d, v10.2d, v8.2d
891	zip2	v8.2d, v21.2d, v20.2d
892	mov	v26.16b, v11.16b
893	mov	v24.d[1], v20.d[0]
894	add	v20.4s, v28.4s, v29.4s
895	mov	v13.d[1], v14.d[0]
896	str	q8, [sp, #128]
897	eor	v2.16b, v16.16b, v2.16b
898	mov	v26.d[1], v15.d[0]
899	str	q24, [sp, #192]
900	add	v20.4s, v20.4s, v0.4s
901	add	v19.4s, v19.4s, v13.4s
902	ushr	v12.4s, v2.4s, #7
903	shl	v2.4s, v2.4s, #25
904	zip2	v10.2d, v9.2d, v14.2d
905	add	v18.4s, v18.4s, v24.4s
906	add	v17.4s, v17.4s, v26.4s
907	mov	v14.16b, v26.16b
908	eor	v26.16b, v20.16b, v31.16b
909	stp	q10, q30, [sp, #224]
910	add	v19.4s, v19.4s, v25.4s
911	orr	v2.16b, v2.16b, v12.16b
912	add	v18.4s, v18.4s, v1.4s
913	rev32	v26.8h, v26.8h
914	eor	v5.16b, v19.16b, v5.16b
915	add	v17.4s, v17.4s, v2.4s
916	eor	v7.16b, v18.16b, v7.16b
917	add	v23.4s, v23.4s, v26.4s
918	rev32	v5.8h, v5.8h
919	eor	v6.16b, v17.16b, v6.16b
920	rev32	v7.8h, v7.8h
921	eor	v0.16b, v23.16b, v0.16b
922	add	v3.4s, v3.4s, v5.4s
923	rev32	v6.8h, v6.8h
924	add	v16.4s, v16.4s, v7.4s
925	ushr	v31.4s, v0.4s, #12
926	shl	v0.4s, v0.4s, #20
927	eor	v25.16b, v3.16b, v25.16b
928	add	v4.4s, v4.4s, v6.4s
929	eor	v1.16b, v16.16b, v1.16b
930	orr	v0.16b, v0.16b, v31.16b
931	ushr	v31.4s, v25.4s, #12
932	shl	v25.4s, v25.4s, #20
933	add	v20.4s, v20.4s, v30.4s
934	zip2	v21.2d, v11.2d, v15.2d
935	ushr	v11.4s, v1.4s, #12
936	shl	v1.4s, v1.4s, #20
937	eor	v2.16b, v4.16b, v2.16b
938	orr	v25.16b, v25.16b, v31.16b
939	add	v19.4s, v19.4s, v10.4s
940	add	v20.4s, v20.4s, v0.4s
941	orr	v1.16b, v1.16b, v11.16b
942	ushr	v11.4s, v2.4s, #12
943	shl	v2.4s, v2.4s, #20
944	add	v18.4s, v18.4s, v8.4s
945	add	v19.4s, v19.4s, v25.4s
946	eor	v26.16b, v20.16b, v26.16b
947	orr	v2.16b, v2.16b, v11.16b
948	add	v17.4s, v17.4s, v21.4s
949	add	v18.4s, v18.4s, v1.4s
950	eor	v5.16b, v19.16b, v5.16b
951	ushr	v31.4s, v26.4s, #8
952	shl	v26.4s, v26.4s, #24
953	add	v17.4s, v17.4s, v2.4s
954	ushr	v11.4s, v5.4s, #8
955	shl	v5.4s, v5.4s, #24
956	eor	v7.16b, v18.16b, v7.16b
957	orr	v26.16b, v26.16b, v31.16b
958	eor	v6.16b, v17.16b, v6.16b
959	orr	v5.16b, v5.16b, v11.16b
960	ushr	v31.4s, v7.4s, #8
961	shl	v7.4s, v7.4s, #24
962	add	v23.4s, v26.4s, v23.4s
963	ushr	v11.4s, v6.4s, #8
964	shl	v6.4s, v6.4s, #24
965	orr	v7.16b, v7.16b, v31.16b
966	add	v3.4s, v5.4s, v3.4s
967	eor	v0.16b, v23.16b, v0.16b
968	ldp	q28, q12, [x29, #-256]
969	orr	v6.16b, v6.16b, v11.16b
970	add	v16.4s, v7.4s, v16.4s
971	eor	v25.16b, v3.16b, v25.16b
972	ushr	v31.4s, v0.4s, #7
973	shl	v0.4s, v0.4s, #25
974	add	v4.4s, v6.4s, v4.4s
975	ushr	v11.4s, v25.4s, #7
976	shl	v25.4s, v25.4s, #25
977	eor	v1.16b, v16.16b, v1.16b
978	orr	v0.16b, v0.16b, v31.16b
979	add	v18.4s, v18.4s, v12.4s
980	mov	v15.16b, v29.16b
981	ldur	q29, [x29, #-208]
982	eor	v2.16b, v4.16b, v2.16b
983	orr	v25.16b, v25.16b, v11.16b
984	ushr	v31.4s, v1.4s, #7
985	shl	v1.4s, v1.4s, #25
986	str	q15, [sp, #160]
987	add	v20.4s, v20.4s, v29.4s
988	add	v18.4s, v18.4s, v0.4s
989	ushr	v11.4s, v2.4s, #7
990	shl	v2.4s, v2.4s, #25
991	orr	v1.16b, v1.16b, v31.16b
992	add	v20.4s, v20.4s, v25.4s
993	add	v17.4s, v17.4s, v27.4s
994	eor	v6.16b, v6.16b, v18.16b
995	orr	v2.16b, v2.16b, v11.16b
996	add	v19.4s, v19.4s, v28.4s
997	eor	v7.16b, v7.16b, v20.16b
998	add	v17.4s, v17.4s, v1.4s
999	rev32	v6.8h, v6.8h
1000	add	v19.4s, v19.4s, v2.4s
1001	rev32	v7.8h, v7.8h
1002	eor	v5.16b, v17.16b, v5.16b
1003	add	v3.4s, v3.4s, v6.4s
1004	eor	v26.16b, v19.16b, v26.16b
1005	add	v4.4s, v4.4s, v7.4s
1006	rev32	v5.8h, v5.8h
1007	eor	v0.16b, v3.16b, v0.16b
1008	rev32	v26.8h, v26.8h
1009	eor	v25.16b, v4.16b, v25.16b
1010	add	v23.4s, v23.4s, v5.4s
1011	ushr	v11.4s, v0.4s, #12
1012	shl	v0.4s, v0.4s, #20
1013	add	v16.4s, v16.4s, v26.4s
1014	ushr	v31.4s, v25.4s, #12
1015	shl	v25.4s, v25.4s, #20
1016	eor	v1.16b, v23.16b, v1.16b
1017	orr	v0.16b, v0.16b, v11.16b
1018	add	v18.4s, v18.4s, v24.4s
1019	orr	v25.16b, v25.16b, v31.16b
1020	eor	v2.16b, v16.16b, v2.16b
1021	ushr	v31.4s, v1.4s, #12
1022	shl	v1.4s, v1.4s, #20
1023	add	v20.4s, v20.4s, v22.4s
1024	add	v18.4s, v18.4s, v0.4s
1025	mov	v9.16b, v30.16b
1026	mov	v30.16b, v21.16b
1027	ldur	q21, [x29, #-224]
1028	ushr	v11.4s, v2.4s, #12
1029	shl	v2.4s, v2.4s, #20
1030	orr	v1.16b, v1.16b, v31.16b
1031	add	v20.4s, v20.4s, v25.4s
1032	str	q30, [sp, #144]
1033	add	v17.4s, v17.4s, v21.4s
1034	ldur	q21, [x29, #-192]
1035	eor	v6.16b, v18.16b, v6.16b
1036	orr	v2.16b, v2.16b, v11.16b
1037	add	v19.4s, v19.4s, v30.4s
1038	eor	v7.16b, v20.16b, v7.16b
1039	add	v17.4s, v17.4s, v1.4s
1040	ushr	v11.4s, v6.4s, #8
1041	shl	v6.4s, v6.4s, #24
1042	add	v19.4s, v19.4s, v2.4s
1043	ushr	v31.4s, v7.4s, #8
1044	shl	v7.4s, v7.4s, #24
1045	eor	v5.16b, v17.16b, v5.16b
1046	orr	v6.16b, v6.16b, v11.16b
1047	eor	v26.16b, v19.16b, v26.16b
1048	orr	v7.16b, v7.16b, v31.16b
1049	ushr	v31.4s, v5.4s, #8
1050	shl	v5.4s, v5.4s, #24
1051	add	v3.4s, v6.4s, v3.4s
1052	ushr	v11.4s, v26.4s, #8
1053	shl	v26.4s, v26.4s, #24
1054	add	v4.4s, v7.4s, v4.4s
1055	orr	v5.16b, v5.16b, v31.16b
1056	eor	v0.16b, v3.16b, v0.16b
1057	orr	v26.16b, v26.16b, v11.16b
1058	eor	v25.16b, v4.16b, v25.16b
1059	add	v23.4s, v5.4s, v23.4s
1060	ushr	v11.4s, v0.4s, #7
1061	shl	v0.4s, v0.4s, #25
1062	add	v16.4s, v26.4s, v16.4s
1063	ushr	v31.4s, v25.4s, #7
1064	shl	v25.4s, v25.4s, #25
1065	eor	v1.16b, v23.16b, v1.16b
1066	orr	v0.16b, v0.16b, v11.16b
1067	add	v20.4s, v20.4s, v21.4s
1068	orr	v25.16b, v25.16b, v31.16b
1069	eor	v2.16b, v16.16b, v2.16b
1070	ushr	v31.4s, v1.4s, #7
1071	shl	v1.4s, v1.4s, #25
1072	add	v20.4s, v20.4s, v0.4s
1073	add	v19.4s, v19.4s, v10.4s
1074	ushr	v11.4s, v2.4s, #7
1075	shl	v2.4s, v2.4s, #25
1076	orr	v1.16b, v1.16b, v31.16b
1077	add	v18.4s, v18.4s, v14.4s
1078	eor	v26.16b, v20.16b, v26.16b
1079	add	v19.4s, v19.4s, v25.4s
1080	orr	v2.16b, v2.16b, v11.16b
1081	add	v17.4s, v17.4s, v9.4s
1082	ldr	q9, [sp, #208]
1083	add	v18.4s, v18.4s, v1.4s
1084	rev32	v26.8h, v26.8h
1085	eor	v5.16b, v19.16b, v5.16b
1086	add	v17.4s, v17.4s, v2.4s
1087	eor	v7.16b, v18.16b, v7.16b
1088	add	v23.4s, v23.4s, v26.4s
1089	rev32	v5.8h, v5.8h
1090	eor	v6.16b, v17.16b, v6.16b
1091	rev32	v7.8h, v7.8h
1092	eor	v0.16b, v23.16b, v0.16b
1093	add	v3.4s, v3.4s, v5.4s
1094	rev32	v6.8h, v6.8h
1095	add	v16.4s, v16.4s, v7.4s
1096	ushr	v31.4s, v0.4s, #12
1097	shl	v0.4s, v0.4s, #20
1098	eor	v25.16b, v3.16b, v25.16b
1099	add	v4.4s, v4.4s, v6.4s
1100	eor	v1.16b, v16.16b, v1.16b
1101	orr	v0.16b, v0.16b, v31.16b
1102	ushr	v31.4s, v25.4s, #12
1103	shl	v25.4s, v25.4s, #20
1104	add	v20.4s, v20.4s, v8.4s
1105	ushr	v11.4s, v1.4s, #12
1106	shl	v1.4s, v1.4s, #20
1107	eor	v2.16b, v4.16b, v2.16b
1108	orr	v25.16b, v25.16b, v31.16b
1109	add	v19.4s, v19.4s, v15.4s
1110	add	v20.4s, v20.4s, v0.4s
1111	orr	v1.16b, v1.16b, v11.16b
1112	ushr	v11.4s, v2.4s, #12
1113	shl	v2.4s, v2.4s, #20
1114	add	v18.4s, v18.4s, v9.4s
1115	add	v19.4s, v19.4s, v25.4s
1116	eor	v26.16b, v20.16b, v26.16b
1117	orr	v2.16b, v2.16b, v11.16b
1118	add	v17.4s, v17.4s, v13.4s
1119	add	v18.4s, v18.4s, v1.4s
1120	eor	v5.16b, v19.16b, v5.16b
1121	ushr	v31.4s, v26.4s, #8
1122	shl	v26.4s, v26.4s, #24
1123	add	v17.4s, v17.4s, v2.4s
1124	ushr	v11.4s, v5.4s, #8
1125	shl	v5.4s, v5.4s, #24
1126	eor	v7.16b, v18.16b, v7.16b
1127	orr	v26.16b, v26.16b, v31.16b
1128	eor	v6.16b, v17.16b, v6.16b
1129	orr	v5.16b, v5.16b, v11.16b
1130	ushr	v31.4s, v7.4s, #8
1131	shl	v7.4s, v7.4s, #24
1132	add	v23.4s, v26.4s, v23.4s
1133	ushr	v11.4s, v6.4s, #8
1134	shl	v6.4s, v6.4s, #24
1135	orr	v7.16b, v7.16b, v31.16b
1136	add	v3.4s, v5.4s, v3.4s
1137	eor	v0.16b, v23.16b, v0.16b
1138	orr	v6.16b, v6.16b, v11.16b
1139	add	v16.4s, v7.4s, v16.4s
1140	eor	v25.16b, v3.16b, v25.16b
1141	ushr	v31.4s, v0.4s, #7
1142	shl	v0.4s, v0.4s, #25
1143	add	v4.4s, v6.4s, v4.4s
1144	ushr	v11.4s, v25.4s, #7
1145	shl	v25.4s, v25.4s, #25
1146	eor	v1.16b, v16.16b, v1.16b
1147	orr	v0.16b, v0.16b, v31.16b
1148	add	v18.4s, v18.4s, v24.4s
1149	eor	v2.16b, v4.16b, v2.16b
1150	orr	v25.16b, v25.16b, v11.16b
1151	ushr	v31.4s, v1.4s, #7
1152	shl	v1.4s, v1.4s, #25
1153	add	v20.4s, v20.4s, v12.4s
1154	add	v18.4s, v18.4s, v0.4s
1155	ushr	v11.4s, v2.4s, #7
1156	shl	v2.4s, v2.4s, #25
1157	orr	v1.16b, v1.16b, v31.16b
1158	add	v20.4s, v20.4s, v25.4s
1159	add	v17.4s, v17.4s, v30.4s
1160	eor	v6.16b, v6.16b, v18.16b
1161	orr	v2.16b, v2.16b, v11.16b
1162	add	v19.4s, v19.4s, v27.4s
1163	eor	v7.16b, v7.16b, v20.16b
1164	add	v17.4s, v17.4s, v1.4s
1165	rev32	v6.8h, v6.8h
1166	add	v19.4s, v19.4s, v2.4s
1167	rev32	v7.8h, v7.8h
1168	eor	v5.16b, v17.16b, v5.16b
1169	add	v3.4s, v3.4s, v6.4s
1170	eor	v26.16b, v19.16b, v26.16b
1171	add	v4.4s, v4.4s, v7.4s
1172	rev32	v5.8h, v5.8h
1173	eor	v0.16b, v3.16b, v0.16b
1174	rev32	v26.8h, v26.8h
1175	eor	v25.16b, v4.16b, v25.16b
1176	add	v23.4s, v23.4s, v5.4s
1177	ushr	v11.4s, v0.4s, #12
1178	shl	v0.4s, v0.4s, #20
1179	add	v16.4s, v16.4s, v26.4s
1180	ushr	v31.4s, v25.4s, #12
1181	shl	v25.4s, v25.4s, #20
1182	eor	v1.16b, v23.16b, v1.16b
1183	orr	v0.16b, v0.16b, v11.16b
1184	add	v18.4s, v18.4s, v14.4s
1185	orr	v25.16b, v25.16b, v31.16b
1186	eor	v2.16b, v16.16b, v2.16b
1187	ushr	v31.4s, v1.4s, #12
1188	shl	v1.4s, v1.4s, #20
1189	add	v20.4s, v20.4s, v28.4s
1190	add	v18.4s, v18.4s, v0.4s
1191	mov	v10.16b, v13.16b
1192	ushr	v11.4s, v2.4s, #12
1193	shl	v2.4s, v2.4s, #20
1194	orr	v1.16b, v1.16b, v31.16b
1195	add	v20.4s, v20.4s, v25.4s
1196	add	v17.4s, v17.4s, v29.4s
1197	eor	v6.16b, v18.16b, v6.16b
1198	orr	v2.16b, v2.16b, v11.16b
1199	add	v19.4s, v19.4s, v10.4s
1200	eor	v7.16b, v20.16b, v7.16b
1201	add	v17.4s, v17.4s, v1.4s
1202	ushr	v11.4s, v6.4s, #8
1203	shl	v6.4s, v6.4s, #24
1204	add	v19.4s, v19.4s, v2.4s
1205	ushr	v31.4s, v7.4s, #8
1206	shl	v7.4s, v7.4s, #24
1207	eor	v5.16b, v17.16b, v5.16b
1208	orr	v6.16b, v6.16b, v11.16b
1209	eor	v26.16b, v19.16b, v26.16b
1210	orr	v7.16b, v7.16b, v31.16b
1211	ushr	v31.4s, v5.4s, #8
1212	shl	v5.4s, v5.4s, #24
1213	add	v3.4s, v6.4s, v3.4s
1214	ushr	v11.4s, v26.4s, #8
1215	shl	v26.4s, v26.4s, #24
1216	add	v4.4s, v7.4s, v4.4s
1217	orr	v5.16b, v5.16b, v31.16b
1218	eor	v0.16b, v3.16b, v0.16b
1219	mov	v22.16b, v8.16b
1220	ldp	q8, q28, [sp, #240]
1221	orr	v26.16b, v26.16b, v11.16b
1222	eor	v25.16b, v4.16b, v25.16b
1223	add	v23.4s, v5.4s, v23.4s
1224	ushr	v11.4s, v0.4s, #7
1225	shl	v0.4s, v0.4s, #25
1226	add	v16.4s, v26.4s, v16.4s
1227	ushr	v31.4s, v25.4s, #7
1228	shl	v25.4s, v25.4s, #25
1229	eor	v1.16b, v23.16b, v1.16b
1230	orr	v0.16b, v0.16b, v11.16b
1231	add	v20.4s, v20.4s, v28.4s
1232	orr	v25.16b, v25.16b, v31.16b
1233	eor	v2.16b, v16.16b, v2.16b
1234	ushr	v31.4s, v1.4s, #7
1235	shl	v1.4s, v1.4s, #25
1236	add	v20.4s, v20.4s, v0.4s
1237	add	v19.4s, v19.4s, v15.4s
1238	ushr	v11.4s, v2.4s, #7
1239	shl	v2.4s, v2.4s, #25
1240	orr	v1.16b, v1.16b, v31.16b
1241	add	v18.4s, v18.4s, v8.4s
1242	eor	v26.16b, v20.16b, v26.16b
1243	add	v19.4s, v19.4s, v25.4s
1244	orr	v2.16b, v2.16b, v11.16b
1245	add	v17.4s, v17.4s, v22.4s
1246	ldur	q22, [x29, #-256]
1247	add	v18.4s, v18.4s, v1.4s
1248	rev32	v26.8h, v26.8h
1249	eor	v5.16b, v19.16b, v5.16b
1250	add	v17.4s, v17.4s, v2.4s
1251	eor	v7.16b, v18.16b, v7.16b
1252	add	v23.4s, v23.4s, v26.4s
1253	rev32	v5.8h, v5.8h
1254	eor	v6.16b, v17.16b, v6.16b
1255	rev32	v7.8h, v7.8h
1256	eor	v0.16b, v23.16b, v0.16b
1257	add	v3.4s, v3.4s, v5.4s
1258	rev32	v6.8h, v6.8h
1259	add	v16.4s, v16.4s, v7.4s
1260	ushr	v31.4s, v0.4s, #12
1261	shl	v0.4s, v0.4s, #20
1262	eor	v25.16b, v3.16b, v25.16b
1263	add	v4.4s, v4.4s, v6.4s
1264	eor	v1.16b, v16.16b, v1.16b
1265	orr	v0.16b, v0.16b, v31.16b
1266	ushr	v31.4s, v25.4s, #12
1267	shl	v25.4s, v25.4s, #20
1268	add	v20.4s, v20.4s, v9.4s
1269	mov	v13.16b, v12.16b
1270	mov	v12.16b, v27.16b
1271	mov	v27.16b, v9.16b
1272	ldur	q9, [x29, #-192]
1273	mov	v21.16b, v15.16b
1274	ldr	q15, [sp, #224]
1275	ushr	v11.4s, v1.4s, #12
1276	ldur	q21, [x29, #-224]
1277	shl	v1.4s, v1.4s, #20
1278	eor	v2.16b, v4.16b, v2.16b
1279	orr	v25.16b, v25.16b, v31.16b
1280	add	v19.4s, v19.4s, v9.4s
1281	add	v20.4s, v20.4s, v0.4s
1282	orr	v1.16b, v1.16b, v11.16b
1283	ushr	v11.4s, v2.4s, #12
1284	shl	v2.4s, v2.4s, #20
1285	add	v18.4s, v18.4s, v21.4s
1286	add	v19.4s, v19.4s, v25.4s
1287	eor	v26.16b, v20.16b, v26.16b
1288	orr	v2.16b, v2.16b, v11.16b
1289	add	v17.4s, v17.4s, v15.4s
1290	add	v18.4s, v18.4s, v1.4s
1291	eor	v5.16b, v19.16b, v5.16b
1292	ushr	v31.4s, v26.4s, #8
1293	shl	v26.4s, v26.4s, #24
1294	add	v17.4s, v17.4s, v2.4s
1295	ushr	v11.4s, v5.4s, #8
1296	shl	v5.4s, v5.4s, #24
1297	eor	v7.16b, v18.16b, v7.16b
1298	orr	v26.16b, v26.16b, v31.16b
1299	eor	v6.16b, v17.16b, v6.16b
1300	orr	v5.16b, v5.16b, v11.16b
1301	ushr	v31.4s, v7.4s, #8
1302	shl	v7.4s, v7.4s, #24
1303	add	v23.4s, v26.4s, v23.4s
1304	ushr	v11.4s, v6.4s, #8
1305	shl	v6.4s, v6.4s, #24
1306	orr	v7.16b, v7.16b, v31.16b
1307	add	v3.4s, v5.4s, v3.4s
1308	eor	v0.16b, v23.16b, v0.16b
1309	orr	v6.16b, v6.16b, v11.16b
1310	add	v16.4s, v7.4s, v16.4s
1311	eor	v25.16b, v3.16b, v25.16b
1312	ushr	v31.4s, v0.4s, #7
1313	shl	v0.4s, v0.4s, #25
1314	add	v4.4s, v6.4s, v4.4s
1315	ushr	v11.4s, v25.4s, #7
1316	shl	v25.4s, v25.4s, #25
1317	eor	v1.16b, v16.16b, v1.16b
1318	orr	v0.16b, v0.16b, v31.16b
1319	add	v18.4s, v18.4s, v14.4s
1320	eor	v2.16b, v4.16b, v2.16b
1321	orr	v25.16b, v25.16b, v11.16b
1322	ushr	v31.4s, v1.4s, #7
1323	shl	v1.4s, v1.4s, #25
1324	add	v20.4s, v20.4s, v24.4s
1325	add	v18.4s, v18.4s, v0.4s
1326	ushr	v11.4s, v2.4s, #7
1327	shl	v2.4s, v2.4s, #25
1328	orr	v1.16b, v1.16b, v31.16b
1329	add	v20.4s, v20.4s, v25.4s
1330	add	v17.4s, v17.4s, v10.4s
1331	eor	v6.16b, v6.16b, v18.16b
1332	orr	v2.16b, v2.16b, v11.16b
1333	add	v19.4s, v19.4s, v30.4s
1334	eor	v7.16b, v7.16b, v20.16b
1335	add	v17.4s, v17.4s, v1.4s
1336	rev32	v6.8h, v6.8h
1337	add	v19.4s, v19.4s, v2.4s
1338	rev32	v7.8h, v7.8h
1339	eor	v5.16b, v17.16b, v5.16b
1340	add	v3.4s, v3.4s, v6.4s
1341	eor	v26.16b, v19.16b, v26.16b
1342	add	v4.4s, v4.4s, v7.4s
1343	rev32	v5.8h, v5.8h
1344	eor	v0.16b, v3.16b, v0.16b
1345	rev32	v26.8h, v26.8h
1346	eor	v25.16b, v4.16b, v25.16b
1347	add	v23.4s, v23.4s, v5.4s
1348	ushr	v11.4s, v0.4s, #12
1349	shl	v0.4s, v0.4s, #20
1350	add	v16.4s, v16.4s, v26.4s
1351	ushr	v31.4s, v25.4s, #12
1352	shl	v25.4s, v25.4s, #20
1353	eor	v1.16b, v23.16b, v1.16b
1354	orr	v0.16b, v0.16b, v11.16b
1355	add	v18.4s, v18.4s, v8.4s
1356	orr	v25.16b, v25.16b, v31.16b
1357	eor	v2.16b, v16.16b, v2.16b
1358	ushr	v31.4s, v1.4s, #12
1359	shl	v1.4s, v1.4s, #20
1360	add	v20.4s, v20.4s, v12.4s
1361	add	v18.4s, v18.4s, v0.4s
1362	ushr	v11.4s, v2.4s, #12
1363	shl	v2.4s, v2.4s, #20
1364	orr	v1.16b, v1.16b, v31.16b
1365	add	v20.4s, v20.4s, v25.4s
1366	add	v17.4s, v17.4s, v13.4s
1367	ldr	q13, [sp, #160]
1368	eor	v6.16b, v18.16b, v6.16b
1369	orr	v2.16b, v2.16b, v11.16b
1370	add	v19.4s, v19.4s, v15.4s
1371	eor	v7.16b, v20.16b, v7.16b
1372	add	v17.4s, v17.4s, v1.4s
1373	ushr	v11.4s, v6.4s, #8
1374	shl	v6.4s, v6.4s, #24
1375	add	v19.4s, v19.4s, v2.4s
1376	ushr	v31.4s, v7.4s, #8
1377	shl	v7.4s, v7.4s, #24
1378	eor	v5.16b, v17.16b, v5.16b
1379	orr	v6.16b, v6.16b, v11.16b
1380	eor	v26.16b, v19.16b, v26.16b
1381	orr	v7.16b, v7.16b, v31.16b
1382	ushr	v31.4s, v5.4s, #8
1383	shl	v5.4s, v5.4s, #24
1384	add	v3.4s, v6.4s, v3.4s
1385	ushr	v11.4s, v26.4s, #8
1386	shl	v26.4s, v26.4s, #24
1387	add	v4.4s, v7.4s, v4.4s
1388	orr	v5.16b, v5.16b, v31.16b
1389	eor	v0.16b, v3.16b, v0.16b
1390	orr	v26.16b, v26.16b, v11.16b
1391	eor	v25.16b, v4.16b, v25.16b
1392	add	v23.4s, v5.4s, v23.4s
1393	ushr	v11.4s, v0.4s, #7
1394	shl	v0.4s, v0.4s, #25
1395	add	v16.4s, v26.4s, v16.4s
1396	ushr	v31.4s, v25.4s, #7
1397	shl	v25.4s, v25.4s, #25
1398	eor	v1.16b, v23.16b, v1.16b
1399	orr	v0.16b, v0.16b, v11.16b
1400	add	v20.4s, v20.4s, v22.4s
1401	orr	v25.16b, v25.16b, v31.16b
1402	eor	v2.16b, v16.16b, v2.16b
1403	ushr	v31.4s, v1.4s, #7
1404	shl	v1.4s, v1.4s, #25
1405	add	v20.4s, v20.4s, v0.4s
1406	add	v19.4s, v19.4s, v9.4s
1407	mov	v29.16b, v14.16b
1408	ldr	q14, [sp, #128]
1409	ushr	v11.4s, v2.4s, #7
1410	shl	v2.4s, v2.4s, #25
1411	orr	v1.16b, v1.16b, v31.16b
1412	add	v18.4s, v18.4s, v14.4s
1413	eor	v26.16b, v20.16b, v26.16b
1414	add	v19.4s, v19.4s, v25.4s
1415	orr	v2.16b, v2.16b, v11.16b
1416	add	v17.4s, v17.4s, v27.4s
1417	add	v18.4s, v18.4s, v1.4s
1418	rev32	v26.8h, v26.8h
1419	eor	v5.16b, v19.16b, v5.16b
1420	add	v17.4s, v17.4s, v2.4s
1421	eor	v7.16b, v18.16b, v7.16b
1422	add	v23.4s, v23.4s, v26.4s
1423	rev32	v5.8h, v5.8h
1424	eor	v6.16b, v17.16b, v6.16b
1425	rev32	v7.8h, v7.8h
1426	eor	v0.16b, v23.16b, v0.16b
1427	add	v3.4s, v3.4s, v5.4s
1428	rev32	v6.8h, v6.8h
1429	add	v16.4s, v16.4s, v7.4s
1430	ushr	v31.4s, v0.4s, #12
1431	shl	v0.4s, v0.4s, #20
1432	eor	v25.16b, v3.16b, v25.16b
1433	add	v4.4s, v4.4s, v6.4s
1434	eor	v1.16b, v16.16b, v1.16b
1435	orr	v0.16b, v0.16b, v31.16b
1436	ushr	v31.4s, v25.4s, #12
1437	shl	v25.4s, v25.4s, #20
1438	add	v20.4s, v20.4s, v21.4s
1439	ushr	v11.4s, v1.4s, #12
1440	shl	v1.4s, v1.4s, #20
1441	eor	v2.16b, v4.16b, v2.16b
1442	orr	v25.16b, v25.16b, v31.16b
1443	add	v19.4s, v19.4s, v28.4s
1444	add	v20.4s, v20.4s, v0.4s
1445	mov	v12.16b, v27.16b
1446	ldur	q27, [x29, #-208]
1447	orr	v1.16b, v1.16b, v11.16b
1448	ushr	v11.4s, v2.4s, #12
1449	shl	v2.4s, v2.4s, #20
1450	add	v18.4s, v18.4s, v27.4s
1451	add	v19.4s, v19.4s, v25.4s
1452	eor	v26.16b, v20.16b, v26.16b
1453	orr	v2.16b, v2.16b, v11.16b
1454	add	v17.4s, v17.4s, v13.4s
1455	add	v18.4s, v18.4s, v1.4s
1456	eor	v5.16b, v19.16b, v5.16b
1457	ushr	v31.4s, v26.4s, #8
1458	shl	v26.4s, v26.4s, #24
1459	add	v17.4s, v17.4s, v2.4s
1460	ushr	v11.4s, v5.4s, #8
1461	shl	v5.4s, v5.4s, #24
1462	eor	v7.16b, v18.16b, v7.16b
1463	orr	v26.16b, v26.16b, v31.16b
1464	eor	v6.16b, v17.16b, v6.16b
1465	orr	v5.16b, v5.16b, v11.16b
1466	ushr	v31.4s, v7.4s, #8
1467	shl	v7.4s, v7.4s, #24
1468	add	v23.4s, v26.4s, v23.4s
1469	ushr	v11.4s, v6.4s, #8
1470	shl	v6.4s, v6.4s, #24
1471	orr	v7.16b, v7.16b, v31.16b
1472	add	v3.4s, v5.4s, v3.4s
1473	eor	v0.16b, v23.16b, v0.16b
1474	orr	v6.16b, v6.16b, v11.16b
1475	add	v16.4s, v7.4s, v16.4s
1476	eor	v25.16b, v3.16b, v25.16b
1477	ushr	v31.4s, v0.4s, #7
1478	shl	v0.4s, v0.4s, #25
1479	add	v4.4s, v6.4s, v4.4s
1480	ushr	v11.4s, v25.4s, #7
1481	shl	v25.4s, v25.4s, #25
1482	eor	v1.16b, v16.16b, v1.16b
1483	orr	v0.16b, v0.16b, v31.16b
1484	add	v18.4s, v18.4s, v8.4s
1485	eor	v2.16b, v4.16b, v2.16b
1486	orr	v25.16b, v25.16b, v11.16b
1487	ushr	v31.4s, v1.4s, #7
1488	shl	v1.4s, v1.4s, #25
1489	add	v20.4s, v20.4s, v29.4s
1490	add	v18.4s, v18.4s, v0.4s
1491	ushr	v11.4s, v2.4s, #7
1492	shl	v2.4s, v2.4s, #25
1493	orr	v1.16b, v1.16b, v31.16b
1494	add	v20.4s, v20.4s, v25.4s
1495	add	v17.4s, v17.4s, v15.4s
1496	eor	v6.16b, v6.16b, v18.16b
1497	orr	v2.16b, v2.16b, v11.16b
1498	add	v19.4s, v19.4s, v10.4s
1499	eor	v7.16b, v7.16b, v20.16b
1500	add	v17.4s, v17.4s, v1.4s
1501	rev32	v6.8h, v6.8h
1502	add	v19.4s, v19.4s, v2.4s
1503	rev32	v7.8h, v7.8h
1504	eor	v5.16b, v17.16b, v5.16b
1505	add	v3.4s, v3.4s, v6.4s
1506	eor	v26.16b, v19.16b, v26.16b
1507	add	v4.4s, v4.4s, v7.4s
1508	rev32	v5.8h, v5.8h
1509	eor	v0.16b, v3.16b, v0.16b
1510	rev32	v26.8h, v26.8h
1511	eor	v25.16b, v4.16b, v25.16b
1512	add	v23.4s, v23.4s, v5.4s
1513	ushr	v11.4s, v0.4s, #12
1514	shl	v0.4s, v0.4s, #20
1515	add	v16.4s, v16.4s, v26.4s
1516	ushr	v31.4s, v25.4s, #12
1517	shl	v25.4s, v25.4s, #20
1518	eor	v1.16b, v23.16b, v1.16b
1519	orr	v0.16b, v0.16b, v11.16b
1520	add	v18.4s, v18.4s, v14.4s
1521	mov	v30.16b, v29.16b
1522	mov	v29.16b, v15.16b
1523	ldr	q15, [sp, #144]
1524	orr	v25.16b, v25.16b, v31.16b
1525	eor	v2.16b, v16.16b, v2.16b
1526	ushr	v31.4s, v1.4s, #12
1527	shl	v1.4s, v1.4s, #20
1528	add	v20.4s, v20.4s, v15.4s
1529	add	v18.4s, v18.4s, v0.4s
1530	ushr	v11.4s, v2.4s, #12
1531	shl	v2.4s, v2.4s, #20
1532	orr	v1.16b, v1.16b, v31.16b
1533	add	v20.4s, v20.4s, v25.4s
1534	add	v17.4s, v17.4s, v24.4s
1535	eor	v6.16b, v18.16b, v6.16b
1536	orr	v2.16b, v2.16b, v11.16b
1537	add	v19.4s, v19.4s, v13.4s
1538	eor	v7.16b, v20.16b, v7.16b
1539	add	v17.4s, v17.4s, v1.4s
1540	ushr	v11.4s, v6.4s, #8
1541	shl	v6.4s, v6.4s, #24
1542	add	v19.4s, v19.4s, v2.4s
1543	ushr	v31.4s, v7.4s, #8
1544	shl	v7.4s, v7.4s, #24
1545	eor	v5.16b, v17.16b, v5.16b
1546	orr	v6.16b, v6.16b, v11.16b
1547	eor	v26.16b, v19.16b, v26.16b
1548	orr	v7.16b, v7.16b, v31.16b
1549	ushr	v31.4s, v5.4s, #8
1550	shl	v5.4s, v5.4s, #24
1551	add	v3.4s, v6.4s, v3.4s
1552	ushr	v11.4s, v26.4s, #8
1553	shl	v26.4s, v26.4s, #24
1554	add	v4.4s, v7.4s, v4.4s
1555	orr	v5.16b, v5.16b, v31.16b
1556	eor	v0.16b, v3.16b, v0.16b
1557	orr	v26.16b, v26.16b, v11.16b
1558	eor	v25.16b, v4.16b, v25.16b
1559	add	v23.4s, v5.4s, v23.4s
1560	ushr	v11.4s, v0.4s, #7
1561	shl	v0.4s, v0.4s, #25
1562	mov	v9.16b, v28.16b
1563	mov	v28.16b, v10.16b
1564	ldr	q10, [sp, #176]
1565	add	v16.4s, v26.4s, v16.4s
1566	ushr	v31.4s, v25.4s, #7
1567	shl	v25.4s, v25.4s, #25
1568	eor	v1.16b, v23.16b, v1.16b
1569	orr	v0.16b, v0.16b, v11.16b
1570	add	v20.4s, v20.4s, v10.4s
1571	orr	v25.16b, v25.16b, v31.16b
1572	eor	v2.16b, v16.16b, v2.16b
1573	ushr	v31.4s, v1.4s, #7
1574	shl	v1.4s, v1.4s, #25
1575	add	v20.4s, v20.4s, v0.4s
1576	add	v19.4s, v19.4s, v9.4s
1577	ushr	v11.4s, v2.4s, #7
1578	shl	v2.4s, v2.4s, #25
1579	orr	v1.16b, v1.16b, v31.16b
1580	add	v18.4s, v18.4s, v12.4s
1581	eor	v26.16b, v20.16b, v26.16b
1582	add	v19.4s, v19.4s, v25.4s
1583	orr	v2.16b, v2.16b, v11.16b
1584	add	v17.4s, v17.4s, v21.4s
1585	add	v18.4s, v18.4s, v1.4s
1586	rev32	v26.8h, v26.8h
1587	eor	v5.16b, v19.16b, v5.16b
1588	add	v17.4s, v17.4s, v2.4s
1589	eor	v7.16b, v18.16b, v7.16b
1590	add	v23.4s, v23.4s, v26.4s
1591	rev32	v5.8h, v5.8h
1592	eor	v6.16b, v17.16b, v6.16b
1593	rev32	v7.8h, v7.8h
1594	eor	v0.16b, v23.16b, v0.16b
1595	add	v3.4s, v3.4s, v5.4s
1596	rev32	v6.8h, v6.8h
1597	add	v16.4s, v16.4s, v7.4s
1598	ushr	v31.4s, v0.4s, #12
1599	shl	v0.4s, v0.4s, #20
1600	eor	v25.16b, v3.16b, v25.16b
1601	add	v4.4s, v4.4s, v6.4s
1602	eor	v1.16b, v16.16b, v1.16b
1603	orr	v0.16b, v0.16b, v31.16b
1604	ushr	v31.4s, v25.4s, #12
1605	shl	v25.4s, v25.4s, #20
1606	ushr	v11.4s, v1.4s, #12
1607	shl	v1.4s, v1.4s, #20
1608	eor	v2.16b, v4.16b, v2.16b
1609	add	v20.4s, v20.4s, v27.4s
1610	orr	v25.16b, v25.16b, v31.16b
1611	add	v19.4s, v19.4s, v22.4s
1612	mov	v9.16b, v22.16b
1613	ldur	q22, [x29, #-240]
1614	orr	v1.16b, v1.16b, v11.16b
1615	ushr	v11.4s, v2.4s, #12
1616	shl	v2.4s, v2.4s, #20
1617	add	v20.4s, v20.4s, v0.4s
1618	add	v18.4s, v18.4s, v22.4s
1619	add	v19.4s, v19.4s, v25.4s
1620	mov	v24.16b, v21.16b
1621	ldur	q21, [x29, #-192]
1622	orr	v2.16b, v2.16b, v11.16b
1623	eor	v26.16b, v20.16b, v26.16b
1624	add	v17.4s, v17.4s, v21.4s
1625	add	v18.4s, v18.4s, v1.4s
1626	eor	v5.16b, v19.16b, v5.16b
1627	ushr	v31.4s, v26.4s, #8
1628	add	v17.4s, v17.4s, v2.4s
1629	shl	v26.4s, v26.4s, #24
1630	ushr	v11.4s, v5.4s, #8
1631	shl	v5.4s, v5.4s, #24
1632	eor	v7.16b, v18.16b, v7.16b
1633	orr	v26.16b, v26.16b, v31.16b
1634	eor	v6.16b, v17.16b, v6.16b
1635	orr	v5.16b, v5.16b, v11.16b
1636	ushr	v31.4s, v7.4s, #8
1637	shl	v7.4s, v7.4s, #24
1638	ushr	v11.4s, v6.4s, #8
1639	shl	v6.4s, v6.4s, #24
1640	add	v23.4s, v26.4s, v23.4s
1641	orr	v7.16b, v7.16b, v31.16b
1642	add	v3.4s, v5.4s, v3.4s
1643	orr	v6.16b, v6.16b, v11.16b
1644	eor	v0.16b, v23.16b, v0.16b
1645	add	v16.4s, v7.4s, v16.4s
1646	eor	v25.16b, v3.16b, v25.16b
1647	add	v4.4s, v6.4s, v4.4s
1648	ushr	v31.4s, v0.4s, #7
1649	shl	v0.4s, v0.4s, #25
1650	ushr	v11.4s, v25.4s, #7
1651	shl	v25.4s, v25.4s, #25
1652	eor	v1.16b, v16.16b, v1.16b
1653	orr	v0.16b, v0.16b, v31.16b
1654	eor	v2.16b, v4.16b, v2.16b
1655	orr	v25.16b, v25.16b, v11.16b
1656	ushr	v31.4s, v1.4s, #7
1657	shl	v1.4s, v1.4s, #25
1658	add	v20.4s, v20.4s, v8.4s
1659	add	v18.4s, v18.4s, v14.4s
1660	ushr	v11.4s, v2.4s, #7
1661	shl	v2.4s, v2.4s, #25
1662	orr	v1.16b, v1.16b, v31.16b
1663	add	v20.4s, v20.4s, v25.4s
1664	add	v17.4s, v17.4s, v13.4s
1665	add	v18.4s, v18.4s, v0.4s
1666	orr	v2.16b, v2.16b, v11.16b
1667	add	v19.4s, v19.4s, v29.4s
1668	eor	v7.16b, v7.16b, v20.16b
1669	add	v17.4s, v17.4s, v1.4s
1670	eor	v6.16b, v6.16b, v18.16b
1671	add	v19.4s, v19.4s, v2.4s
1672	rev32	v7.8h, v7.8h
1673	eor	v5.16b, v17.16b, v5.16b
1674	rev32	v6.8h, v6.8h
1675	eor	v26.16b, v19.16b, v26.16b
1676	add	v4.4s, v4.4s, v7.4s
1677	rev32	v5.8h, v5.8h
1678	add	v3.4s, v3.4s, v6.4s
1679	rev32	v26.8h, v26.8h
1680	eor	v25.16b, v4.16b, v25.16b
1681	add	v23.4s, v23.4s, v5.4s
1682	eor	v0.16b, v3.16b, v0.16b
1683	add	v16.4s, v16.4s, v26.4s
1684	ushr	v31.4s, v25.4s, #12
1685	shl	v25.4s, v25.4s, #20
1686	ushr	v11.4s, v0.4s, #12
1687	shl	v0.4s, v0.4s, #20
1688	eor	v1.16b, v23.16b, v1.16b
1689	orr	v25.16b, v25.16b, v31.16b
1690	eor	v2.16b, v16.16b, v2.16b
1691	orr	v0.16b, v0.16b, v11.16b
1692	ushr	v31.4s, v1.4s, #12
1693	shl	v1.4s, v1.4s, #20
1694	add	v20.4s, v20.4s, v28.4s
1695	add	v18.4s, v18.4s, v12.4s
1696	ushr	v11.4s, v2.4s, #12
1697	shl	v2.4s, v2.4s, #20
1698	orr	v1.16b, v1.16b, v31.16b
1699	add	v20.4s, v20.4s, v25.4s
1700	add	v17.4s, v17.4s, v30.4s
1701	add	v18.4s, v18.4s, v0.4s
1702	orr	v2.16b, v2.16b, v11.16b
1703	add	v19.4s, v19.4s, v21.4s
1704	eor	v7.16b, v20.16b, v7.16b
1705	add	v17.4s, v17.4s, v1.4s
1706	eor	v6.16b, v18.16b, v6.16b
1707	add	v19.4s, v19.4s, v2.4s
1708	ushr	v31.4s, v7.4s, #8
1709	shl	v7.4s, v7.4s, #24
1710	ushr	v11.4s, v6.4s, #8
1711	shl	v6.4s, v6.4s, #24
1712	eor	v5.16b, v17.16b, v5.16b
1713	orr	v7.16b, v7.16b, v31.16b
1714	eor	v26.16b, v19.16b, v26.16b
1715	orr	v6.16b, v6.16b, v11.16b
1716	ushr	v31.4s, v5.4s, #8
1717	shl	v5.4s, v5.4s, #24
1718	ushr	v11.4s, v26.4s, #8
1719	shl	v26.4s, v26.4s, #24
1720	add	v4.4s, v7.4s, v4.4s
1721	orr	v5.16b, v5.16b, v31.16b
1722	add	v3.4s, v6.4s, v3.4s
1723	orr	v26.16b, v26.16b, v11.16b
1724	eor	v25.16b, v4.16b, v25.16b
1725	add	v23.4s, v5.4s, v23.4s
1726	eor	v0.16b, v3.16b, v0.16b
1727	add	v16.4s, v26.4s, v16.4s
1728	ushr	v31.4s, v25.4s, #7
1729	shl	v25.4s, v25.4s, #25
1730	ushr	v11.4s, v0.4s, #7
1731	shl	v0.4s, v0.4s, #25
1732	eor	v1.16b, v23.16b, v1.16b
1733	orr	v25.16b, v25.16b, v31.16b
1734	eor	v2.16b, v16.16b, v2.16b
1735	orr	v0.16b, v0.16b, v11.16b
1736	ushr	v31.4s, v1.4s, #7
1737	shl	v1.4s, v1.4s, #25
1738	add	v20.4s, v20.4s, v15.4s
1739	ushr	v11.4s, v2.4s, #7
1740	shl	v2.4s, v2.4s, #25
1741	orr	v1.16b, v1.16b, v31.16b
1742	add	v18.4s, v18.4s, v24.4s
1743	add	v20.4s, v20.4s, v0.4s
1744	add	v19.4s, v19.4s, v9.4s
1745	mov	v8.16b, v13.16b
1746	ldur	q13, [x29, #-208]
1747	orr	v2.16b, v2.16b, v11.16b
1748	add	v18.4s, v18.4s, v1.4s
1749	add	v17.4s, v17.4s, v13.4s
1750	eor	v26.16b, v20.16b, v26.16b
1751	add	v19.4s, v19.4s, v25.4s
1752	eor	v7.16b, v18.16b, v7.16b
1753	add	v17.4s, v17.4s, v2.4s
1754	rev32	v26.8h, v26.8h
1755	eor	v5.16b, v19.16b, v5.16b
1756	rev32	v7.8h, v7.8h
1757	eor	v6.16b, v17.16b, v6.16b
1758	add	v23.4s, v23.4s, v26.4s
1759	rev32	v5.8h, v5.8h
1760	add	v16.4s, v16.4s, v7.4s
1761	rev32	v6.8h, v6.8h
1762	eor	v0.16b, v23.16b, v0.16b
1763	add	v3.4s, v3.4s, v5.4s
1764	eor	v1.16b, v16.16b, v1.16b
1765	add	v4.4s, v4.4s, v6.4s
1766	ushr	v31.4s, v0.4s, #12
1767	shl	v0.4s, v0.4s, #20
1768	eor	v25.16b, v3.16b, v25.16b
1769	ushr	v11.4s, v1.4s, #12
1770	shl	v1.4s, v1.4s, #20
1771	orr	v0.16b, v0.16b, v31.16b
1772	eor	v2.16b, v4.16b, v2.16b
1773	ushr	v31.4s, v25.4s, #12
1774	shl	v25.4s, v25.4s, #20
1775	orr	v1.16b, v1.16b, v11.16b
1776	ushr	v11.4s, v2.4s, #12
1777	shl	v2.4s, v2.4s, #20
1778	add	v20.4s, v20.4s, v22.4s
1779	orr	v25.16b, v25.16b, v31.16b
1780	add	v19.4s, v19.4s, v10.4s
1781	mov	v27.16b, v12.16b
1782	mov	v12.16b, v30.16b
1783	mov	v29.16b, v21.16b
1784	mov	v21.16b, v24.16b
1785	ldr	q24, [sp, #192]
1786	mov	v30.16b, v22.16b
1787	ldr	q22, [sp, #256]
1788	orr	v2.16b, v2.16b, v11.16b
1789	add	v20.4s, v20.4s, v0.4s
1790	add	v18.4s, v18.4s, v24.4s
1791	add	v19.4s, v19.4s, v25.4s
1792	add	v17.4s, v17.4s, v22.4s
1793	eor	v26.16b, v20.16b, v26.16b
1794	add	v18.4s, v18.4s, v1.4s
1795	eor	v5.16b, v19.16b, v5.16b
1796	add	v17.4s, v17.4s, v2.4s
1797	ushr	v31.4s, v26.4s, #8
1798	shl	v26.4s, v26.4s, #24
1799	ushr	v11.4s, v5.4s, #8
1800	shl	v5.4s, v5.4s, #24
1801	eor	v7.16b, v18.16b, v7.16b
1802	eor	v6.16b, v17.16b, v6.16b
1803	orr	v26.16b, v26.16b, v31.16b
1804	orr	v5.16b, v5.16b, v11.16b
1805	ushr	v31.4s, v7.4s, #8
1806	shl	v7.4s, v7.4s, #24
1807	ushr	v11.4s, v6.4s, #8
1808	shl	v6.4s, v6.4s, #24
1809	add	v23.4s, v26.4s, v23.4s
1810	orr	v7.16b, v7.16b, v31.16b
1811	add	v3.4s, v5.4s, v3.4s
1812	orr	v6.16b, v6.16b, v11.16b
1813	eor	v0.16b, v23.16b, v0.16b
1814	add	v16.4s, v7.4s, v16.4s
1815	eor	v25.16b, v3.16b, v25.16b
1816	add	v4.4s, v6.4s, v4.4s
1817	ushr	v31.4s, v0.4s, #7
1818	shl	v0.4s, v0.4s, #25
1819	ushr	v11.4s, v25.4s, #7
1820	shl	v25.4s, v25.4s, #25
1821	eor	v1.16b, v16.16b, v1.16b
1822	eor	v2.16b, v4.16b, v2.16b
1823	orr	v0.16b, v0.16b, v31.16b
1824	orr	v25.16b, v25.16b, v11.16b
1825	ushr	v31.4s, v1.4s, #7
1826	shl	v1.4s, v1.4s, #25
1827	ushr	v11.4s, v2.4s, #7
1828	shl	v2.4s, v2.4s, #25
1829	add	v20.4s, v20.4s, v14.4s
1830	add	v18.4s, v18.4s, v27.4s
1831	ldr	q27, [sp, #224]
1832	orr	v1.16b, v1.16b, v31.16b
1833	orr	v2.16b, v2.16b, v11.16b
1834	add	v20.4s, v20.4s, v25.4s
1835	add	v17.4s, v17.4s, v29.4s
1836	add	v18.4s, v18.4s, v0.4s
1837	add	v19.4s, v19.4s, v8.4s
1838	eor	v7.16b, v7.16b, v20.16b
1839	add	v17.4s, v17.4s, v1.4s
1840	eor	v6.16b, v6.16b, v18.16b
1841	add	v19.4s, v19.4s, v2.4s
1842	rev32	v7.8h, v7.8h
1843	eor	v5.16b, v17.16b, v5.16b
1844	rev32	v6.8h, v6.8h
1845	eor	v26.16b, v19.16b, v26.16b
1846	add	v4.4s, v4.4s, v7.4s
1847	rev32	v5.8h, v5.8h
1848	add	v3.4s, v3.4s, v6.4s
1849	rev32	v26.8h, v26.8h
1850	eor	v25.16b, v4.16b, v25.16b
1851	add	v23.4s, v23.4s, v5.4s
1852	eor	v0.16b, v3.16b, v0.16b
1853	add	v16.4s, v16.4s, v26.4s
1854	ushr	v29.4s, v25.4s, #12
1855	shl	v25.4s, v25.4s, #20
1856	ushr	v31.4s, v0.4s, #12
1857	shl	v0.4s, v0.4s, #20
1858	eor	v1.16b, v23.16b, v1.16b
1859	eor	v2.16b, v16.16b, v2.16b
1860	orr	v25.16b, v25.16b, v29.16b
1861	orr	v0.16b, v0.16b, v31.16b
1862	ushr	v29.4s, v1.4s, #12
1863	shl	v1.4s, v1.4s, #20
1864	ushr	v31.4s, v2.4s, #12
1865	shl	v2.4s, v2.4s, #20
1866	add	v18.4s, v18.4s, v21.4s
1867	ldr	q21, [sp, #240]
1868	add	v20.4s, v20.4s, v27.4s
1869	prfm	pldl1keep, [x17, #256]
1870	orr	v1.16b, v1.16b, v29.16b
1871	prfm	pldl1keep, [x21, #256]
1872	orr	v2.16b, v2.16b, v31.16b
1873	prfm	pldl1keep, [x16, #256]
1874	add	v18.4s, v18.4s, v0.4s
1875	prfm	pldl1keep, [x6, #256]
1876	add	v17.4s, v17.4s, v21.4s
1877	add	v19.4s, v19.4s, v22.4s
1878	add	v20.4s, v20.4s, v25.4s
1879	eor	v6.16b, v18.16b, v6.16b
1880	add	v17.4s, v17.4s, v1.4s
1881	add	v19.4s, v19.4s, v2.4s
1882	eor	v7.16b, v20.16b, v7.16b
1883	ushr	v22.4s, v6.4s, #8
1884	shl	v6.4s, v6.4s, #24
1885	eor	v5.16b, v17.16b, v5.16b
1886	eor	v26.16b, v19.16b, v26.16b
1887	ushr	v21.4s, v7.4s, #8
1888	shl	v7.4s, v7.4s, #24
1889	orr	v6.16b, v6.16b, v22.16b
1890	ushr	v22.4s, v5.4s, #8
1891	shl	v5.4s, v5.4s, #24
1892	ushr	v29.4s, v26.4s, #8
1893	shl	v26.4s, v26.4s, #24
1894	orr	v7.16b, v7.16b, v21.16b
1895	orr	v5.16b, v5.16b, v22.16b
1896	add	v3.4s, v6.4s, v3.4s
1897	orr	v21.16b, v26.16b, v29.16b
1898	add	v4.4s, v7.4s, v4.4s
1899	add	v22.4s, v5.4s, v23.4s
1900	eor	v0.16b, v3.16b, v0.16b
1901	add	v16.4s, v21.4s, v16.4s
1902	eor	v23.16b, v4.16b, v25.16b
1903	eor	v1.16b, v22.16b, v1.16b
1904	ushr	v25.4s, v0.4s, #7
1905	shl	v0.4s, v0.4s, #25
1906	eor	v2.16b, v16.16b, v2.16b
1907	ushr	v26.4s, v23.4s, #7
1908	shl	v23.4s, v23.4s, #25
1909	orr	v0.16b, v0.16b, v25.16b
1910	ushr	v25.4s, v1.4s, #7
1911	shl	v1.4s, v1.4s, #25
1912	ushr	v29.4s, v2.4s, #7
1913	shl	v2.4s, v2.4s, #25
1914	add	v20.4s, v20.4s, v28.4s
1915	orr	v23.16b, v23.16b, v26.16b
1916	orr	v1.16b, v1.16b, v25.16b
1917	orr	v2.16b, v2.16b, v29.16b
1918	add	v20.4s, v20.4s, v0.4s
1919	add	v18.4s, v18.4s, v13.4s
1920	add	v17.4s, v17.4s, v30.4s
1921	add	v19.4s, v19.4s, v10.4s
1922	eor	v21.16b, v20.16b, v21.16b
1923	add	v18.4s, v18.4s, v1.4s
1924	add	v17.4s, v17.4s, v2.4s
1925	add	v19.4s, v19.4s, v23.4s
1926	rev32	v21.8h, v21.8h
1927	eor	v7.16b, v18.16b, v7.16b
1928	eor	v6.16b, v17.16b, v6.16b
1929	eor	v5.16b, v19.16b, v5.16b
1930	add	v22.4s, v22.4s, v21.4s
1931	rev32	v7.8h, v7.8h
1932	rev32	v6.8h, v6.8h
1933	rev32	v5.8h, v5.8h
1934	eor	v0.16b, v22.16b, v0.16b
1935	add	v16.4s, v16.4s, v7.4s
1936	add	v4.4s, v4.4s, v6.4s
1937	add	v3.4s, v3.4s, v5.4s
1938	ushr	v25.4s, v0.4s, #12
1939	shl	v0.4s, v0.4s, #20
1940	eor	v1.16b, v16.16b, v1.16b
1941	eor	v2.16b, v4.16b, v2.16b
1942	eor	v23.16b, v3.16b, v23.16b
1943	orr	v0.16b, v0.16b, v25.16b
1944	ushr	v25.4s, v1.4s, #12
1945	shl	v1.4s, v1.4s, #20
1946	ushr	v26.4s, v2.4s, #12
1947	shl	v2.4s, v2.4s, #20
1948	ushr	v27.4s, v23.4s, #12
1949	shl	v23.4s, v23.4s, #20
1950	orr	v1.16b, v1.16b, v25.16b
1951	add	v20.4s, v20.4s, v24.4s
1952	orr	v2.16b, v2.16b, v26.16b
1953	orr	v23.16b, v23.16b, v27.16b
1954	add	v18.4s, v18.4s, v12.4s
1955	add	v17.4s, v17.4s, v9.4s
1956	add	v19.4s, v19.4s, v15.4s
1957	add	v20.4s, v20.4s, v0.4s
1958	add	v18.4s, v18.4s, v1.4s
1959	add	v17.4s, v17.4s, v2.4s
1960	add	v19.4s, v19.4s, v23.4s
1961	eor	v21.16b, v20.16b, v21.16b
1962	eor	v7.16b, v18.16b, v7.16b
1963	eor	v6.16b, v17.16b, v6.16b
1964	eor	v5.16b, v19.16b, v5.16b
1965	ushr	v24.4s, v21.4s, #8
1966	shl	v21.4s, v21.4s, #24
1967	ushr	v25.4s, v7.4s, #8
1968	shl	v7.4s, v7.4s, #24
1969	ushr	v26.4s, v6.4s, #8
1970	shl	v6.4s, v6.4s, #24
1971	ushr	v27.4s, v5.4s, #8
1972	shl	v5.4s, v5.4s, #24
1973	orr	v21.16b, v21.16b, v24.16b
1974	orr	v7.16b, v7.16b, v25.16b
1975	orr	v6.16b, v6.16b, v26.16b
1976	orr	v5.16b, v5.16b, v27.16b
1977	add	v22.4s, v21.4s, v22.4s
1978	add	v16.4s, v7.4s, v16.4s
1979	add	v4.4s, v6.4s, v4.4s
1980	add	v3.4s, v5.4s, v3.4s
1981	eor	v0.16b, v22.16b, v0.16b
1982	eor	v1.16b, v16.16b, v1.16b
1983	eor	v2.16b, v4.16b, v2.16b
1984	eor	v23.16b, v3.16b, v23.16b
1985	ushr	v24.4s, v0.4s, #7
1986	shl	v0.4s, v0.4s, #25
1987	ushr	v25.4s, v1.4s, #7
1988	shl	v1.4s, v1.4s, #25
1989	ushr	v26.4s, v2.4s, #7
1990	shl	v2.4s, v2.4s, #25
1991	ushr	v27.4s, v23.4s, #7
1992	shl	v23.4s, v23.4s, #25
1993	orr	v0.16b, v0.16b, v24.16b
1994	orr	v1.16b, v1.16b, v25.16b
1995	orr	v2.16b, v2.16b, v26.16b
1996	orr	v23.16b, v23.16b, v27.16b
1997	movi	v24.4s, #64
1998	eor	v12.16b, v4.16b, v20.16b
1999	eor	v31.16b, v18.16b, v3.16b
2000	eor	v29.16b, v17.16b, v22.16b
2001	eor	v30.16b, v16.16b, v19.16b
2002	eor	v28.16b, v7.16b, v23.16b
2003	eor	v23.16b, v6.16b, v0.16b
2004	eor	v13.16b, v1.16b, v5.16b
2005	eor	v25.16b, v2.16b, v21.16b
2006	cbnz	x15, .LBB3_5
2007	b	.LBB3_2
2008.LBB3_6:
2009	cbz	x24, .LBB3_14
2010	orr	w8, w7, w19
2011	and	x22, x5, #0x1
2012	stur	w8, [x29, #-192]
2013.LBB3_8:
2014	ldr	x8, [sp, #40]
2015	mov	x28, x0
2016	ldr	x25, [x0]
2017	mov	x23, x2
2018	ldur	w5, [x29, #-192]
2019	ldp	q0, q1, [x8]
2020	mov	x8, x2
2021	b	.LBB3_11
2022.LBB3_9:
2023	orr	w5, w5, w27
2024.LBB3_10:
2025	sub	x0, x29, #144
2026	sub	x1, x29, #176
2027	mov	x2, x25
2028	mov	w3, #64
2029	mov	x4, x20
2030	bl	compress_pre
2031	ldp	q0, q1, [x29, #-144]
2032	add	x25, x25, #64
2033	mov	x8, x21
2034	mov	w5, w19
2035	ldp	q2, q3, [x29, #-112]
2036	eor	v0.16b, v2.16b, v0.16b
2037	eor	v1.16b, v3.16b, v1.16b
2038.LBB3_11:
2039	subs	x21, x8, #1
2040	stp	q0, q1, [x29, #-176]
2041	b.eq	.LBB3_9
2042	cbnz	x8, .LBB3_10
2043	ldp	q1, q0, [x29, #-176]
2044	mov	x0, x28
2045	add	x20, x20, x22
2046	add	x0, x28, #8
2047	subs	x24, x24, #1
2048	mov	x2, x23
2049	stp	q1, q0, [x26], #32
2050	b.ne	.LBB3_8
2051.LBB3_14:
2052	add	sp, sp, #464
2053	ldp	x20, x19, [sp, #144]
2054	ldp	x22, x21, [sp, #128]
2055	ldp	x24, x23, [sp, #112]
2056	ldp	x26, x25, [sp, #96]
2057	ldp	x28, x27, [sp, #80]
2058	ldp	x29, x30, [sp, #64]
2059	ldp	d9, d8, [sp, #48]
2060	ldp	d11, d10, [sp, #32]
2061	ldp	d13, d12, [sp, #16]
2062	ldp	d15, d14, [sp], #160
2063	hint	#29
2064	ret
2065.Lfunc_end3:
2066	.size	zfs_blake3_hash_many_sse2, .Lfunc_end3-zfs_blake3_hash_many_sse2
2067	.cfi_endproc
2068	.section	".note.GNU-stack","",@progbits
2069#endif
2070