1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
26 *
27 * This is converted assembly: SSE4.1 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
29 *
30 * Should work on FreeBSD, Linux and macOS
31 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
32 */
33
34#if defined(__aarch64__)
/*
 * GNU property note emitted into .note.gnu.property.
 *
 * Layout of the words below: name size 4, descriptor size 16, note type 5,
 * the NUL-terminated name "GNU", then a single property record with
 * type 3221225472 (0xc0000000), data size 4, data 3, followed by one
 * zero word of padding.
 *
 * NOTE(review): per the AArch64 ELF ABI this looks like
 * GNU_PROPERTY_AARCH64_FEATURE_1_AND with both the BTI (1) and PAC (2)
 * bits set, which would match the hint #34 / #25 / #29 instructions used
 * by the functions in this file - confirm against the ABI document.
 */
35	.text
36	.section	.note.gnu.property,"a",@note
37	.p2align	3
38	.word	4
39	.word	16
40	.word	5
41	.asciz	"GNU"
42	.word	3221225472
43	.word	4
44	.word	3
45	.word	0
46.Lsec_end0:
/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs compress_pre into a 64-byte scratch area on the stack and folds the
 * four resulting 16-byte rows back into the 32 bytes at x0:
 *   [x0 +  0] = row0 ^ row2
 *   [x0 + 16] = row1 ^ row3
 *
 * In:
 *   x0 = 32-byte state; handed to compress_pre as x1 and overwritten here
 *   x1 = forwarded as compress_pre's x2 (64 bytes are read from it there)
 *   w2 = forwarded as compress_pre's w3
 *   x3 = forwarded as compress_pre's x4
 *   w4 = forwarded as compress_pre's w5
 *   (presumably cv, block, block_len, counter and flags of the BLAKE3
 *   reference API - confirm against the C prototype)
 *
 * Stack frame (96 bytes):
 *   [sp, #0..63]  four 16-byte rows written by compress_pre
 *   [sp, #64]     saved x29, x30
 *   [sp, #80]     saved x19 (holds the original x0 across the call)
 */
47	.text
48	.globl	zfs_blake3_compress_in_place_sse41
49	.p2align	2
50	.type	zfs_blake3_compress_in_place_sse41,@function
51zfs_blake3_compress_in_place_sse41:
52	.cfi_startproc
	/* hint #25 is the HINT-space encoding of PACIASP (sign x30 with sp) */
53	hint	#25
54	.cfi_negate_ra_state
55	sub	sp, sp, #96
56	stp	x29, x30, [sp, #64]
57	add	x29, sp, #64
58	str	x19, [sp, #80]
	/* CFA = x29 + 32 = sp on entry; offsets below are relative to it */
59	.cfi_def_cfa w29, 32
60	.cfi_offset w19, -16
61	.cfi_offset w30, -24
62	.cfi_offset w29, -32
	/*
	 * Shift every argument up by one register to make room for the
	 * scratch pointer in x0. The moves run from the highest register
	 * down so no value is overwritten before it has been copied.
	 */
63	mov	x19, x0
64	mov	w5, w4
65	mov	x4, x3
66	mov	w3, w2
67	mov	x2, x1
68	mov	x0, sp
69	mov	x1, x19
70	bl	compress_pre
	/* q0,q1 = rows 0,1 and q2,q3 = rows 2,3 of the scratch state */
71	ldp	q0, q1, [sp]
72	ldp	q2, q3, [sp, #32]
73	eor	v0.16b, v2.16b, v0.16b
74	eor	v1.16b, v3.16b, v1.16b
75	ldp	x29, x30, [sp, #64]
	/* x19 still holds the caller's x0: write the folded state back */
76	stp	q0, q1, [x19]
77	ldr	x19, [sp, #80]
78	add	sp, sp, #96
	/* hint #29 is AUTIASP; sp is back at its entry value at this point */
79	hint	#29
80	ret
81.Lfunc_end0:
82	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
83	.cfi_endproc
84
/*
 * 16-byte constants loaded by compress_pre.
 */
85	.section	.rodata.cst16,"aM",@progbits,16
86	.p2align	4
/*
 * .LCPI1_0: the two 64-bit values are 0xBB67AE856A09E667 and
 * 0xA54FF53A3C6EF372, i.e. the little-endian 32-bit lanes
 * 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A.
 * compress_pre uses this vector as the initial third row of its state
 * (presumably the first four BLAKE3 IV words - confirm against the spec).
 */
87.LCPI1_0:
88	.xword	-4942790177982912921
89	.xword	-6534734903820487822
/*
 * .LCPI1_1: TBL byte indices {2,3,0,1} per 32-bit lane; used as a shuffle
 * mask this rotates every 32-bit lane right by 16 bits.
 */
90.LCPI1_1:
91	.byte	2
92	.byte	3
93	.byte	0
94	.byte	1
95	.byte	6
96	.byte	7
97	.byte	4
98	.byte	5
99	.byte	10
100	.byte	11
101	.byte	8
102	.byte	9
103	.byte	14
104	.byte	15
105	.byte	12
106	.byte	13
/*
 * .LCPI1_2: TBL byte indices {1,2,3,0} per 32-bit lane; used as a shuffle
 * mask this rotates every 32-bit lane right by 8 bits.
 */
107.LCPI1_2:
108	.byte	1
109	.byte	2
110	.byte	3
111	.byte	0
112	.byte	5
113	.byte	6
114	.byte	7
115	.byte	4
116	.byte	9
117	.byte	10
118	.byte	11
119	.byte	8
120	.byte	13
121	.byte	14
122	.byte	15
123	.byte	12
/*
 * compress_pre - file-local helper (no .globl) called by
 * zfs_blake3_compress_in_place_sse41 and zfs_blake3_compress_xof_sse41.
 *
 * In:
 *   x0 = destination for 64 bytes of output (four 16-byte rows)
 *   x1 = 32 bytes of input state, loaded as rows 0 and 1
 *   x2 = 64 bytes of message; all sixteen 32-bit words are read
 *   w3 = value masked to its low 8 bits and placed in lane 2 of row 3
 *   x4 = 64-bit value placed in lanes 0-1 of row 3
 *   w5 = value masked to its low 8 bits and placed in lane 3 of row 3
 *   (presumably cv, block, block_len, counter and flags of the BLAKE3
 *   reference compress_pre() - confirm against the C source)
 *
 * Out:
 *   [x0 + 0 .. x0 + 63] = the four state rows after mixing. The initial
 *   state is also stored there first and is overwritten at the end.
 *
 * Leaf function: no stack use and no calls. Scratch registers are x8,
 * v0-v7 and v16-v22; the callee-saved v8-v15 are not touched.
 *
 * The body is straight-line compiler output. Message-word shuffles
 * (uzp/zip/ext/trn/rev64/mov lane) are interleaved with the mixing
 * arithmetic, so round boundaries are not cleanly separated. The recurring
 * patterns are:
 *   tbl with v0 (.LCPI1_1)        rotate each 32-bit lane right by 16
 *   ushr #12 / shl #20 / orr      rotate each 32-bit lane right by 12
 *   tbl with v1 (.LCPI1_2)        rotate each 32-bit lane right by 8
 *   ushr #7  / shl #25 / orr      rotate each 32-bit lane right by 7
 *   ext ..., #4 / #8 / #12        rotate a whole row by 1 / 2 / 3 lanes
 * The rotate-by-12 sequence occurs 14 times, which is consistent with
 * seven rounds of one column step plus one diagonal step each.
 */
124	.text
125	.p2align	2
126	.type	compress_pre,@function
127compress_pre:
128	.cfi_startproc
	/* hint #34 is the HINT-space encoding of "bti c" */
129	hint	#34
	/*
	 * Build the initial state and store it to [x0]:
	 *   row0 = [x1], row1 = [x1 + 16], row2 = .LCPI1_0,
	 *   row3 = { x4 low, x4 high, w3 & 0xff, w5 & 0xff }
	 */
130	fmov	s1, w3
	/* d0 = 0x000000ff000000ff: keep only the low byte of each 32-bit lane */
131	movi	d0, #0x0000ff000000ff
132	ldr	q2, [x1]
133	adrp	x8, .LCPI1_0
134	mov	v1.s[1], w5
135	str	q2, [x0]
136	ldr	q4, [x8, :lo12:.LCPI1_0]
137	ldr	q5, [x1, #16]
138	adrp	x8, .LCPI1_1
139	and	v0.8b, v1.8b, v0.8b
140	fmov	d1, x4
141	stp	q5, q4, [x0, #16]
142	mov	v1.d[1], v0.d[0]
143	str	q1, [x0, #48]
	/*
	 * Message words 0-7: v3 = even-indexed words, v2 = odd-indexed words.
	 */
144	ldp	q6, q7, [x2]
145	uzp1	v3.4s, v6.4s, v7.4s
146	add	v0.4s, v2.4s, v3.4s
147	uzp2	v2.4s, v6.4s, v7.4s
148	add	v16.4s, v0.4s, v5.4s
	/* v0 = rotate-right-16 shuffle mask, kept live until the final use */
149	ldr	q0, [x8, :lo12:.LCPI1_1]
150	adrp	x8, .LCPI1_2
151	eor	v1.16b, v16.16b, v1.16b
152	add	v7.4s, v16.4s, v2.4s
153	tbl	v1.16b, { v1.16b }, v0.16b
154	add	v4.4s, v1.4s, v4.4s
155	eor	v5.16b, v4.16b, v5.16b
	/* rotate right by 12 */
156	ushr	v6.4s, v5.4s, #12
157	shl	v5.4s, v5.4s, #20
158	orr	v5.16b, v5.16b, v6.16b
159	add	v6.4s, v7.4s, v5.4s
160	eor	v7.16b, v1.16b, v6.16b
	/* v1 = rotate-right-8 shuffle mask, kept live until the final use */
161	ldr	q1, [x8, :lo12:.LCPI1_2]
162	add	x8, x2, #32
163	tbl	v7.16b, { v7.16b }, v1.16b
	/*
	 * Message words 8-15, de-interleaved by ld2:
	 * v16 = even-indexed words, v17 = odd-indexed words.
	 */
164	ld2	{ v16.4s, v17.4s }, [x8]
165	add	v4.4s, v4.4s, v7.4s
166	ext	v7.16b, v7.16b, v7.16b, #8
167	add	v6.4s, v6.4s, v16.4s
168	eor	v5.16b, v4.16b, v5.16b
169	ext	v4.16b, v4.16b, v4.16b, #4
170	ext	v16.16b, v16.16b, v16.16b, #12
171	ext	v6.16b, v6.16b, v6.16b, #12
	/* rotate right by 7 */
172	ushr	v18.4s, v5.4s, #7
173	shl	v5.4s, v5.4s, #25
174	orr	v5.16b, v5.16b, v18.16b
175	ext	v18.16b, v17.16b, v17.16b, #12
176	add	v6.4s, v6.4s, v5.4s
177	mov	v17.16b, v18.16b
178	eor	v7.16b, v7.16b, v6.16b
179	add	v6.4s, v6.4s, v18.4s
180	mov	v17.s[1], v16.s[2]
181	tbl	v7.16b, { v7.16b }, v0.16b
182	add	v4.4s, v4.4s, v7.4s
183	eor	v5.16b, v4.16b, v5.16b
184	ushr	v19.4s, v5.4s, #12
185	shl	v5.4s, v5.4s, #20
186	orr	v5.16b, v5.16b, v19.16b
187	uzp1	v19.4s, v3.4s, v3.4s
188	add	v6.4s, v6.4s, v5.4s
189	ext	v19.16b, v19.16b, v3.16b, #8
190	eor	v7.16b, v7.16b, v6.16b
191	uzp2	v19.4s, v19.4s, v2.4s
192	tbl	v7.16b, { v7.16b }, v1.16b
193	add	v6.4s, v6.4s, v19.4s
194	add	v4.4s, v4.4s, v7.4s
195	ext	v6.16b, v6.16b, v6.16b, #4
196	ext	v7.16b, v7.16b, v7.16b, #8
197	eor	v5.16b, v4.16b, v5.16b
198	ext	v4.16b, v4.16b, v4.16b, #12
199	ushr	v20.4s, v5.4s, #7
200	shl	v5.4s, v5.4s, #25
201	orr	v5.16b, v5.16b, v20.16b
202	ext	v20.16b, v3.16b, v3.16b, #12
203	add	v6.4s, v6.4s, v5.4s
204	ext	v3.16b, v3.16b, v20.16b, #12
205	eor	v7.16b, v7.16b, v6.16b
206	rev64	v3.4s, v3.4s
207	tbl	v7.16b, { v7.16b }, v0.16b
208	trn2	v3.4s, v3.4s, v17.4s
209	add	v4.4s, v4.4s, v7.4s
210	add	v6.4s, v6.4s, v3.4s
211	eor	v5.16b, v4.16b, v5.16b
212	ushr	v17.4s, v5.4s, #12
213	shl	v5.4s, v5.4s, #20
214	orr	v5.16b, v5.16b, v17.16b
215	zip1	v17.2d, v18.2d, v2.2d
216	zip2	v2.4s, v2.4s, v18.4s
217	add	v6.4s, v6.4s, v5.4s
218	mov	v17.s[3], v16.s[3]
219	zip1	v18.4s, v2.4s, v16.4s
220	zip1	v2.4s, v16.4s, v2.4s
221	eor	v7.16b, v7.16b, v6.16b
222	ext	v6.16b, v6.16b, v6.16b, #12
223	ext	v16.16b, v2.16b, v18.16b, #8
224	tbl	v7.16b, { v7.16b }, v1.16b
225	add	v20.4s, v4.4s, v7.4s
226	ext	v4.16b, v17.16b, v17.16b, #12
227	ext	v7.16b, v7.16b, v7.16b, #8
228	eor	v5.16b, v20.16b, v5.16b
229	uzp1	v4.4s, v17.4s, v4.4s
230	ushr	v17.4s, v5.4s, #7
231	shl	v5.4s, v5.4s, #25
232	add	v6.4s, v6.4s, v4.4s
233	orr	v5.16b, v5.16b, v17.16b
234	ext	v17.16b, v20.16b, v20.16b, #4
235	add	v6.4s, v6.4s, v5.4s
236	eor	v7.16b, v7.16b, v6.16b
237	add	v6.4s, v6.4s, v16.4s
238	tbl	v7.16b, { v7.16b }, v0.16b
239	add	v17.4s, v17.4s, v7.4s
240	eor	v5.16b, v17.16b, v5.16b
241	ushr	v2.4s, v5.4s, #12
242	shl	v5.4s, v5.4s, #20
243	orr	v2.16b, v5.16b, v2.16b
244	add	v5.4s, v6.4s, v2.4s
245	ext	v6.16b, v19.16b, v19.16b, #4
246	eor	v7.16b, v7.16b, v5.16b
247	uzp1	v18.4s, v6.4s, v6.4s
248	tbl	v7.16b, { v7.16b }, v1.16b
249	ext	v18.16b, v18.16b, v6.16b, #8
250	add	v17.4s, v17.4s, v7.4s
251	uzp2	v18.4s, v18.4s, v3.4s
252	ext	v7.16b, v7.16b, v7.16b, #8
253	eor	v2.16b, v17.16b, v2.16b
254	add	v5.4s, v5.4s, v18.4s
255	ext	v17.16b, v17.16b, v17.16b, #12
256	ushr	v19.4s, v2.4s, #7
257	shl	v2.4s, v2.4s, #25
258	ext	v5.16b, v5.16b, v5.16b, #4
259	orr	v2.16b, v2.16b, v19.16b
260	ext	v19.16b, v6.16b, v6.16b, #12
261	add	v5.4s, v5.4s, v2.4s
262	ext	v6.16b, v6.16b, v19.16b, #12
263	mov	v19.16b, v16.16b
264	eor	v7.16b, v7.16b, v5.16b
265	rev64	v6.4s, v6.4s
266	mov	v19.s[1], v4.s[2]
267	tbl	v7.16b, { v7.16b }, v0.16b
268	add	v17.4s, v17.4s, v7.4s
269	eor	v20.16b, v17.16b, v2.16b
270	trn2	v2.4s, v6.4s, v19.4s
271	ushr	v6.4s, v20.4s, #12
272	shl	v19.4s, v20.4s, #20
273	add	v5.4s, v5.4s, v2.4s
274	orr	v6.16b, v19.16b, v6.16b
275	add	v19.4s, v5.4s, v6.4s
276	eor	v5.16b, v7.16b, v19.16b
277	zip1	v7.2d, v16.2d, v3.2d
278	zip2	v3.4s, v3.4s, v16.4s
279	tbl	v20.16b, { v5.16b }, v1.16b
280	mov	v7.s[3], v4.s[3]
281	add	v17.4s, v17.4s, v20.4s
282	ext	v5.16b, v7.16b, v7.16b, #12
283	eor	v6.16b, v17.16b, v6.16b
284	uzp1	v5.4s, v7.4s, v5.4s
285	ext	v7.16b, v19.16b, v19.16b, #12
286	ext	v17.16b, v17.16b, v17.16b, #4
287	ushr	v19.4s, v6.4s, #7
288	shl	v6.4s, v6.4s, #25
289	add	v7.4s, v7.4s, v5.4s
290	orr	v6.16b, v6.16b, v19.16b
291	ext	v19.16b, v20.16b, v20.16b, #8
292	add	v7.4s, v7.4s, v6.4s
293	eor	v19.16b, v19.16b, v7.16b
294	tbl	v19.16b, { v19.16b }, v0.16b
295	add	v16.4s, v17.4s, v19.4s
296	zip1	v17.4s, v3.4s, v4.4s
297	zip1	v3.4s, v4.4s, v3.4s
298	eor	v4.16b, v16.16b, v6.16b
299	ext	v17.16b, v3.16b, v17.16b, #8
300	ushr	v3.4s, v4.4s, #12
301	shl	v4.4s, v4.4s, #20
302	add	v6.4s, v7.4s, v17.4s
303	orr	v3.16b, v4.16b, v3.16b
304	add	v4.4s, v6.4s, v3.4s
305	ext	v6.16b, v18.16b, v18.16b, #4
306	eor	v7.16b, v19.16b, v4.16b
307	uzp1	v18.4s, v6.4s, v6.4s
308	tbl	v7.16b, { v7.16b }, v1.16b
309	ext	v18.16b, v18.16b, v6.16b, #8
310	add	v16.4s, v16.4s, v7.4s
311	uzp2	v18.4s, v18.4s, v2.4s
312	ext	v7.16b, v7.16b, v7.16b, #8
313	eor	v3.16b, v16.16b, v3.16b
314	add	v4.4s, v4.4s, v18.4s
315	ext	v16.16b, v16.16b, v16.16b, #12
316	ushr	v19.4s, v3.4s, #7
317	shl	v3.4s, v3.4s, #25
318	ext	v4.16b, v4.16b, v4.16b, #4
319	orr	v3.16b, v3.16b, v19.16b
320	ext	v19.16b, v6.16b, v6.16b, #12
321	add	v4.4s, v4.4s, v3.4s
322	ext	v6.16b, v6.16b, v19.16b, #12
323	mov	v19.16b, v17.16b
324	eor	v7.16b, v7.16b, v4.16b
325	rev64	v6.4s, v6.4s
326	mov	v19.s[1], v5.s[2]
327	tbl	v7.16b, { v7.16b }, v0.16b
328	add	v16.4s, v16.4s, v7.4s
329	eor	v20.16b, v16.16b, v3.16b
330	trn2	v3.4s, v6.4s, v19.4s
331	ushr	v6.4s, v20.4s, #12
332	shl	v19.4s, v20.4s, #20
333	add	v4.4s, v4.4s, v3.4s
334	orr	v6.16b, v19.16b, v6.16b
335	zip1	v19.2d, v17.2d, v2.2d
336	zip2	v2.4s, v2.4s, v17.4s
337	add	v4.4s, v4.4s, v6.4s
338	mov	v19.s[3], v5.s[3]
339	zip1	v17.4s, v2.4s, v5.4s
340	zip1	v2.4s, v5.4s, v2.4s
341	eor	v7.16b, v7.16b, v4.16b
342	ext	v20.16b, v19.16b, v19.16b, #12
343	ext	v4.16b, v4.16b, v4.16b, #12
344	ext	v2.16b, v2.16b, v17.16b, #8
345	tbl	v7.16b, { v7.16b }, v1.16b
346	add	v16.4s, v16.4s, v7.4s
347	ext	v7.16b, v7.16b, v7.16b, #8
348	eor	v21.16b, v16.16b, v6.16b
349	uzp1	v6.4s, v19.4s, v20.4s
350	ext	v16.16b, v16.16b, v16.16b, #4
351	ushr	v19.4s, v21.4s, #7
352	shl	v20.4s, v21.4s, #25
353	add	v4.4s, v4.4s, v6.4s
354	orr	v19.16b, v20.16b, v19.16b
355	add	v4.4s, v4.4s, v19.4s
356	eor	v7.16b, v7.16b, v4.16b
357	add	v4.4s, v4.4s, v2.4s
358	tbl	v7.16b, { v7.16b }, v0.16b
359	add	v16.4s, v16.4s, v7.4s
360	eor	v5.16b, v16.16b, v19.16b
361	ushr	v17.4s, v5.4s, #12
362	shl	v5.4s, v5.4s, #20
363	orr	v5.16b, v5.16b, v17.16b
364	ext	v17.16b, v18.16b, v18.16b, #4
365	add	v4.4s, v4.4s, v5.4s
366	uzp1	v18.4s, v17.4s, v17.4s
367	eor	v7.16b, v7.16b, v4.16b
368	ext	v18.16b, v18.16b, v17.16b, #8
369	tbl	v7.16b, { v7.16b }, v1.16b
370	uzp2	v18.4s, v18.4s, v3.4s
371	add	v16.4s, v16.4s, v7.4s
372	add	v4.4s, v4.4s, v18.4s
373	ext	v7.16b, v7.16b, v7.16b, #8
374	eor	v5.16b, v16.16b, v5.16b
375	ext	v4.16b, v4.16b, v4.16b, #4
376	ext	v16.16b, v16.16b, v16.16b, #12
377	ushr	v19.4s, v5.4s, #7
378	shl	v5.4s, v5.4s, #25
379	orr	v5.16b, v5.16b, v19.16b
380	add	v19.4s, v4.4s, v5.4s
381	eor	v4.16b, v7.16b, v19.16b
382	ext	v7.16b, v17.16b, v17.16b, #12
383	tbl	v20.16b, { v4.16b }, v0.16b
384	ext	v4.16b, v17.16b, v7.16b, #12
385	mov	v7.16b, v2.16b
386	add	v16.4s, v16.4s, v20.4s
387	rev64	v4.4s, v4.4s
388	mov	v7.s[1], v6.s[2]
389	eor	v5.16b, v16.16b, v5.16b
390	trn2	v4.4s, v4.4s, v7.4s
391	ushr	v7.4s, v5.4s, #12
392	shl	v5.4s, v5.4s, #20
393	add	v17.4s, v19.4s, v4.4s
394	zip1	v19.2d, v2.2d, v3.2d
395	zip2	v2.4s, v3.4s, v2.4s
396	orr	v5.16b, v5.16b, v7.16b
397	mov	v19.s[3], v6.s[3]
398	add	v7.4s, v17.4s, v5.4s
399	eor	v17.16b, v20.16b, v7.16b
400	ext	v20.16b, v19.16b, v19.16b, #12
401	ext	v7.16b, v7.16b, v7.16b, #12
402	tbl	v17.16b, { v17.16b }, v1.16b
403	add	v16.4s, v16.4s, v17.4s
404	ext	v17.16b, v17.16b, v17.16b, #8
405	eor	v21.16b, v16.16b, v5.16b
406	uzp1	v5.4s, v19.4s, v20.4s
407	ext	v16.16b, v16.16b, v16.16b, #4
408	ushr	v19.4s, v21.4s, #7
409	shl	v20.4s, v21.4s, #25
410	add	v7.4s, v7.4s, v5.4s
411	orr	v19.16b, v20.16b, v19.16b
412	add	v7.4s, v7.4s, v19.4s
413	eor	v17.16b, v17.16b, v7.16b
414	tbl	v17.16b, { v17.16b }, v0.16b
415	add	v3.4s, v16.4s, v17.4s
416	zip1	v16.4s, v2.4s, v6.4s
417	zip1	v2.4s, v6.4s, v2.4s
418	eor	v6.16b, v3.16b, v19.16b
419	ext	v16.16b, v2.16b, v16.16b, #8
420	ushr	v2.4s, v6.4s, #12
421	shl	v6.4s, v6.4s, #20
422	add	v7.4s, v7.4s, v16.4s
423	orr	v2.16b, v6.16b, v2.16b
424	add	v6.4s, v7.4s, v2.4s
425	ext	v7.16b, v18.16b, v18.16b, #4
426	eor	v17.16b, v17.16b, v6.16b
427	uzp1	v18.4s, v7.4s, v7.4s
428	tbl	v17.16b, { v17.16b }, v1.16b
429	ext	v18.16b, v18.16b, v7.16b, #8
430	add	v3.4s, v3.4s, v17.4s
431	uzp2	v18.4s, v18.4s, v4.4s
432	eor	v2.16b, v3.16b, v2.16b
433	add	v6.4s, v6.4s, v18.4s
434	ext	v3.16b, v3.16b, v3.16b, #12
435	ext	v18.16b, v18.16b, v18.16b, #4
436	ushr	v19.4s, v2.4s, #7
437	shl	v2.4s, v2.4s, #25
438	ext	v6.16b, v6.16b, v6.16b, #4
439	orr	v19.16b, v2.16b, v19.16b
440	ext	v2.16b, v17.16b, v17.16b, #8
441	ext	v17.16b, v7.16b, v7.16b, #12
442	add	v6.4s, v6.4s, v19.4s
443	eor	v2.16b, v2.16b, v6.16b
444	tbl	v20.16b, { v2.16b }, v0.16b
445	ext	v2.16b, v7.16b, v17.16b, #12
446	mov	v7.16b, v16.16b
447	add	v17.4s, v3.4s, v20.4s
448	rev64	v3.4s, v2.4s
449	mov	v7.s[1], v5.s[2]
450	eor	v19.16b, v17.16b, v19.16b
451	trn2	v3.4s, v3.4s, v7.4s
452	ushr	v21.4s, v19.4s, #12
453	shl	v19.4s, v19.4s, #20
454	add	v6.4s, v6.4s, v3.4s
455	orr	v19.16b, v19.16b, v21.16b
456	add	v21.4s, v6.4s, v19.4s
457	eor	v6.16b, v20.16b, v21.16b
458	zip1	v20.2d, v16.2d, v4.2d
459	zip2	v4.4s, v4.4s, v16.4s
460	tbl	v22.16b, { v6.16b }, v1.16b
461	mov	v20.s[3], v5.s[3]
462	add	v17.4s, v17.4s, v22.4s
463	ext	v6.16b, v20.16b, v20.16b, #12
464	eor	v19.16b, v17.16b, v19.16b
465	uzp1	v6.4s, v20.4s, v6.4s
466	ext	v20.16b, v21.16b, v21.16b, #12
467	ext	v17.16b, v17.16b, v17.16b, #4
468	ushr	v21.4s, v19.4s, #7
469	shl	v19.4s, v19.4s, #25
470	add	v20.4s, v20.4s, v6.4s
471	orr	v19.16b, v19.16b, v21.16b
472	ext	v21.16b, v22.16b, v22.16b, #8
473	add	v20.4s, v20.4s, v19.4s
474	eor	v21.16b, v21.16b, v20.16b
475	tbl	v21.16b, { v21.16b }, v0.16b
476	add	v16.4s, v17.4s, v21.4s
477	zip1	v17.4s, v4.4s, v5.4s
478	zip1	v4.4s, v5.4s, v4.4s
479	eor	v5.16b, v16.16b, v19.16b
480	ext	v4.16b, v4.16b, v17.16b, #8
481	ushr	v17.4s, v5.4s, #12
482	shl	v5.4s, v5.4s, #20
483	add	v19.4s, v20.4s, v4.4s
484	ext	v20.16b, v18.16b, v18.16b, #8
485	zip1	v3.2d, v4.2d, v3.2d
486	orr	v5.16b, v5.16b, v17.16b
487	zip2	v2.4s, v2.4s, v4.4s
488	uzp2	v7.4s, v20.4s, v7.4s
489	mov	v3.s[3], v6.s[3]
490	add	v17.4s, v19.4s, v5.4s
491	ext	v7.16b, v7.16b, v20.16b, #4
492	eor	v19.16b, v21.16b, v17.16b
493	ext	v17.16b, v17.16b, v17.16b, #4
494	tbl	v19.16b, { v19.16b }, v1.16b
495	add	v7.4s, v17.4s, v7.4s
496	add	v16.4s, v16.4s, v19.4s
497	ext	v17.16b, v19.16b, v19.16b, #8
498	ext	v19.16b, v18.16b, v18.16b, #12
499	eor	v5.16b, v16.16b, v5.16b
500	ext	v16.16b, v16.16b, v16.16b, #12
501	ext	v18.16b, v18.16b, v19.16b, #12
502	mov	v19.16b, v4.16b
503	ushr	v20.4s, v5.4s, #7
504	shl	v5.4s, v5.4s, #25
505	rev64	v18.4s, v18.4s
506	mov	v19.s[1], v6.s[2]
507	orr	v5.16b, v5.16b, v20.16b
508	trn2	v18.4s, v18.4s, v19.4s
509	add	v7.4s, v5.4s, v7.4s
510	eor	v17.16b, v17.16b, v7.16b
511	add	v7.4s, v7.4s, v18.4s
512	ext	v18.16b, v3.16b, v3.16b, #12
513	tbl	v17.16b, { v17.16b }, v0.16b
514	uzp1	v3.4s, v3.4s, v18.4s
515	add	v16.4s, v16.4s, v17.4s
516	eor	v5.16b, v16.16b, v5.16b
517	ushr	v19.4s, v5.4s, #12
518	shl	v5.4s, v5.4s, #20
519	orr	v5.16b, v5.16b, v19.16b
520	add	v7.4s, v7.4s, v5.4s
521	eor	v17.16b, v17.16b, v7.16b
522	ext	v7.16b, v7.16b, v7.16b, #12
523	tbl	v17.16b, { v17.16b }, v1.16b
524	add	v3.4s, v7.4s, v3.4s
525	add	v16.4s, v16.4s, v17.4s
526	ext	v7.16b, v17.16b, v17.16b, #8
527	eor	v5.16b, v16.16b, v5.16b
528	ext	v16.16b, v16.16b, v16.16b, #4
529	ushr	v18.4s, v5.4s, #7
530	shl	v5.4s, v5.4s, #25
531	orr	v5.16b, v5.16b, v18.16b
532	add	v3.4s, v3.4s, v5.4s
533	eor	v7.16b, v7.16b, v3.16b
	/* last use of the rotate-right-16 mask: v0 is reused as data from here */
534	tbl	v0.16b, { v7.16b }, v0.16b
535	zip1	v7.4s, v2.4s, v6.4s
536	zip1	v2.4s, v6.4s, v2.4s
537	add	v4.4s, v16.4s, v0.4s
538	ext	v2.16b, v2.16b, v7.16b, #8
539	eor	v5.16b, v4.16b, v5.16b
540	add	v2.4s, v3.4s, v2.4s
541	ushr	v6.4s, v5.4s, #12
542	shl	v5.4s, v5.4s, #20
543	orr	v3.16b, v5.16b, v6.16b
544	add	v2.4s, v2.4s, v3.4s
545	eor	v0.16b, v0.16b, v2.16b
546	ext	v2.16b, v2.16b, v2.16b, #4
	/* last use of the rotate-right-8 mask: v1 is reused as data from here */
547	tbl	v0.16b, { v0.16b }, v1.16b
548	add	v1.4s, v4.4s, v0.4s
549	ext	v0.16b, v0.16b, v0.16b, #8
550	eor	v3.16b, v1.16b, v3.16b
551	ext	v1.16b, v1.16b, v1.16b, #12
552	ushr	v4.4s, v3.4s, #7
553	shl	v3.4s, v3.4s, #25
	/*
	 * Write the final rows back over the initial state:
	 * [x0 + 32] = v1, [x0 + 48] = v0, then [x0] = v2, [x0 + 16] = v3.
	 */
554	stp	q1, q0, [x0, #32]
555	orr	v3.16b, v3.16b, v4.16b
556	stp	q2, q3, [x0]
557	ret
558.Lfunc_end1:
559	.size	compress_pre, .Lfunc_end1-compress_pre
560	.cfi_endproc
561
/*
 * zfs_blake3_compress_xof_sse41
 *
 * Runs compress_pre into a 64-byte scratch area on the stack and writes
 * 64 bytes of output to x5. The 32 bytes at x0 are only read:
 *   [x5 +  0] = row0 ^ row2
 *   [x5 + 16] = row1 ^ row3
 *   [x5 + 32] = [x0 +  0] ^ row2
 *   [x5 + 48] = [x0 + 16] ^ row3
 *
 * In:
 *   x0 = 32-byte input state; handed to compress_pre as x1
 *   x1 = forwarded as compress_pre's x2 (64 bytes are read from it there)
 *   w2 = forwarded as compress_pre's w3
 *   x3 = forwarded as compress_pre's x4
 *   w4 = forwarded as compress_pre's w5
 *   x5 = 64-byte output buffer
 *   (presumably cv, block, block_len, counter, flags and out of the
 *   BLAKE3 reference API - confirm against the C prototype)
 *
 * Stack frame (96 bytes):
 *   [sp, #0..63]  four 16-byte rows written by compress_pre
 *   [sp, #64]     saved x29, x30
 *   [sp, #80]     saved x20, x19
 */
562	.globl	zfs_blake3_compress_xof_sse41
563	.p2align	2
564	.type	zfs_blake3_compress_xof_sse41,@function
565zfs_blake3_compress_xof_sse41:
566	.cfi_startproc
	/* hint #25 is the HINT-space encoding of PACIASP (sign x30 with sp) */
567	hint	#25
568	.cfi_negate_ra_state
569	sub	sp, sp, #96
570	stp	x29, x30, [sp, #64]
571	add	x29, sp, #64
572	stp	x20, x19, [sp, #80]
	/* CFA = x29 + 32 = sp on entry; offsets below are relative to it */
573	.cfi_def_cfa w29, 32
574	.cfi_offset w19, -8
575	.cfi_offset w20, -16
576	.cfi_offset w30, -24
577	.cfi_offset w29, -32
	/*
	 * x20 keeps the input state pointer and x19 the output pointer across
	 * the call. x5 must be copied out before w5 is overwritten below.
	 */
578	mov	x20, x0
579	mov	x19, x5
580	mov	w5, w4
581	mov	x4, x3
582	mov	w3, w2
583	mov	x2, x1
584	mov	x0, sp
585	mov	x1, x20
586	bl	compress_pre
	/* q0,q1 = rows 0,1 and q2,q3 = rows 2,3 of the scratch state */
587	ldp	q0, q1, [sp]
588	ldp	q2, q3, [sp, #32]
589	eor	v0.16b, v2.16b, v0.16b
590	eor	v1.16b, v3.16b, v1.16b
591	ldp	x29, x30, [sp, #64]
592	stp	q0, q1, [x19]
	/*
	 * Second half of the output: the input state XORed with rows 2 and 3.
	 * It is loaded from x20 after the first 32 output bytes were stored.
	 */
593	ldr	q0, [x20]
594	eor	v0.16b, v0.16b, v2.16b
595	str	q0, [x19, #32]
596	ldr	q0, [x20, #16]
597	eor	v0.16b, v0.16b, v3.16b
598	str	q0, [x19, #48]
599	ldp	x20, x19, [sp, #80]
600	add	sp, sp, #96
	/* hint #29 is AUTIASP; sp is back at its entry value at this point */
601	hint	#29
602	ret
603.Lfunc_end2:
604	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
605	.cfi_endproc
606
/*
 * 16-byte constants for zfs_blake3_hash_many_sse41.
 */
607	.section	.rodata.cst16,"aM",@progbits,16
608	.p2align	4
/*
 * .LCPI3_0: lane indices {0,1,2,3}. hash_many ANDs this with a mask built
 * from bit 0 of w5 and adds the result to a broadcast of w4, giving each of
 * the four lanes its own offset when that bit is set.
 */
609.LCPI3_0:
610	.word	0
611	.word	1
612	.word	2
613	.word	3
/*
 * .LCPI3_1: TBL byte indices {2,3,0,1} per 32-bit lane; used as a shuffle
 * mask this rotates every 32-bit lane right by 16 bits.
 * Same bytes as .LCPI1_1.
 */
614.LCPI3_1:
615	.byte	2
616	.byte	3
617	.byte	0
618	.byte	1
619	.byte	6
620	.byte	7
621	.byte	4
622	.byte	5
623	.byte	10
624	.byte	11
625	.byte	8
626	.byte	9
627	.byte	14
628	.byte	15
629	.byte	12
630	.byte	13
/*
 * .LCPI3_2: TBL byte indices {1,2,3,0} per 32-bit lane; used as a shuffle
 * mask this rotates every 32-bit lane right by 8 bits.
 * Same bytes as .LCPI1_2.
 */
631.LCPI3_2:
632	.byte	1
633	.byte	2
634	.byte	3
635	.byte	0
636	.byte	5
637	.byte	6
638	.byte	7
639	.byte	4
640	.byte	9
641	.byte	10
642	.byte	11
643	.byte	8
644	.byte	13
645	.byte	14
646	.byte	15
647	.byte	12
/*
 * .LCPI3_3: 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A - the same four
 * 32-bit values that .LCPI1_0 encodes as two 64-bit words (presumably the
 * first four BLAKE3 IV words - confirm against the spec).
 */
648.LCPI3_3:
649	.word	1779033703
650	.word	3144134277
651	.word	1013904242
652	.word	2773480762
653	.text
654	.globl	zfs_blake3_hash_many_sse41
655	.p2align	2
656	.type	zfs_blake3_hash_many_sse41,@function
657zfs_blake3_hash_many_sse41:
658	.cfi_startproc
659	hint	#34
660	stp	d15, d14, [sp, #-144]!
661	stp	d13, d12, [sp, #16]
662	stp	d11, d10, [sp, #32]
663	stp	d9, d8, [sp, #48]
664	stp	x29, x27, [sp, #64]
665	stp	x26, x25, [sp, #80]
666	stp	x24, x23, [sp, #96]
667	stp	x22, x21, [sp, #112]
668	stp	x20, x19, [sp, #128]
669	sub	sp, sp, #368
670	.cfi_def_cfa_offset 512
671	.cfi_offset w19, -8
672	.cfi_offset w20, -16
673	.cfi_offset w21, -24
674	.cfi_offset w22, -32
675	.cfi_offset w23, -40
676	.cfi_offset w24, -48
677	.cfi_offset w25, -56
678	.cfi_offset w26, -64
679	.cfi_offset w27, -72
680	.cfi_offset w29, -80
681	.cfi_offset b8, -88
682	.cfi_offset b9, -96
683	.cfi_offset b10, -104
684	.cfi_offset b11, -112
685	.cfi_offset b12, -120
686	.cfi_offset b13, -128
687	.cfi_offset b14, -136
688	.cfi_offset b15, -144
689	ldr	x8, [sp, #520]
690	adrp	x11, .LCPI3_1
691	ldrb	w9, [sp, #512]
692	adrp	x10, .LCPI3_2
693	cmp	x1, #4
694	b.lo	.LBB3_6
695	adrp	x12, .LCPI3_0
696	sbfx	w13, w5, #0, #1
697	mov	w15, #58983
698	mov	w16, #44677
699	movk	w15, #27145, lsl #16
700	movk	w16, #47975, lsl #16
701	ldr	q0, [x12, :lo12:.LCPI3_0]
702	dup	v1.4s, w13
703	movi	v13.4s, #64
704	mov	w13, #62322
705	mov	w14, #62778
706	orr	w12, w7, w6
707	and	v0.16b, v1.16b, v0.16b
708	ldr	q1, [x11, :lo12:.LCPI3_1]
709	movk	w13, #15470, lsl #16
710	movk	w14, #42319, lsl #16
711	dup	v14.4s, w15
712	stp	q0, q1, [sp, #16]
713	orr	v0.4s, #128, lsl #24
714	str	q0, [sp]
715	dup	v0.4s, w16
716	stp	q0, q14, [sp, #48]
717	b	.LBB3_3
718.LBB3_2:
719	zip1	v0.4s, v29.4s, v8.4s
720	add	x15, x4, #4
721	zip1	v1.4s, v30.4s, v31.4s
722	tst	w5, #0x1
723	zip1	v2.4s, v24.4s, v18.4s
724	csel	x4, x15, x4, ne
725	zip1	v3.4s, v25.4s, v26.4s
726	add	x0, x0, #32
727	zip2	v6.4s, v29.4s, v8.4s
728	sub	x1, x1, #4
729	zip1	v4.2d, v0.2d, v1.2d
730	cmp	x1, #3
731	zip2	v7.4s, v30.4s, v31.4s
732	zip1	v5.2d, v2.2d, v3.2d
733	zip2	v0.2d, v0.2d, v1.2d
734	zip2	v1.2d, v2.2d, v3.2d
735	zip2	v2.4s, v24.4s, v18.4s
736	zip2	v3.4s, v25.4s, v26.4s
737	stp	q4, q5, [x8]
738	zip2	v4.2d, v6.2d, v7.2d
739	stp	q0, q1, [x8, #32]
740	zip1	v0.2d, v6.2d, v7.2d
741	zip1	v1.2d, v2.2d, v3.2d
742	zip2	v2.2d, v2.2d, v3.2d
743	stp	q0, q1, [x8, #64]
744	stp	q4, q2, [x8, #96]
745	add	x8, x8, #128
746	b.ls	.LBB3_6
747.LBB3_3:
748	mov	x15, x3
749	add	x16, x3, #8
750	add	x17, x3, #12
751	add	x19, x3, #16
752	add	x20, x3, #20
753	ld1r	{ v29.4s }, [x15], #4
754	ld1r	{ v30.4s }, [x16]
755	add	x16, x3, #24
756	ld1r	{ v31.4s }, [x17]
757	add	x17, x3, #28
758	ld1r	{ v24.4s }, [x19]
759	ld1r	{ v18.4s }, [x20]
760	ld1r	{ v25.4s }, [x16]
761	ld1r	{ v8.4s }, [x15]
762	ld1r	{ v26.4s }, [x17]
763	cbz	x2, .LBB3_2
764	ldr	q1, [sp, #16]
765	dup	v0.4s, w4
766	lsr	x17, x4, #32
767	mov	x15, xzr
768	ldp	x19, x20, [x0, #16]
769	add	v1.4s, v0.4s, v1.4s
770	mov	x21, x2
771	movi	v0.4s, #128, lsl #24
772	mov	w26, w12
773	str	q1, [sp, #96]
774	eor	v0.16b, v1.16b, v0.16b
775	ldr	q1, [sp]
776	cmgt	v0.4s, v1.4s, v0.4s
777	dup	v1.4s, w17
778	ldp	x16, x17, [x0]
779	sub	v0.4s, v1.4s, v0.4s
780	str	q0, [sp, #80]
781.LBB3_5:
782	add	x23, x16, x15
783	add	x24, x17, x15
784	add	x22, x19, x15
785	add	x25, x20, x15
786	subs	x21, x21, #1
787	add	x15, x15, #64
788	ldp	q1, q2, [x23]
789	csel	w27, w9, wzr, eq
790	orr	w26, w27, w26
791	and	w26, w26, #0xff
792	ldp	q4, q5, [x24]
793	dup	v0.4s, w26
794	mov	w26, w6
795	zip1	v22.4s, v1.4s, v4.4s
796	zip2	v20.4s, v1.4s, v4.4s
797	ldp	q6, q7, [x22]
798	zip1	v17.4s, v2.4s, v5.4s
799	zip2	v23.4s, v2.4s, v5.4s
800	ldp	q16, q21, [x25]
801	zip1	v19.4s, v6.4s, v16.4s
802	zip2	v1.4s, v6.4s, v16.4s
803	ldp	q27, q28, [x23, #32]
804	zip1	v4.4s, v7.4s, v21.4s
805	zip2	v5.4s, v7.4s, v21.4s
806	zip2	v15.2d, v17.2d, v4.2d
807	ldp	q9, q10, [x24, #32]
808	mov	v17.d[1], v4.d[0]
809	add	v4.4s, v30.4s, v25.4s
810	zip2	v11.2d, v23.2d, v5.2d
811	zip2	v3.4s, v27.4s, v9.4s
812	zip1	v7.4s, v27.4s, v9.4s
813	ldp	q12, q6, [x22, #32]
814	mov	v23.d[1], v5.d[0]
815	stp	q11, q3, [sp, #256]
816	add	v5.4s, v31.4s, v26.4s
817	add	v4.4s, v4.4s, v17.4s
818	str	q23, [sp, #352]
819	ldp	q16, q2, [x25, #32]
820	add	v5.4s, v5.4s, v23.4s
821	zip1	v3.4s, v12.4s, v16.4s
822	eor	v0.16b, v5.16b, v0.16b
823	zip1	v9.4s, v6.4s, v2.4s
824	zip2	v2.4s, v6.4s, v2.4s
825	stp	q7, q3, [sp, #208]
826	zip2	v3.4s, v12.4s, v16.4s
827	zip1	v12.4s, v28.4s, v10.4s
828	zip2	v10.4s, v28.4s, v10.4s
829	stp	q17, q2, [sp, #160]
830	zip2	v28.2d, v22.2d, v19.2d
831	mov	v22.d[1], v19.d[0]
832	str	q3, [sp, #240]
833	add	v2.4s, v8.4s, v18.4s
834	eor	v16.16b, v4.16b, v13.16b
835	dup	v17.4s, w13
836	mov	v3.16b, v22.16b
837	stp	q22, q28, [sp, #320]
838	zip2	v22.2d, v20.2d, v1.2d
839	mov	v20.d[1], v1.d[0]
840	add	v1.4s, v29.4s, v24.4s
841	add	v4.4s, v4.4s, v15.4s
842	add	v5.4s, v5.4s, v11.4s
843	add	v2.4s, v2.4s, v20.4s
844	stp	q15, q20, [sp, #288]
845	add	v1.4s, v1.4s, v3.4s
846	ldr	q3, [sp, #96]
847	dup	v20.4s, w14
848	mov	v23.16b, v22.16b
849	mov	v15.16b, v10.16b
850	eor	v6.16b, v1.16b, v3.16b
851	ldr	q3, [sp, #80]
852	add	v1.4s, v1.4s, v28.4s
853	ldr	q28, [sp, #272]
854	str	q23, [sp, #128]
855	eor	v7.16b, v2.16b, v3.16b
856	ldp	q27, q3, [sp, #32]
857	add	v2.4s, v2.4s, v22.4s
858	tbl	v6.16b, { v6.16b }, v27.16b
859	tbl	v7.16b, { v7.16b }, v27.16b
860	tbl	v16.16b, { v16.16b }, v27.16b
861	tbl	v0.16b, { v0.16b }, v27.16b
862	add	v19.4s, v6.4s, v14.4s
863	add	v21.4s, v7.4s, v3.4s
864	add	v30.4s, v16.4s, v17.4s
865	add	v31.4s, v0.4s, v20.4s
866	eor	v24.16b, v19.16b, v24.16b
867	eor	v17.16b, v21.16b, v18.16b
868	ushr	v18.4s, v24.4s, #12
869	shl	v20.4s, v24.4s, #20
870	eor	v24.16b, v30.16b, v25.16b
871	eor	v25.16b, v31.16b, v26.16b
872	ushr	v26.4s, v17.4s, #12
873	shl	v17.4s, v17.4s, #20
874	ushr	v29.4s, v24.4s, #12
875	shl	v24.4s, v24.4s, #20
876	ushr	v8.4s, v25.4s, #12
877	shl	v25.4s, v25.4s, #20
878	orr	v3.16b, v20.16b, v18.16b
879	ldr	q18, [x10, :lo12:.LCPI3_2]
880	orr	v13.16b, v17.16b, v26.16b
881	orr	v24.16b, v24.16b, v29.16b
882	orr	v14.16b, v25.16b, v8.16b
883	add	v8.4s, v1.4s, v3.4s
884	add	v29.4s, v2.4s, v13.4s
885	add	v17.4s, v4.4s, v24.4s
886	add	v20.4s, v5.4s, v14.4s
887	eor	v1.16b, v6.16b, v8.16b
888	eor	v2.16b, v7.16b, v29.16b
889	eor	v4.16b, v16.16b, v17.16b
890	eor	v0.16b, v0.16b, v20.16b
891	tbl	v25.16b, { v1.16b }, v18.16b
892	tbl	v16.16b, { v2.16b }, v18.16b
893	tbl	v6.16b, { v4.16b }, v18.16b
894	tbl	v4.16b, { v0.16b }, v18.16b
895	add	v19.4s, v19.4s, v25.4s
896	add	v21.4s, v21.4s, v16.4s
897	add	v26.4s, v30.4s, v6.4s
898	add	v7.4s, v31.4s, v4.4s
899	eor	v0.16b, v19.16b, v3.16b
900	eor	v1.16b, v21.16b, v13.16b
901	eor	v2.16b, v26.16b, v24.16b
902	eor	v3.16b, v7.16b, v14.16b
903	ushr	v5.4s, v0.4s, #7
904	shl	v0.4s, v0.4s, #25
905	ushr	v24.4s, v1.4s, #7
906	shl	v1.4s, v1.4s, #25
907	ushr	v30.4s, v2.4s, #7
908	shl	v2.4s, v2.4s, #25
909	orr	v5.16b, v0.16b, v5.16b
910	orr	v0.16b, v1.16b, v24.16b
911	ushr	v31.4s, v3.4s, #7
912	orr	v2.16b, v2.16b, v30.16b
913	ldp	q24, q30, [sp, #208]
914	shl	v3.4s, v3.4s, #25
915	zip2	v14.2d, v12.2d, v9.2d
916	mov	v22.16b, v24.16b
917	orr	v1.16b, v3.16b, v31.16b
918	zip2	v3.2d, v24.2d, v30.2d
919	mov	v24.16b, v28.16b
920	mov	v22.d[1], v30.d[0]
921	ldr	q30, [sp, #240]
922	mov	v31.16b, v12.16b
923	stp	q22, q14, [sp, #224]
924	mov	v24.d[1], v30.d[0]
925	add	v12.4s, v8.4s, v22.4s
926	mov	v31.d[1], v9.d[0]
927	add	v22.4s, v29.4s, v24.4s
928	ldr	q29, [sp, #176]
929	zip2	v28.2d, v28.2d, v30.2d
930	mov	v9.16b, v24.16b
931	mov	v15.d[1], v29.d[0]
932	zip2	v8.2d, v10.2d, v29.2d
933	add	v10.4s, v12.4s, v0.4s
934	add	v22.4s, v22.4s, v2.4s
935	str	q9, [sp, #144]
936	add	v20.4s, v20.4s, v15.4s
937	add	v17.4s, v17.4s, v31.4s
938	stp	q3, q8, [sp, #192]
939	eor	v4.16b, v4.16b, v10.16b
940	eor	v25.16b, v25.16b, v22.16b
941	add	v20.4s, v20.4s, v5.4s
942	add	v17.4s, v17.4s, v1.4s
943	tbl	v4.16b, { v4.16b }, v27.16b
944	tbl	v25.16b, { v25.16b }, v27.16b
945	eor	v6.16b, v6.16b, v20.16b
946	eor	v16.16b, v16.16b, v17.16b
947	add	v26.4s, v26.4s, v4.4s
948	add	v7.4s, v7.4s, v25.4s
949	tbl	v6.16b, { v6.16b }, v27.16b
950	tbl	v16.16b, { v16.16b }, v27.16b
951	eor	v0.16b, v26.16b, v0.16b
952	eor	v2.16b, v7.16b, v2.16b
953	add	v21.4s, v21.4s, v6.4s
954	add	v19.4s, v19.4s, v16.4s
955	ushr	v12.4s, v0.4s, #12
956	shl	v0.4s, v0.4s, #20
957	ushr	v13.4s, v2.4s, #12
958	shl	v2.4s, v2.4s, #20
959	eor	v5.16b, v21.16b, v5.16b
960	eor	v1.16b, v19.16b, v1.16b
961	orr	v0.16b, v0.16b, v12.16b
962	add	v10.4s, v10.4s, v3.4s
963	orr	v2.16b, v2.16b, v13.16b
964	ushr	v13.4s, v5.4s, #12
965	shl	v5.4s, v5.4s, #20
966	add	v22.4s, v22.4s, v28.4s
967	ushr	v12.4s, v1.4s, #12
968	shl	v1.4s, v1.4s, #20
969	add	v10.4s, v10.4s, v0.4s
970	orr	v5.16b, v5.16b, v13.16b
971	add	v22.4s, v22.4s, v2.4s
972	add	v20.4s, v20.4s, v8.4s
973	orr	v1.16b, v1.16b, v12.16b
974	add	v17.4s, v17.4s, v14.4s
975	eor	v4.16b, v4.16b, v10.16b
976	eor	v25.16b, v25.16b, v22.16b
977	add	v20.4s, v20.4s, v5.4s
978	add	v17.4s, v17.4s, v1.4s
979	tbl	v4.16b, { v4.16b }, v18.16b
980	tbl	v25.16b, { v25.16b }, v18.16b
981	eor	v6.16b, v6.16b, v20.16b
982	eor	v16.16b, v16.16b, v17.16b
983	add	v26.4s, v26.4s, v4.4s
984	add	v7.4s, v7.4s, v25.4s
985	tbl	v6.16b, { v6.16b }, v18.16b
986	tbl	v16.16b, { v16.16b }, v18.16b
987	eor	v0.16b, v26.16b, v0.16b
988	eor	v2.16b, v7.16b, v2.16b
989	add	v21.4s, v21.4s, v6.4s
990	add	v19.4s, v19.4s, v16.4s
991	ushr	v12.4s, v0.4s, #7
992	shl	v0.4s, v0.4s, #25
993	ushr	v13.4s, v2.4s, #7
994	shl	v2.4s, v2.4s, #25
995	eor	v5.16b, v21.16b, v5.16b
996	eor	v1.16b, v19.16b, v1.16b
997	orr	v0.16b, v0.16b, v12.16b
998	add	v22.4s, v22.4s, v23.4s
999	orr	v2.16b, v2.16b, v13.16b
1000	ushr	v13.4s, v5.4s, #7
1001	shl	v5.4s, v5.4s, #25
1002	add	v17.4s, v17.4s, v11.4s
1003	mov	v30.16b, v28.16b
1004	mov	v28.16b, v23.16b
1005	ldr	q23, [sp, #304]
1006	ushr	v12.4s, v1.4s, #7
1007	shl	v1.4s, v1.4s, #25
1008	add	v22.4s, v22.4s, v0.4s
1009	mov	v29.16b, v31.16b
1010	ldr	q31, [sp, #160]
1011	orr	v5.16b, v5.16b, v13.16b
1012	add	v17.4s, v17.4s, v2.4s
1013	add	v10.4s, v10.4s, v23.4s
1014	orr	v1.16b, v1.16b, v12.16b
1015	str	q29, [sp, #272]
1016	eor	v16.16b, v16.16b, v22.16b
1017	add	v20.4s, v20.4s, v31.4s
1018	eor	v6.16b, v6.16b, v17.16b
1019	add	v10.4s, v10.4s, v5.4s
1020	tbl	v16.16b, { v16.16b }, v27.16b
1021	add	v20.4s, v20.4s, v1.4s
1022	tbl	v6.16b, { v6.16b }, v27.16b
1023	eor	v25.16b, v25.16b, v10.16b
1024	add	v21.4s, v21.4s, v16.4s
1025	eor	v4.16b, v4.16b, v20.16b
1026	add	v26.4s, v26.4s, v6.4s
1027	tbl	v25.16b, { v25.16b }, v27.16b
1028	eor	v0.16b, v21.16b, v0.16b
1029	tbl	v4.16b, { v4.16b }, v27.16b
1030	eor	v2.16b, v26.16b, v2.16b
1031	add	v19.4s, v19.4s, v25.4s
1032	ushr	v12.4s, v0.4s, #12
1033	shl	v0.4s, v0.4s, #20
1034	add	v7.4s, v7.4s, v4.4s
1035	ushr	v13.4s, v2.4s, #12
1036	shl	v2.4s, v2.4s, #20
1037	eor	v5.16b, v5.16b, v19.16b
1038	add	v22.4s, v22.4s, v24.4s
1039	ldr	q24, [sp, #320]
1040	orr	v0.16b, v0.16b, v12.16b
1041	eor	v1.16b, v7.16b, v1.16b
1042	orr	v2.16b, v2.16b, v13.16b
1043	ushr	v12.4s, v5.4s, #12
1044	shl	v5.4s, v5.4s, #20
1045	add	v17.4s, v17.4s, v24.4s
1046	ldr	q24, [sp, #352]
1047	ushr	v13.4s, v1.4s, #12
1048	shl	v1.4s, v1.4s, #20
1049	add	v22.4s, v22.4s, v0.4s
1050	orr	v5.16b, v5.16b, v12.16b
1051	add	v17.4s, v17.4s, v2.4s
1052	add	v10.4s, v10.4s, v24.4s
1053	ldr	q24, [sp, #336]
1054	orr	v1.16b, v1.16b, v13.16b
1055	eor	v16.16b, v16.16b, v22.16b
1056	add	v20.4s, v20.4s, v14.4s
1057	eor	v6.16b, v6.16b, v17.16b
1058	add	v10.4s, v10.4s, v5.4s
1059	tbl	v16.16b, { v16.16b }, v18.16b
1060	add	v20.4s, v20.4s, v1.4s
1061	tbl	v6.16b, { v6.16b }, v18.16b
1062	eor	v25.16b, v25.16b, v10.16b
1063	add	v21.4s, v21.4s, v16.4s
1064	eor	v4.16b, v4.16b, v20.16b
1065	add	v26.4s, v26.4s, v6.4s
1066	tbl	v25.16b, { v25.16b }, v18.16b
1067	eor	v0.16b, v21.16b, v0.16b
1068	tbl	v4.16b, { v4.16b }, v18.16b
1069	eor	v2.16b, v26.16b, v2.16b
1070	add	v19.4s, v19.4s, v25.4s
1071	ushr	v12.4s, v0.4s, #7
1072	shl	v0.4s, v0.4s, #25
1073	add	v7.4s, v7.4s, v4.4s
1074	ushr	v13.4s, v2.4s, #7
1075	shl	v2.4s, v2.4s, #25
1076	eor	v5.16b, v19.16b, v5.16b
1077	orr	v0.16b, v0.16b, v12.16b
1078	eor	v1.16b, v7.16b, v1.16b
1079	add	v10.4s, v10.4s, v24.4s
1080	orr	v2.16b, v2.16b, v13.16b
1081	ushr	v12.4s, v5.4s, #7
1082	shl	v5.4s, v5.4s, #25
1083	add	v22.4s, v22.4s, v29.4s
1084	ushr	v13.4s, v1.4s, #7
1085	shl	v1.4s, v1.4s, #25
1086	add	v10.4s, v10.4s, v0.4s
1087	orr	v5.16b, v5.16b, v12.16b
1088	add	v22.4s, v22.4s, v2.4s
1089	add	v20.4s, v20.4s, v8.4s
1090	ldr	q8, [sp, #288]
1091	orr	v1.16b, v1.16b, v13.16b
1092	add	v17.4s, v17.4s, v3.4s
1093	ldr	q3, [sp, #352]
1094	eor	v4.16b, v4.16b, v10.16b
1095	eor	v25.16b, v25.16b, v22.16b
1096	add	v20.4s, v20.4s, v5.4s
1097	add	v17.4s, v17.4s, v1.4s
1098	tbl	v4.16b, { v4.16b }, v27.16b
1099	tbl	v25.16b, { v25.16b }, v27.16b
1100	eor	v6.16b, v6.16b, v20.16b
1101	eor	v16.16b, v16.16b, v17.16b
1102	add	v26.4s, v26.4s, v4.4s
1103	add	v7.4s, v7.4s, v25.4s
1104	tbl	v6.16b, { v6.16b }, v27.16b
1105	tbl	v16.16b, { v16.16b }, v27.16b
1106	eor	v0.16b, v26.16b, v0.16b
1107	eor	v2.16b, v7.16b, v2.16b
1108	add	v21.4s, v21.4s, v6.4s
1109	add	v19.4s, v19.4s, v16.4s
1110	ushr	v12.4s, v0.4s, #12
1111	shl	v0.4s, v0.4s, #20
1112	ushr	v13.4s, v2.4s, #12
1113	shl	v2.4s, v2.4s, #20
1114	eor	v5.16b, v21.16b, v5.16b
1115	eor	v1.16b, v19.16b, v1.16b
1116	orr	v0.16b, v0.16b, v12.16b
1117	add	v10.4s, v10.4s, v30.4s
1118	orr	v2.16b, v2.16b, v13.16b
1119	ushr	v13.4s, v5.4s, #12
1120	shl	v5.4s, v5.4s, #20
1121	add	v22.4s, v22.4s, v8.4s
1122	mov	v24.16b, v30.16b
1123	mov	v30.16b, v15.16b
1124	add	v17.4s, v17.4s, v15.4s
1125	ldr	q15, [sp, #224]
1126	ushr	v12.4s, v1.4s, #12
1127	shl	v1.4s, v1.4s, #20
1128	add	v10.4s, v10.4s, v0.4s
1129	str	q30, [sp, #176]
1130	orr	v5.16b, v5.16b, v13.16b
1131	add	v22.4s, v22.4s, v2.4s
1132	add	v20.4s, v20.4s, v15.4s
1133	orr	v1.16b, v1.16b, v12.16b
1134	eor	v4.16b, v4.16b, v10.16b
1135	eor	v25.16b, v25.16b, v22.16b
1136	add	v20.4s, v20.4s, v5.4s
1137	add	v17.4s, v17.4s, v1.4s
1138	tbl	v4.16b, { v4.16b }, v18.16b
1139	tbl	v25.16b, { v25.16b }, v18.16b
1140	eor	v6.16b, v6.16b, v20.16b
1141	eor	v16.16b, v16.16b, v17.16b
1142	add	v26.4s, v26.4s, v4.4s
1143	add	v7.4s, v7.4s, v25.4s
1144	tbl	v6.16b, { v6.16b }, v18.16b
1145	tbl	v16.16b, { v16.16b }, v18.16b
1146	eor	v0.16b, v26.16b, v0.16b
1147	eor	v2.16b, v7.16b, v2.16b
1148	add	v21.4s, v21.4s, v6.4s
1149	add	v19.4s, v19.4s, v16.4s
1150	ushr	v12.4s, v0.4s, #7
1151	shl	v0.4s, v0.4s, #25
1152	ushr	v13.4s, v2.4s, #7
1153	shl	v2.4s, v2.4s, #25
1154	eor	v5.16b, v21.16b, v5.16b
1155	eor	v1.16b, v19.16b, v1.16b
1156	orr	v0.16b, v0.16b, v12.16b
1157	add	v22.4s, v22.4s, v9.4s
1158	orr	v2.16b, v2.16b, v13.16b
1159	ushr	v13.4s, v5.4s, #7
1160	shl	v5.4s, v5.4s, #25
1161	add	v17.4s, v17.4s, v14.4s
1162	ushr	v12.4s, v1.4s, #7
1163	shl	v1.4s, v1.4s, #25
1164	add	v22.4s, v22.4s, v0.4s
1165	orr	v5.16b, v5.16b, v13.16b
1166	add	v17.4s, v17.4s, v2.4s
1167	add	v10.4s, v10.4s, v28.4s
1168	orr	v1.16b, v1.16b, v12.16b
1169	eor	v16.16b, v16.16b, v22.16b
1170	add	v20.4s, v20.4s, v11.4s
1171	eor	v6.16b, v6.16b, v17.16b
1172	add	v10.4s, v10.4s, v5.4s
1173	tbl	v16.16b, { v16.16b }, v27.16b
1174	add	v20.4s, v20.4s, v1.4s
1175	tbl	v6.16b, { v6.16b }, v27.16b
1176	eor	v25.16b, v25.16b, v10.16b
1177	add	v21.4s, v21.4s, v16.4s
1178	eor	v4.16b, v4.16b, v20.16b
1179	add	v26.4s, v26.4s, v6.4s
1180	tbl	v25.16b, { v25.16b }, v27.16b
1181	eor	v0.16b, v21.16b, v0.16b
1182	tbl	v4.16b, { v4.16b }, v27.16b
1183	eor	v2.16b, v26.16b, v2.16b
1184	add	v19.4s, v19.4s, v25.4s
1185	ushr	v12.4s, v0.4s, #12
1186	shl	v0.4s, v0.4s, #20
1187	add	v7.4s, v7.4s, v4.4s
1188	ushr	v13.4s, v2.4s, #12
1189	shl	v2.4s, v2.4s, #20
1190	eor	v5.16b, v5.16b, v19.16b
1191	orr	v0.16b, v0.16b, v12.16b
1192	eor	v1.16b, v7.16b, v1.16b
1193	add	v22.4s, v22.4s, v29.4s
1194	orr	v2.16b, v2.16b, v13.16b
1195	ushr	v12.4s, v5.4s, #12
1196	shl	v5.4s, v5.4s, #20
1197	add	v17.4s, v17.4s, v23.4s
1198	ushr	v13.4s, v1.4s, #12
1199	shl	v1.4s, v1.4s, #20
1200	add	v22.4s, v22.4s, v0.4s
1201	orr	v5.16b, v5.16b, v12.16b
1202	add	v17.4s, v17.4s, v2.4s
1203	add	v10.4s, v10.4s, v31.4s
1204	orr	v1.16b, v1.16b, v13.16b
1205	eor	v16.16b, v16.16b, v22.16b
1206	add	v20.4s, v20.4s, v30.4s
1207	eor	v6.16b, v6.16b, v17.16b
1208	add	v10.4s, v10.4s, v5.4s
1209	tbl	v16.16b, { v16.16b }, v18.16b
1210	add	v20.4s, v20.4s, v1.4s
1211	tbl	v6.16b, { v6.16b }, v18.16b
1212	eor	v25.16b, v25.16b, v10.16b
1213	add	v21.4s, v21.4s, v16.4s
1214	eor	v4.16b, v4.16b, v20.16b
1215	add	v26.4s, v26.4s, v6.4s
1216	tbl	v25.16b, { v25.16b }, v18.16b
1217	eor	v0.16b, v21.16b, v0.16b
1218	tbl	v4.16b, { v4.16b }, v18.16b
1219	eor	v2.16b, v26.16b, v2.16b
1220	add	v19.4s, v19.4s, v25.4s
1221	ushr	v12.4s, v0.4s, #7
1222	shl	v0.4s, v0.4s, #25
1223	add	v7.4s, v7.4s, v4.4s
1224	ushr	v13.4s, v2.4s, #7
1225	shl	v2.4s, v2.4s, #25
1226	eor	v5.16b, v19.16b, v5.16b
1227	add	v10.4s, v10.4s, v3.4s
1228	ldr	q3, [sp, #192]
1229	orr	v0.16b, v0.16b, v12.16b
1230	eor	v1.16b, v7.16b, v1.16b
1231	orr	v2.16b, v2.16b, v13.16b
1232	ushr	v12.4s, v5.4s, #7
1233	shl	v5.4s, v5.4s, #25
1234	add	v22.4s, v22.4s, v3.4s
1235	ushr	v13.4s, v1.4s, #7
1236	shl	v1.4s, v1.4s, #25
1237	add	v10.4s, v10.4s, v0.4s
1238	orr	v5.16b, v5.16b, v12.16b
1239	add	v22.4s, v22.4s, v2.4s
1240	add	v20.4s, v20.4s, v15.4s
1241	ldr	q15, [sp, #128]
1242	orr	v1.16b, v1.16b, v13.16b
1243	add	v17.4s, v17.4s, v24.4s
1244	eor	v4.16b, v4.16b, v10.16b
1245	eor	v25.16b, v25.16b, v22.16b
1246	add	v20.4s, v20.4s, v5.4s
1247	add	v17.4s, v17.4s, v1.4s
1248	tbl	v4.16b, { v4.16b }, v27.16b
1249	tbl	v25.16b, { v25.16b }, v27.16b
1250	eor	v6.16b, v6.16b, v20.16b
1251	eor	v16.16b, v16.16b, v17.16b
1252	add	v26.4s, v26.4s, v4.4s
1253	add	v7.4s, v7.4s, v25.4s
1254	tbl	v6.16b, { v6.16b }, v27.16b
1255	tbl	v16.16b, { v16.16b }, v27.16b
1256	eor	v0.16b, v26.16b, v0.16b
1257	eor	v2.16b, v7.16b, v2.16b
1258	add	v21.4s, v21.4s, v6.4s
1259	add	v19.4s, v19.4s, v16.4s
1260	ushr	v12.4s, v0.4s, #12
1261	shl	v0.4s, v0.4s, #20
1262	ushr	v13.4s, v2.4s, #12
1263	shl	v2.4s, v2.4s, #20
1264	eor	v5.16b, v21.16b, v5.16b
1265	ldp	q23, q11, [sp, #320]
1266	eor	v1.16b, v19.16b, v1.16b
1267	orr	v0.16b, v0.16b, v12.16b
1268	add	v10.4s, v10.4s, v8.4s
1269	orr	v2.16b, v2.16b, v13.16b
1270	ushr	v13.4s, v5.4s, #12
1271	shl	v5.4s, v5.4s, #20
1272	add	v22.4s, v22.4s, v23.4s
1273	ushr	v12.4s, v1.4s, #12
1274	shl	v1.4s, v1.4s, #20
1275	add	v10.4s, v10.4s, v0.4s
1276	mov	v28.16b, v31.16b
1277	mov	v31.16b, v8.16b
1278	ldr	q8, [sp, #208]
1279	orr	v5.16b, v5.16b, v13.16b
1280	add	v22.4s, v22.4s, v2.4s
1281	add	v20.4s, v20.4s, v11.4s
1282	orr	v1.16b, v1.16b, v12.16b
1283	add	v17.4s, v17.4s, v8.4s
1284	eor	v4.16b, v4.16b, v10.16b
1285	eor	v25.16b, v25.16b, v22.16b
1286	add	v20.4s, v20.4s, v5.4s
1287	add	v17.4s, v17.4s, v1.4s
1288	tbl	v4.16b, { v4.16b }, v18.16b
1289	tbl	v25.16b, { v25.16b }, v18.16b
1290	eor	v6.16b, v6.16b, v20.16b
1291	eor	v16.16b, v16.16b, v17.16b
1292	add	v26.4s, v26.4s, v4.4s
1293	add	v7.4s, v7.4s, v25.4s
1294	tbl	v6.16b, { v6.16b }, v18.16b
1295	tbl	v16.16b, { v16.16b }, v18.16b
1296	eor	v0.16b, v26.16b, v0.16b
1297	eor	v2.16b, v7.16b, v2.16b
1298	add	v21.4s, v21.4s, v6.4s
1299	add	v19.4s, v19.4s, v16.4s
1300	ushr	v12.4s, v0.4s, #7
1301	shl	v0.4s, v0.4s, #25
1302	ushr	v13.4s, v2.4s, #7
1303	shl	v2.4s, v2.4s, #25
1304	eor	v5.16b, v21.16b, v5.16b
1305	eor	v1.16b, v19.16b, v1.16b
1306	orr	v0.16b, v0.16b, v12.16b
1307	add	v22.4s, v22.4s, v29.4s
1308	orr	v2.16b, v2.16b, v13.16b
1309	ushr	v13.4s, v5.4s, #7
1310	shl	v5.4s, v5.4s, #25
1311	add	v17.4s, v17.4s, v30.4s
1312	ushr	v12.4s, v1.4s, #7
1313	shl	v1.4s, v1.4s, #25
1314	add	v22.4s, v22.4s, v0.4s
1315	orr	v5.16b, v5.16b, v13.16b
1316	add	v17.4s, v17.4s, v2.4s
1317	add	v10.4s, v10.4s, v9.4s
1318	orr	v1.16b, v1.16b, v12.16b
1319	eor	v16.16b, v16.16b, v22.16b
1320	add	v20.4s, v20.4s, v14.4s
1321	ldr	q14, [sp, #256]
1322	eor	v6.16b, v6.16b, v17.16b
1323	add	v10.4s, v10.4s, v5.4s
1324	tbl	v16.16b, { v16.16b }, v27.16b
1325	add	v20.4s, v20.4s, v1.4s
1326	tbl	v6.16b, { v6.16b }, v27.16b
1327	eor	v25.16b, v25.16b, v10.16b
1328	add	v21.4s, v21.4s, v16.4s
1329	eor	v4.16b, v4.16b, v20.16b
1330	add	v26.4s, v26.4s, v6.4s
1331	tbl	v25.16b, { v25.16b }, v27.16b
1332	eor	v0.16b, v21.16b, v0.16b
1333	tbl	v4.16b, { v4.16b }, v27.16b
1334	eor	v2.16b, v26.16b, v2.16b
1335	add	v19.4s, v19.4s, v25.4s
1336	ushr	v12.4s, v0.4s, #12
1337	shl	v0.4s, v0.4s, #20
1338	add	v7.4s, v7.4s, v4.4s
1339	ushr	v13.4s, v2.4s, #12
1340	shl	v2.4s, v2.4s, #20
1341	eor	v5.16b, v5.16b, v19.16b
1342	orr	v0.16b, v0.16b, v12.16b
1343	eor	v1.16b, v7.16b, v1.16b
1344	add	v22.4s, v22.4s, v3.4s
1345	orr	v2.16b, v2.16b, v13.16b
1346	ushr	v12.4s, v5.4s, #12
1347	shl	v5.4s, v5.4s, #20
1348	add	v17.4s, v17.4s, v15.4s
1349	ushr	v13.4s, v1.4s, #12
1350	shl	v1.4s, v1.4s, #20
1351	add	v22.4s, v22.4s, v0.4s
1352	orr	v5.16b, v5.16b, v12.16b
1353	add	v17.4s, v17.4s, v2.4s
1354	add	v10.4s, v10.4s, v14.4s
1355	orr	v1.16b, v1.16b, v13.16b
1356	eor	v16.16b, v16.16b, v22.16b
1357	add	v20.4s, v20.4s, v8.4s
1358	eor	v6.16b, v6.16b, v17.16b
1359	add	v10.4s, v10.4s, v5.4s
1360	tbl	v16.16b, { v16.16b }, v18.16b
1361	add	v20.4s, v20.4s, v1.4s
1362	tbl	v6.16b, { v6.16b }, v18.16b
1363	eor	v25.16b, v25.16b, v10.16b
1364	add	v21.4s, v21.4s, v16.4s
1365	eor	v4.16b, v4.16b, v20.16b
1366	add	v26.4s, v26.4s, v6.4s
1367	tbl	v25.16b, { v25.16b }, v18.16b
1368	eor	v0.16b, v21.16b, v0.16b
1369	tbl	v4.16b, { v4.16b }, v18.16b
1370	eor	v2.16b, v26.16b, v2.16b
1371	add	v19.4s, v19.4s, v25.4s
1372	ushr	v12.4s, v0.4s, #7
1373	shl	v0.4s, v0.4s, #25
1374	add	v7.4s, v7.4s, v4.4s
1375	ushr	v13.4s, v2.4s, #7
1376	shl	v2.4s, v2.4s, #25
1377	eor	v5.16b, v19.16b, v5.16b
1378	orr	v0.16b, v0.16b, v12.16b
1379	eor	v1.16b, v7.16b, v1.16b
1380	add	v10.4s, v10.4s, v28.4s
1381	orr	v2.16b, v2.16b, v13.16b
1382	ushr	v12.4s, v5.4s, #7
1383	shl	v5.4s, v5.4s, #25
1384	add	v22.4s, v22.4s, v24.4s
1385	ushr	v13.4s, v1.4s, #7
1386	shl	v1.4s, v1.4s, #25
1387	add	v10.4s, v10.4s, v0.4s
1388	orr	v5.16b, v5.16b, v12.16b
1389	add	v22.4s, v22.4s, v2.4s
1390	add	v20.4s, v20.4s, v11.4s
1391	ldr	q11, [sp, #304]
1392	orr	v1.16b, v1.16b, v13.16b
1393	add	v17.4s, v17.4s, v31.4s
1394	ldr	q31, [sp, #224]
1395	eor	v4.16b, v4.16b, v10.16b
1396	eor	v25.16b, v25.16b, v22.16b
1397	add	v20.4s, v20.4s, v5.4s
1398	add	v17.4s, v17.4s, v1.4s
1399	tbl	v4.16b, { v4.16b }, v27.16b
1400	tbl	v25.16b, { v25.16b }, v27.16b
1401	eor	v6.16b, v6.16b, v20.16b
1402	eor	v16.16b, v16.16b, v17.16b
1403	add	v26.4s, v26.4s, v4.4s
1404	add	v7.4s, v7.4s, v25.4s
1405	tbl	v6.16b, { v6.16b }, v27.16b
1406	tbl	v16.16b, { v16.16b }, v27.16b
1407	eor	v0.16b, v26.16b, v0.16b
1408	eor	v2.16b, v7.16b, v2.16b
1409	add	v21.4s, v21.4s, v6.4s
1410	add	v19.4s, v19.4s, v16.4s
1411	ushr	v12.4s, v0.4s, #12
1412	shl	v0.4s, v0.4s, #20
1413	ushr	v13.4s, v2.4s, #12
1414	shl	v2.4s, v2.4s, #20
1415	eor	v5.16b, v21.16b, v5.16b
1416	eor	v1.16b, v19.16b, v1.16b
1417	orr	v0.16b, v0.16b, v12.16b
1418	add	v10.4s, v10.4s, v23.4s
1419	ldr	q23, [sp, #240]
1420	orr	v2.16b, v2.16b, v13.16b
1421	ushr	v13.4s, v5.4s, #12
1422	shl	v5.4s, v5.4s, #20
1423	add	v22.4s, v22.4s, v11.4s
1424	mov	v30.16b, v8.16b
1425	mov	v8.16b, v24.16b
1426	ldr	q24, [sp, #352]
1427	ushr	v12.4s, v1.4s, #12
1428	shl	v1.4s, v1.4s, #20
1429	add	v10.4s, v10.4s, v0.4s
1430	orr	v5.16b, v5.16b, v13.16b
1431	str	q8, [sp, #112]
1432	add	v22.4s, v22.4s, v2.4s
1433	add	v20.4s, v20.4s, v24.4s
1434	orr	v1.16b, v1.16b, v12.16b
1435	add	v17.4s, v17.4s, v31.4s
1436	eor	v4.16b, v4.16b, v10.16b
1437	eor	v25.16b, v25.16b, v22.16b
1438	add	v20.4s, v20.4s, v5.4s
1439	add	v17.4s, v17.4s, v1.4s
1440	tbl	v4.16b, { v4.16b }, v18.16b
1441	tbl	v25.16b, { v25.16b }, v18.16b
1442	eor	v6.16b, v6.16b, v20.16b
1443	eor	v16.16b, v16.16b, v17.16b
1444	add	v26.4s, v26.4s, v4.4s
1445	add	v7.4s, v7.4s, v25.4s
1446	tbl	v6.16b, { v6.16b }, v18.16b
1447	tbl	v16.16b, { v16.16b }, v18.16b
1448	eor	v0.16b, v26.16b, v0.16b
1449	eor	v2.16b, v7.16b, v2.16b
1450	add	v21.4s, v21.4s, v6.4s
1451	mov	v29.16b, v3.16b
1452	add	v19.4s, v19.4s, v16.4s
1453	ushr	v12.4s, v0.4s, #7
1454	shl	v0.4s, v0.4s, #25
1455	ushr	v13.4s, v2.4s, #7
1456	shl	v2.4s, v2.4s, #25
1457	eor	v5.16b, v21.16b, v5.16b
1458	eor	v1.16b, v19.16b, v1.16b
1459	orr	v0.16b, v0.16b, v12.16b
1460	add	v22.4s, v22.4s, v29.4s
1461	orr	v2.16b, v2.16b, v13.16b
1462	ushr	v13.4s, v5.4s, #7
1463	shl	v5.4s, v5.4s, #25
1464	add	v17.4s, v17.4s, v30.4s
1465	ldr	q30, [sp, #272]
1466	ushr	v12.4s, v1.4s, #7
1467	shl	v1.4s, v1.4s, #25
1468	add	v22.4s, v22.4s, v0.4s
1469	mov	v3.16b, v28.16b
1470	ldr	q28, [sp, #176]
1471	orr	v5.16b, v5.16b, v13.16b
1472	add	v17.4s, v17.4s, v2.4s
1473	add	v10.4s, v10.4s, v30.4s
1474	orr	v1.16b, v1.16b, v12.16b
1475	eor	v16.16b, v16.16b, v22.16b
1476	add	v20.4s, v20.4s, v28.4s
1477	eor	v6.16b, v6.16b, v17.16b
1478	add	v10.4s, v10.4s, v5.4s
1479	tbl	v16.16b, { v16.16b }, v27.16b
1480	add	v20.4s, v20.4s, v1.4s
1481	tbl	v6.16b, { v6.16b }, v27.16b
1482	eor	v25.16b, v25.16b, v10.16b
1483	add	v21.4s, v21.4s, v16.4s
1484	eor	v4.16b, v4.16b, v20.16b
1485	add	v26.4s, v26.4s, v6.4s
1486	tbl	v25.16b, { v25.16b }, v27.16b
1487	eor	v0.16b, v21.16b, v0.16b
1488	tbl	v4.16b, { v4.16b }, v27.16b
1489	eor	v2.16b, v26.16b, v2.16b
1490	add	v19.4s, v19.4s, v25.4s
1491	ushr	v12.4s, v0.4s, #12
1492	shl	v0.4s, v0.4s, #20
1493	add	v7.4s, v7.4s, v4.4s
1494	ushr	v13.4s, v2.4s, #12
1495	shl	v2.4s, v2.4s, #20
1496	eor	v5.16b, v5.16b, v19.16b
1497	orr	v0.16b, v0.16b, v12.16b
1498	eor	v1.16b, v7.16b, v1.16b
1499	add	v22.4s, v22.4s, v8.4s
1500	orr	v2.16b, v2.16b, v13.16b
1501	ushr	v12.4s, v5.4s, #12
1502	shl	v5.4s, v5.4s, #20
1503	add	v17.4s, v17.4s, v9.4s
1504	ldr	q9, [sp, #320]
1505	ushr	v13.4s, v1.4s, #12
1506	shl	v1.4s, v1.4s, #20
1507	add	v22.4s, v22.4s, v0.4s
1508	orr	v5.16b, v5.16b, v12.16b
1509	add	v17.4s, v17.4s, v2.4s
1510	add	v10.4s, v10.4s, v23.4s
1511	orr	v1.16b, v1.16b, v13.16b
1512	eor	v16.16b, v16.16b, v22.16b
1513	add	v20.4s, v20.4s, v31.4s
1514	eor	v6.16b, v6.16b, v17.16b
1515	add	v10.4s, v10.4s, v5.4s
1516	tbl	v16.16b, { v16.16b }, v18.16b
1517	add	v20.4s, v20.4s, v1.4s
1518	tbl	v6.16b, { v6.16b }, v18.16b
1519	eor	v25.16b, v25.16b, v10.16b
1520	add	v21.4s, v21.4s, v16.4s
1521	eor	v4.16b, v4.16b, v20.16b
1522	add	v26.4s, v26.4s, v6.4s
1523	tbl	v25.16b, { v25.16b }, v18.16b
1524	eor	v0.16b, v21.16b, v0.16b
1525	tbl	v4.16b, { v4.16b }, v18.16b
1526	eor	v2.16b, v26.16b, v2.16b
1527	add	v19.4s, v19.4s, v25.4s
1528	ushr	v12.4s, v0.4s, #7
1529	shl	v0.4s, v0.4s, #25
1530	add	v7.4s, v7.4s, v4.4s
1531	ushr	v13.4s, v2.4s, #7
1532	shl	v2.4s, v2.4s, #25
1533	eor	v5.16b, v19.16b, v5.16b
1534	add	v10.4s, v10.4s, v14.4s
1535	ldr	q14, [sp, #288]
1536	orr	v0.16b, v0.16b, v12.16b
1537	eor	v1.16b, v7.16b, v1.16b
1538	orr	v2.16b, v2.16b, v13.16b
1539	ushr	v12.4s, v5.4s, #7
1540	shl	v5.4s, v5.4s, #25
1541	add	v22.4s, v22.4s, v14.4s
1542	ushr	v13.4s, v1.4s, #7
1543	shl	v1.4s, v1.4s, #25
1544	add	v10.4s, v10.4s, v0.4s
1545	orr	v5.16b, v5.16b, v12.16b
1546	add	v22.4s, v22.4s, v2.4s
1547	add	v20.4s, v20.4s, v24.4s
1548	orr	v1.16b, v1.16b, v13.16b
1549	eor	v4.16b, v4.16b, v10.16b
1550	add	v17.4s, v17.4s, v9.4s
1551	eor	v25.16b, v25.16b, v22.16b
1552	add	v20.4s, v20.4s, v5.4s
1553	tbl	v4.16b, { v4.16b }, v27.16b
1554	add	v17.4s, v17.4s, v1.4s
1555	tbl	v25.16b, { v25.16b }, v27.16b
1556	eor	v6.16b, v6.16b, v20.16b
1557	add	v26.4s, v26.4s, v4.4s
1558	eor	v16.16b, v16.16b, v17.16b
1559	add	v7.4s, v7.4s, v25.4s
1560	tbl	v6.16b, { v6.16b }, v27.16b
1561	eor	v0.16b, v26.16b, v0.16b
1562	tbl	v16.16b, { v16.16b }, v27.16b
1563	eor	v2.16b, v7.16b, v2.16b
1564	add	v21.4s, v21.4s, v6.4s
1565	ushr	v12.4s, v0.4s, #12
1566	shl	v0.4s, v0.4s, #20
1567	add	v19.4s, v19.4s, v16.4s
1568	ushr	v13.4s, v2.4s, #12
1569	shl	v2.4s, v2.4s, #20
1570	eor	v5.16b, v21.16b, v5.16b
1571	orr	v0.16b, v0.16b, v12.16b
1572	eor	v1.16b, v19.16b, v1.16b
1573	add	v10.4s, v10.4s, v11.4s
1574	orr	v2.16b, v2.16b, v13.16b
1575	ushr	v13.4s, v5.4s, #12
1576	shl	v5.4s, v5.4s, #20
1577	ushr	v12.4s, v1.4s, #12
1578	shl	v1.4s, v1.4s, #20
1579	add	v10.4s, v10.4s, v0.4s
1580	add	v22.4s, v22.4s, v15.4s
1581	orr	v5.16b, v5.16b, v13.16b
1582	add	v20.4s, v20.4s, v3.4s
1583	mov	v24.16b, v3.16b
1584	ldr	q3, [sp, #336]
1585	orr	v1.16b, v1.16b, v12.16b
1586	eor	v4.16b, v4.16b, v10.16b
1587	add	v22.4s, v22.4s, v2.4s
1588	add	v17.4s, v17.4s, v3.4s
1589	add	v20.4s, v20.4s, v5.4s
1590	tbl	v4.16b, { v4.16b }, v18.16b
1591	eor	v25.16b, v25.16b, v22.16b
1592	add	v17.4s, v17.4s, v1.4s
1593	eor	v6.16b, v6.16b, v20.16b
1594	add	v26.4s, v26.4s, v4.4s
1595	tbl	v25.16b, { v25.16b }, v18.16b
1596	eor	v16.16b, v16.16b, v17.16b
1597	tbl	v6.16b, { v6.16b }, v18.16b
1598	eor	v0.16b, v26.16b, v0.16b
1599	add	v7.4s, v7.4s, v25.4s
1600	tbl	v16.16b, { v16.16b }, v18.16b
1601	add	v21.4s, v21.4s, v6.4s
1602	ushr	v12.4s, v0.4s, #7
1603	shl	v0.4s, v0.4s, #25
1604	eor	v2.16b, v7.16b, v2.16b
1605	add	v19.4s, v19.4s, v16.4s
1606	eor	v5.16b, v21.16b, v5.16b
1607	orr	v0.16b, v0.16b, v12.16b
1608	ushr	v12.4s, v2.4s, #7
1609	shl	v2.4s, v2.4s, #25
1610	eor	v1.16b, v19.16b, v1.16b
1611	ushr	v13.4s, v5.4s, #7
1612	shl	v5.4s, v5.4s, #25
1613	add	v22.4s, v22.4s, v8.4s
1614	orr	v2.16b, v2.16b, v12.16b
1615	ushr	v12.4s, v1.4s, #7
1616	shl	v1.4s, v1.4s, #25
1617	orr	v5.16b, v5.16b, v13.16b
1618	add	v22.4s, v22.4s, v0.4s
1619	add	v10.4s, v10.4s, v29.4s
1620	ldr	q29, [sp, #208]
1621	add	v17.4s, v17.4s, v31.4s
1622	orr	v1.16b, v1.16b, v12.16b
1623	add	v20.4s, v20.4s, v29.4s
1624	eor	v16.16b, v16.16b, v22.16b
1625	add	v10.4s, v10.4s, v5.4s
1626	add	v17.4s, v17.4s, v2.4s
1627	add	v20.4s, v20.4s, v1.4s
1628	tbl	v16.16b, { v16.16b }, v27.16b
1629	eor	v25.16b, v25.16b, v10.16b
1630	eor	v6.16b, v6.16b, v17.16b
1631	eor	v4.16b, v4.16b, v20.16b
1632	add	v21.4s, v21.4s, v16.4s
1633	tbl	v25.16b, { v25.16b }, v27.16b
1634	tbl	v6.16b, { v6.16b }, v27.16b
1635	tbl	v4.16b, { v4.16b }, v27.16b
1636	eor	v0.16b, v21.16b, v0.16b
1637	add	v19.4s, v19.4s, v25.4s
1638	add	v26.4s, v26.4s, v6.4s
1639	add	v7.4s, v7.4s, v4.4s
1640	ushr	v12.4s, v0.4s, #12
1641	shl	v0.4s, v0.4s, #20
1642	eor	v5.16b, v5.16b, v19.16b
1643	eor	v2.16b, v26.16b, v2.16b
1644	eor	v1.16b, v7.16b, v1.16b
1645	orr	v0.16b, v0.16b, v12.16b
1646	ushr	v12.4s, v5.4s, #12
1647	shl	v5.4s, v5.4s, #20
1648	add	v22.4s, v22.4s, v14.4s
1649	mov	v8.16b, v31.16b
1650	ushr	v13.4s, v2.4s, #12
1651	shl	v2.4s, v2.4s, #20
1652	mov	v31.16b, v14.16b
1653	ushr	v14.4s, v1.4s, #12
1654	shl	v1.4s, v1.4s, #20
1655	orr	v5.16b, v5.16b, v12.16b
1656	add	v22.4s, v22.4s, v0.4s
1657	add	v10.4s, v10.4s, v28.4s
1658	ldr	q28, [sp, #352]
1659	orr	v2.16b, v2.16b, v13.16b
1660	orr	v1.16b, v1.16b, v14.16b
1661	add	v17.4s, v17.4s, v30.4s
1662	add	v20.4s, v20.4s, v3.4s
1663	eor	v16.16b, v16.16b, v22.16b
1664	add	v10.4s, v10.4s, v5.4s
1665	add	v17.4s, v17.4s, v2.4s
1666	add	v20.4s, v20.4s, v1.4s
1667	tbl	v16.16b, { v16.16b }, v18.16b
1668	eor	v25.16b, v25.16b, v10.16b
1669	eor	v6.16b, v6.16b, v17.16b
1670	eor	v4.16b, v4.16b, v20.16b
1671	add	v21.4s, v21.4s, v16.4s
1672	tbl	v25.16b, { v25.16b }, v18.16b
1673	tbl	v6.16b, { v6.16b }, v18.16b
1674	tbl	v4.16b, { v4.16b }, v18.16b
1675	eor	v0.16b, v21.16b, v0.16b
1676	add	v19.4s, v19.4s, v25.4s
1677	add	v26.4s, v26.4s, v6.4s
1678	add	v7.4s, v7.4s, v4.4s
1679	ushr	v12.4s, v0.4s, #7
1680	shl	v0.4s, v0.4s, #25
1681	eor	v5.16b, v19.16b, v5.16b
1682	eor	v2.16b, v26.16b, v2.16b
1683	eor	v1.16b, v7.16b, v1.16b
1684	orr	v0.16b, v0.16b, v12.16b
1685	ushr	v12.4s, v5.4s, #7
1686	shl	v5.4s, v5.4s, #25
1687	add	v10.4s, v10.4s, v23.4s
1688	ushr	v13.4s, v2.4s, #7
1689	shl	v2.4s, v2.4s, #25
1690	ushr	v14.4s, v1.4s, #7
1691	shl	v1.4s, v1.4s, #25
1692	orr	v5.16b, v5.16b, v12.16b
1693	add	v10.4s, v10.4s, v0.4s
1694	add	v20.4s, v20.4s, v24.4s
1695	ldr	q24, [sp, #144]
1696	orr	v2.16b, v2.16b, v13.16b
1697	orr	v1.16b, v1.16b, v14.16b
1698	add	v22.4s, v22.4s, v9.4s
1699	add	v17.4s, v17.4s, v11.4s
1700	eor	v4.16b, v4.16b, v10.16b
1701	add	v20.4s, v20.4s, v5.4s
1702	add	v22.4s, v22.4s, v2.4s
1703	add	v17.4s, v17.4s, v1.4s
1704	tbl	v4.16b, { v4.16b }, v27.16b
1705	eor	v6.16b, v6.16b, v20.16b
1706	eor	v25.16b, v25.16b, v22.16b
1707	eor	v16.16b, v16.16b, v17.16b
1708	add	v26.4s, v26.4s, v4.4s
1709	tbl	v6.16b, { v6.16b }, v27.16b
1710	tbl	v25.16b, { v25.16b }, v27.16b
1711	tbl	v16.16b, { v16.16b }, v27.16b
1712	eor	v0.16b, v26.16b, v0.16b
1713	add	v21.4s, v21.4s, v6.4s
1714	add	v7.4s, v7.4s, v25.4s
1715	add	v19.4s, v19.4s, v16.4s
1716	ushr	v12.4s, v0.4s, #12
1717	shl	v0.4s, v0.4s, #20
1718	eor	v5.16b, v21.16b, v5.16b
1719	eor	v2.16b, v7.16b, v2.16b
1720	eor	v1.16b, v19.16b, v1.16b
1721	orr	v0.16b, v0.16b, v12.16b
1722	add	v10.4s, v10.4s, v15.4s
1723	ushr	v14.4s, v5.4s, #12
1724	shl	v5.4s, v5.4s, #20
1725	mov	v30.16b, v3.16b
1726	ldr	q3, [sp, #256]
1727	ushr	v12.4s, v2.4s, #12
1728	shl	v2.4s, v2.4s, #20
1729	ushr	v13.4s, v1.4s, #12
1730	shl	v1.4s, v1.4s, #20
1731	add	v10.4s, v10.4s, v0.4s
1732	orr	v5.16b, v5.16b, v14.16b
1733	add	v20.4s, v20.4s, v3.4s
1734	orr	v2.16b, v2.16b, v12.16b
1735	orr	v1.16b, v1.16b, v13.16b
1736	add	v22.4s, v22.4s, v24.4s
1737	add	v17.4s, v17.4s, v28.4s
1738	eor	v4.16b, v4.16b, v10.16b
1739	add	v20.4s, v20.4s, v5.4s
1740	add	v22.4s, v22.4s, v2.4s
1741	add	v17.4s, v17.4s, v1.4s
1742	tbl	v4.16b, { v4.16b }, v18.16b
1743	eor	v6.16b, v6.16b, v20.16b
1744	eor	v25.16b, v25.16b, v22.16b
1745	eor	v16.16b, v16.16b, v17.16b
1746	add	v26.4s, v26.4s, v4.4s
1747	tbl	v6.16b, { v6.16b }, v18.16b
1748	tbl	v25.16b, { v25.16b }, v18.16b
1749	tbl	v16.16b, { v16.16b }, v18.16b
1750	eor	v0.16b, v26.16b, v0.16b
1751	add	v21.4s, v21.4s, v6.4s
1752	add	v7.4s, v7.4s, v25.4s
1753	add	v19.4s, v19.4s, v16.4s
1754	ushr	v12.4s, v0.4s, #7
1755	shl	v0.4s, v0.4s, #25
1756	eor	v5.16b, v21.16b, v5.16b
1757	eor	v2.16b, v7.16b, v2.16b
1758	eor	v1.16b, v19.16b, v1.16b
1759	orr	v0.16b, v0.16b, v12.16b
1760	ushr	v12.4s, v5.4s, #7
1761	shl	v5.4s, v5.4s, #25
1762	mov	v23.16b, v9.16b
1763	ldr	q9, [sp, #112]
1764	ushr	v13.4s, v2.4s, #7
1765	shl	v2.4s, v2.4s, #25
1766	ushr	v14.4s, v1.4s, #7
1767	shl	v1.4s, v1.4s, #25
1768	orr	v5.16b, v5.16b, v12.16b
1769	add	v9.4s, v10.4s, v9.4s
1770	orr	v2.16b, v2.16b, v13.16b
1771	orr	v1.16b, v1.16b, v14.16b
1772	ldr	q14, [sp, #64]
1773	add	v22.4s, v22.4s, v31.4s
1774	add	v17.4s, v17.4s, v30.4s
1775	add	v20.4s, v20.4s, v8.4s
1776	add	v9.4s, v9.4s, v5.4s
1777	add	v22.4s, v22.4s, v0.4s
1778	add	v17.4s, v17.4s, v2.4s
1779	add	v20.4s, v20.4s, v1.4s
1780	eor	v25.16b, v25.16b, v9.16b
1781	eor	v16.16b, v16.16b, v22.16b
1782	eor	v6.16b, v6.16b, v17.16b
1783	eor	v4.16b, v4.16b, v20.16b
1784	tbl	v25.16b, { v25.16b }, v27.16b
1785	tbl	v16.16b, { v16.16b }, v27.16b
1786	tbl	v6.16b, { v6.16b }, v27.16b
1787	tbl	v4.16b, { v4.16b }, v27.16b
1788	add	v19.4s, v19.4s, v25.4s
1789	add	v21.4s, v21.4s, v16.4s
1790	add	v26.4s, v26.4s, v6.4s
1791	add	v7.4s, v7.4s, v4.4s
1792	eor	v5.16b, v5.16b, v19.16b
1793	eor	v0.16b, v21.16b, v0.16b
1794	eor	v2.16b, v26.16b, v2.16b
1795	eor	v1.16b, v7.16b, v1.16b
1796	ushr	v30.4s, v5.4s, #12
1797	shl	v5.4s, v5.4s, #20
1798	ushr	v10.4s, v0.4s, #12
1799	shl	v0.4s, v0.4s, #20
1800	ushr	v12.4s, v2.4s, #12
1801	shl	v2.4s, v2.4s, #20
1802	ushr	v13.4s, v1.4s, #12
1803	shl	v1.4s, v1.4s, #20
1804	orr	v5.16b, v5.16b, v30.16b
1805	add	v30.4s, v9.4s, v29.4s
1806	add	v22.4s, v22.4s, v23.4s
1807	ldr	q23, [sp, #192]
1808	orr	v0.16b, v0.16b, v10.16b
1809	orr	v2.16b, v2.16b, v12.16b
1810	orr	v1.16b, v1.16b, v13.16b
1811	add	v17.4s, v17.4s, v23.4s
1812	add	v20.4s, v20.4s, v28.4s
1813	add	v23.4s, v30.4s, v5.4s
1814	add	v22.4s, v22.4s, v0.4s
1815	add	v17.4s, v17.4s, v2.4s
1816	add	v20.4s, v20.4s, v1.4s
1817	eor	v25.16b, v25.16b, v23.16b
1818	eor	v16.16b, v16.16b, v22.16b
1819	eor	v6.16b, v6.16b, v17.16b
1820	eor	v4.16b, v4.16b, v20.16b
1821	tbl	v25.16b, { v25.16b }, v18.16b
1822	tbl	v16.16b, { v16.16b }, v18.16b
1823	tbl	v6.16b, { v6.16b }, v18.16b
1824	tbl	v4.16b, { v4.16b }, v18.16b
1825	add	v19.4s, v19.4s, v25.4s
1826	add	v21.4s, v21.4s, v16.4s
1827	add	v26.4s, v26.4s, v6.4s
1828	add	v7.4s, v7.4s, v4.4s
1829	eor	v5.16b, v19.16b, v5.16b
1830	eor	v0.16b, v21.16b, v0.16b
1831	eor	v2.16b, v26.16b, v2.16b
1832	eor	v1.16b, v7.16b, v1.16b
1833	ushr	v28.4s, v5.4s, #7
1834	shl	v5.4s, v5.4s, #25
1835	ushr	v30.4s, v0.4s, #7
1836	shl	v0.4s, v0.4s, #25
1837	ushr	v31.4s, v2.4s, #7
1838	shl	v2.4s, v2.4s, #25
1839	ushr	v8.4s, v1.4s, #7
1840	shl	v1.4s, v1.4s, #25
1841	orr	v5.16b, v5.16b, v28.16b
1842	ldr	q28, [sp, #176]
1843	orr	v0.16b, v0.16b, v30.16b
1844	orr	v2.16b, v2.16b, v31.16b
1845	orr	v1.16b, v1.16b, v8.16b
1846	add	v23.4s, v23.4s, v28.4s
1847	add	v22.4s, v22.4s, v11.4s
1848	add	v17.4s, v17.4s, v15.4s
1849	add	v20.4s, v20.4s, v3.4s
1850	ldr	q3, [sp, #272]
1851	add	v23.4s, v23.4s, v0.4s
1852	add	v22.4s, v22.4s, v2.4s
1853	add	v17.4s, v17.4s, v1.4s
1854	add	v20.4s, v20.4s, v5.4s
1855	eor	v4.16b, v4.16b, v23.16b
1856	eor	v25.16b, v25.16b, v22.16b
1857	eor	v16.16b, v16.16b, v17.16b
1858	eor	v6.16b, v6.16b, v20.16b
1859	tbl	v4.16b, { v4.16b }, v27.16b
1860	tbl	v25.16b, { v25.16b }, v27.16b
1861	tbl	v16.16b, { v16.16b }, v27.16b
1862	tbl	v6.16b, { v6.16b }, v27.16b
1863	add	v26.4s, v26.4s, v4.4s
1864	add	v7.4s, v7.4s, v25.4s
1865	add	v19.4s, v19.4s, v16.4s
1866	add	v21.4s, v21.4s, v6.4s
1867	eor	v0.16b, v26.16b, v0.16b
1868	eor	v2.16b, v7.16b, v2.16b
1869	eor	v1.16b, v19.16b, v1.16b
1870	eor	v5.16b, v21.16b, v5.16b
1871	add	v3.4s, v22.4s, v3.4s
1872	ldr	q22, [sp, #160]
1873	ushr	v28.4s, v0.4s, #12
1874	shl	v0.4s, v0.4s, #20
1875	ushr	v29.4s, v2.4s, #12
1876	shl	v2.4s, v2.4s, #20
1877	ushr	v30.4s, v1.4s, #12
1878	shl	v1.4s, v1.4s, #20
1879	ushr	v31.4s, v5.4s, #12
1880	shl	v5.4s, v5.4s, #20
1881	add	v17.4s, v17.4s, v22.4s
1882	ldr	q22, [sp, #240]
1883	orr	v0.16b, v0.16b, v28.16b
1884	prfm	pldl1keep, [x23, #256]
1885	orr	v2.16b, v2.16b, v29.16b
1886	prfm	pldl1keep, [x24, #256]
1887	orr	v1.16b, v1.16b, v30.16b
1888	prfm	pldl1keep, [x22, #256]
1889	orr	v5.16b, v5.16b, v31.16b
1890	prfm	pldl1keep, [x25, #256]
1891	add	v23.4s, v23.4s, v24.4s
1892	add	v20.4s, v20.4s, v22.4s
1893	add	v3.4s, v3.4s, v2.4s
1894	add	v17.4s, v17.4s, v1.4s
1895	add	v22.4s, v23.4s, v0.4s
1896	add	v20.4s, v20.4s, v5.4s
1897	eor	v23.16b, v25.16b, v3.16b
1898	eor	v16.16b, v16.16b, v17.16b
1899	eor	v4.16b, v4.16b, v22.16b
1900	eor	v6.16b, v6.16b, v20.16b
1901	tbl	v23.16b, { v23.16b }, v18.16b
1902	tbl	v16.16b, { v16.16b }, v18.16b
1903	tbl	v4.16b, { v4.16b }, v18.16b
1904	tbl	v6.16b, { v6.16b }, v18.16b
1905	add	v7.4s, v7.4s, v23.4s
1906	add	v19.4s, v19.4s, v16.4s
1907	add	v18.4s, v26.4s, v4.4s
1908	add	v21.4s, v21.4s, v6.4s
1909	eor	v2.16b, v7.16b, v2.16b
1910	eor	v1.16b, v19.16b, v1.16b
1911	eor	v0.16b, v18.16b, v0.16b
1912	eor	v5.16b, v21.16b, v5.16b
1913	ushr	v25.4s, v2.4s, #7
1914	shl	v2.4s, v2.4s, #25
1915	ushr	v24.4s, v0.4s, #7
1916	shl	v0.4s, v0.4s, #25
1917	ushr	v26.4s, v1.4s, #7
1918	shl	v1.4s, v1.4s, #25
1919	ushr	v27.4s, v5.4s, #7
1920	shl	v5.4s, v5.4s, #25
1921	orr	v0.16b, v0.16b, v24.16b
1922	orr	v2.16b, v2.16b, v25.16b
1923	orr	v1.16b, v1.16b, v26.16b
1924	orr	v5.16b, v5.16b, v27.16b
1925	movi	v13.4s, #64
1926	eor	v29.16b, v19.16b, v22.16b
1927	eor	v8.16b, v21.16b, v3.16b
1928	eor	v30.16b, v17.16b, v18.16b
1929	eor	v31.16b, v20.16b, v7.16b
1930	eor	v24.16b, v5.16b, v23.16b
1931	eor	v18.16b, v0.16b, v16.16b
1932	eor	v25.16b, v2.16b, v6.16b
1933	eor	v26.16b, v1.16b, v4.16b
1934	cbnz	x21, .LBB3_5
1935	b	.LBB3_2
/*
 * .LBB3_6: leaves through .LBB3_14 when x1 is zero; otherwise loads the
 * three 16-byte constants .LCPI3_1/.LCPI3_2/.LCPI3_3 into q0/q2/q1 and
 * computes two scalars (w11, x12) before falling through into .LBB3_8.
 *
 * NOTE(review): x10 and x11 are used here as address bases but are set
 * before this excerpt; presumably they hold the adrp page addresses of
 * .LCPI3_2 and .LCPI3_1 -- confirm at the function entry.
 * NOTE(review): x1 looks like a remaining-input count and x5 like a
 * boolean argument of which only bit 0 is kept -- confirm against the
 * C prototype.
 */
1936.LBB3_6:
1937	cbz	x1, .LBB3_14	/* x1 == 0: branch to .LBB3_14 */
1938	adrp	x12, .LCPI3_3	/* x12 = page address of .LCPI3_3 */
1939	ldr	q0, [x11, :lo12:.LCPI3_1]	/* q0: read as a tbl index vector in .LBB3_10 */
1940	orr	w11, w7, w6	/* w11 = w7 | w6; x11 reused after serving as a base above */
1941	ldr	q2, [x10, :lo12:.LCPI3_2]	/* q2: second tbl index vector read in .LBB3_10 */
1942	ldr	q1, [x12, :lo12:.LCPI3_3]	/* q1: used as a vector addend in .LBB3_10 */
1943	and	x12, x5, #0x1	/* x12 = bit 0 of x5 (x12 reused after the load above) */
/*
 * .LBB3_8: builds v3 = { low 32 bits of x4, high 32 bits of x4, 64, 64 },
 * loads two 16-byte vectors from [x3] into q5/q4, seeds x15, w14 and x10,
 * then branches to .LBB3_11 (outside this excerpt).
 *
 * .LBB3_10 copies v3 and overwrites lane 3 of the copy with w14, so the
 * 64 left in lane 3 here is a placeholder.
 *
 * NOTE(review): presumably x4 is a 64-bit counter, 64 the block length in
 * bytes and [x3] the eight 32-bit key/chaining words -- confirm against
 * the C prototype.
 */
1944.LBB3_8:
1945	movi	v3.4s, #64	/* all four lanes = 64 */
1946	lsr	x13, x4, #32	/* x13 = upper 32 bits of x4 */
1947	ldp	q5, q4, [x3]	/* q5 = bytes 0..15, q4 = bytes 16..31 at [x3] */
1948	mov	x15, x2	/* x15 = copy of x2 */
1949	mov	w14, w11	/* w14 = copy of w11 */
1950	mov	v3.s[0], w4	/* lane 0 = low 32 bits of x4 */
1951	ldr	x10, [x0]	/* x10 = 64-bit value at [x0]; dereferenced in .LBB3_10 */
1952	mov	v3.s[1], w13	/* lane 1 = upper 32 bits of x4 */
1953	b	.LBB3_11
/*
 * .LBB3_9: ORs w9 into w14 and falls through into .LBB3_10, which masks
 * w14 to 8 bits and inserts it into lane 3 of its copy of v3.
 *
 * NOTE(review): w9 is set before this excerpt; presumably it holds extra
 * flag bits for the last block of an input -- confirm.
 */
1954.LBB3_9:
1955	orr	w14, w14, w9	/* w14 |= w9 */
1956.LBB3_10:
1957	ldp	q6, q7, [x10]
1958	mov	v16.16b, v3.16b
1959	and	w14, w14, #0xff
1960	add	v5.4s, v5.4s, v4.4s
1961	mov	x15, x13
1962	mov	v16.s[3], w14
1963	add	x14, x10, #32
1964	uzp1	v17.4s, v6.4s, v7.4s
1965	add	x10, x10, #64
1966	add	v5.4s, v5.4s, v17.4s
1967	eor	v16.16b, v5.16b, v16.16b
1968	tbl	v16.16b, { v16.16b }, v0.16b
1969	add	v18.4s, v16.4s, v1.4s
1970	eor	v19.16b, v18.16b, v4.16b
1971	uzp2	v4.4s, v6.4s, v7.4s
1972	ushr	v6.4s, v19.4s, #12
1973	shl	v7.4s, v19.4s, #20
1974	ld2	{ v19.4s, v20.4s }, [x14]
1975	add	v5.4s, v5.4s, v4.4s
1976	mov	w14, w6
1977	orr	v6.16b, v7.16b, v6.16b
1978	add	v5.4s, v5.4s, v6.4s
1979	eor	v7.16b, v16.16b, v5.16b
1980	add	v5.4s, v5.4s, v19.4s
1981	tbl	v7.16b, { v7.16b }, v2.16b
1982	ext	v5.16b, v5.16b, v5.16b, #12
1983	add	v16.4s, v18.4s, v7.4s
1984	ext	v7.16b, v7.16b, v7.16b, #8
1985	eor	v6.16b, v6.16b, v16.16b
1986	ext	v16.16b, v16.16b, v16.16b, #4
1987	ushr	v18.4s, v6.4s, #7
1988	shl	v6.4s, v6.4s, #25
1989	orr	v6.16b, v6.16b, v18.16b
1990	ext	v18.16b, v20.16b, v20.16b, #12
1991	add	v5.4s, v5.4s, v6.4s
1992	eor	v7.16b, v5.16b, v7.16b
1993	add	v5.4s, v5.4s, v18.4s
1994	tbl	v7.16b, { v7.16b }, v0.16b
1995	add	v16.4s, v16.4s, v7.4s
1996	eor	v6.16b, v6.16b, v16.16b
1997	ushr	v21.4s, v6.4s, #12
1998	shl	v6.4s, v6.4s, #20
1999	orr	v6.16b, v6.16b, v21.16b
2000	uzp1	v21.4s, v17.4s, v17.4s
2001	add	v5.4s, v5.4s, v6.4s
2002	ext	v21.16b, v21.16b, v17.16b, #8
2003	eor	v7.16b, v7.16b, v5.16b
2004	uzp2	v21.4s, v21.4s, v4.4s
2005	tbl	v7.16b, { v7.16b }, v2.16b
2006	add	v5.4s, v5.4s, v21.4s
2007	add	v16.4s, v16.4s, v7.4s
2008	ext	v5.16b, v5.16b, v5.16b, #4
2009	ext	v7.16b, v7.16b, v7.16b, #8
2010	eor	v6.16b, v6.16b, v16.16b
2011	ushr	v22.4s, v6.4s, #7
2012	shl	v6.4s, v6.4s, #25
2013	orr	v6.16b, v6.16b, v22.16b
2014	add	v22.4s, v5.4s, v6.4s
2015	eor	v5.16b, v22.16b, v7.16b
2016	ext	v7.16b, v16.16b, v16.16b, #12
2017	tbl	v16.16b, { v5.16b }, v0.16b
2018	ext	v5.16b, v17.16b, v17.16b, #12
2019	add	v7.4s, v7.4s, v16.4s
2020	ext	v5.16b, v17.16b, v5.16b, #12
2021	ext	v17.16b, v19.16b, v19.16b, #12
2022	mov	v19.16b, v18.16b
2023	eor	v6.16b, v6.16b, v7.16b
2024	rev64	v5.4s, v5.4s
2025	mov	v19.s[1], v17.s[2]
2026	ushr	v20.4s, v6.4s, #12
2027	shl	v6.4s, v6.4s, #20
2028	trn2	v5.4s, v5.4s, v19.4s
2029	orr	v6.16b, v6.16b, v20.16b
2030	zip1	v20.2d, v18.2d, v4.2d
2031	zip2	v4.4s, v4.4s, v18.4s
2032	add	v19.4s, v6.4s, v5.4s
2033	mov	v20.s[3], v17.s[3]
2034	add	v19.4s, v19.4s, v22.4s
2035	ext	v22.16b, v20.16b, v20.16b, #12
2036	eor	v16.16b, v16.16b, v19.16b
2037	ext	v19.16b, v19.16b, v19.16b, #12
2038	tbl	v16.16b, { v16.16b }, v2.16b
2039	add	v7.4s, v7.4s, v16.4s
2040	ext	v16.16b, v16.16b, v16.16b, #8
2041	eor	v6.16b, v6.16b, v7.16b
2042	ext	v7.16b, v7.16b, v7.16b, #4
2043	ushr	v23.4s, v6.4s, #7
2044	shl	v24.4s, v6.4s, #25
2045	uzp1	v6.4s, v20.4s, v22.4s
2046	orr	v20.16b, v24.16b, v23.16b
2047	add	v22.4s, v20.4s, v6.4s
2048	add	v19.4s, v22.4s, v19.4s
2049	eor	v16.16b, v19.16b, v16.16b
2050	tbl	v16.16b, { v16.16b }, v0.16b
2051	add	v7.4s, v7.4s, v16.4s
2052	eor	v18.16b, v20.16b, v7.16b
2053	zip1	v20.4s, v4.4s, v17.4s
2054	zip1	v4.4s, v17.4s, v4.4s
2055	ushr	v17.4s, v18.4s, #12
2056	shl	v18.4s, v18.4s, #20
2057	ext	v20.16b, v4.16b, v20.16b, #8
2058	orr	v4.16b, v18.16b, v17.16b
2059	ext	v18.16b, v21.16b, v21.16b, #4
2060	add	v17.4s, v4.4s, v20.4s
2061	add	v17.4s, v17.4s, v19.4s
2062	uzp1	v19.4s, v18.4s, v18.4s
2063	eor	v16.16b, v16.16b, v17.16b
2064	ext	v19.16b, v19.16b, v18.16b, #8
2065	tbl	v16.16b, { v16.16b }, v2.16b
2066	uzp2	v19.4s, v19.4s, v5.4s
2067	add	v7.4s, v7.4s, v16.4s
2068	add	v17.4s, v17.4s, v19.4s
2069	ext	v16.16b, v16.16b, v16.16b, #8
2070	eor	v4.16b, v4.16b, v7.16b
2071	ext	v17.16b, v17.16b, v17.16b, #4
2072	ext	v7.16b, v7.16b, v7.16b, #12
2073	ushr	v21.4s, v4.4s, #7
2074	shl	v4.4s, v4.4s, #25
2075	orr	v4.16b, v4.16b, v21.16b
2076	ext	v21.16b, v18.16b, v18.16b, #12
2077	add	v17.4s, v17.4s, v4.4s
2078	ext	v18.16b, v18.16b, v21.16b, #12
2079	mov	v21.16b, v20.16b
2080	eor	v16.16b, v17.16b, v16.16b
2081	rev64	v18.4s, v18.4s
2082	mov	v21.s[1], v6.s[2]
2083	tbl	v16.16b, { v16.16b }, v0.16b
2084	add	v7.4s, v7.4s, v16.4s
2085	eor	v4.16b, v4.16b, v7.16b
2086	ushr	v22.4s, v4.4s, #12
2087	shl	v23.4s, v4.4s, #20
2088	trn2	v4.4s, v18.4s, v21.4s
2089	orr	v18.16b, v23.16b, v22.16b
2090	add	v21.4s, v18.4s, v4.4s
2091	add	v17.4s, v21.4s, v17.4s
2092	zip1	v21.2d, v20.2d, v5.2d
2093	zip2	v5.4s, v5.4s, v20.4s
2094	eor	v16.16b, v16.16b, v17.16b
2095	mov	v21.s[3], v6.s[3]
2096	ext	v17.16b, v17.16b, v17.16b, #12
2097	zip1	v20.4s, v5.4s, v6.4s
2098	tbl	v16.16b, { v16.16b }, v2.16b
2099	zip1	v5.4s, v6.4s, v5.4s
2100	add	v22.4s, v7.4s, v16.4s
2101	ext	v16.16b, v16.16b, v16.16b, #8
2102	ext	v20.16b, v5.16b, v20.16b, #8
2103	eor	v7.16b, v18.16b, v22.16b
2104	ext	v18.16b, v21.16b, v21.16b, #12
2105	ushr	v23.4s, v7.4s, #7
2106	shl	v24.4s, v7.4s, #25
2107	uzp1	v7.4s, v21.4s, v18.4s
2108	orr	v18.16b, v24.16b, v23.16b
2109	add	v21.4s, v18.4s, v7.4s
2110	add	v17.4s, v21.4s, v17.4s
2111	ext	v21.16b, v22.16b, v22.16b, #4
2112	eor	v16.16b, v17.16b, v16.16b
2113	tbl	v16.16b, { v16.16b }, v0.16b
2114	add	v21.4s, v21.4s, v16.4s
2115	eor	v18.16b, v18.16b, v21.16b
2116	ushr	v6.4s, v18.4s, #12
2117	shl	v18.4s, v18.4s, #20
2118	orr	v5.16b, v18.16b, v6.16b
2119	add	v6.4s, v5.4s, v20.4s
2120	add	v6.4s, v6.4s, v17.4s
2121	ext	v17.16b, v19.16b, v19.16b, #4
2122	eor	v16.16b, v16.16b, v6.16b
2123	uzp1	v18.4s, v17.4s, v17.4s
2124	tbl	v16.16b, { v16.16b }, v2.16b
2125	ext	v18.16b, v18.16b, v17.16b, #8
2126	add	v19.4s, v21.4s, v16.4s
2127	uzp2	v18.4s, v18.4s, v4.4s
2128	ext	v16.16b, v16.16b, v16.16b, #8
2129	eor	v5.16b, v5.16b, v19.16b
2130	add	v6.4s, v6.4s, v18.4s
2131	ext	v19.16b, v19.16b, v19.16b, #12
2132	ushr	v21.4s, v5.4s, #7
2133	shl	v5.4s, v5.4s, #25
2134	ext	v6.16b, v6.16b, v6.16b, #4
2135	orr	v5.16b, v5.16b, v21.16b
2136	ext	v21.16b, v17.16b, v17.16b, #12
2137	add	v6.4s, v6.4s, v5.4s
2138	ext	v17.16b, v17.16b, v21.16b, #12
2139	mov	v21.16b, v20.16b
2140	eor	v16.16b, v6.16b, v16.16b
2141	rev64	v17.4s, v17.4s
2142	mov	v21.s[1], v7.s[2]
2143	tbl	v16.16b, { v16.16b }, v0.16b
2144	add	v19.4s, v19.4s, v16.4s
2145	eor	v5.16b, v5.16b, v19.16b
2146	ushr	v22.4s, v5.4s, #12
2147	shl	v23.4s, v5.4s, #20
2148	trn2	v5.4s, v17.4s, v21.4s
2149	orr	v17.16b, v23.16b, v22.16b
2150	add	v21.4s, v17.4s, v5.4s
2151	add	v6.4s, v21.4s, v6.4s
2152	eor	v16.16b, v16.16b, v6.16b
2153	ext	v6.16b, v6.16b, v6.16b, #12
2154	tbl	v21.16b, { v16.16b }, v2.16b
2155	zip1	v16.2d, v20.2d, v4.2d
2156	zip2	v4.4s, v4.4s, v20.4s
2157	add	v19.4s, v19.4s, v21.4s
2158	mov	v16.s[3], v7.s[3]
2159	ext	v21.16b, v21.16b, v21.16b, #8
2160	zip1	v20.4s, v4.4s, v7.4s
2161	eor	v17.16b, v17.16b, v19.16b
2162	ext	v22.16b, v16.16b, v16.16b, #12
2163	ext	v19.16b, v19.16b, v19.16b, #4
2164	zip1	v4.4s, v7.4s, v4.4s
2165	ushr	v23.4s, v17.4s, #7
2166	shl	v17.4s, v17.4s, #25
2167	uzp1	v16.4s, v16.4s, v22.4s
2168	ext	v4.16b, v4.16b, v20.16b, #8
2169	orr	v17.16b, v17.16b, v23.16b
2170	add	v22.4s, v17.4s, v16.4s
2171	add	v6.4s, v22.4s, v6.4s
2172	eor	v21.16b, v6.16b, v21.16b
2173	tbl	v21.16b, { v21.16b }, v0.16b
2174	add	v19.4s, v19.4s, v21.4s
2175	eor	v17.16b, v17.16b, v19.16b
2176	ushr	v7.4s, v17.4s, #12
2177	shl	v17.4s, v17.4s, #20
2178	orr	v7.16b, v17.16b, v7.16b
2179	add	v17.4s, v7.4s, v4.4s
2180	add	v6.4s, v17.4s, v6.4s
2181	ext	v17.16b, v18.16b, v18.16b, #4
2182	eor	v18.16b, v21.16b, v6.16b
2183	uzp1	v20.4s, v17.4s, v17.4s
2184	tbl	v18.16b, { v18.16b }, v2.16b
2185	ext	v20.16b, v20.16b, v17.16b, #8
2186	add	v19.4s, v19.4s, v18.4s
2187	uzp2	v20.4s, v20.4s, v5.4s
2188	ext	v18.16b, v18.16b, v18.16b, #8
2189	eor	v7.16b, v7.16b, v19.16b
2190	add	v6.4s, v6.4s, v20.4s
2191	ushr	v21.4s, v7.4s, #7
2192	shl	v7.4s, v7.4s, #25
2193	ext	v6.16b, v6.16b, v6.16b, #4
2194	orr	v7.16b, v7.16b, v21.16b
2195	add	v21.4s, v6.4s, v7.4s
2196	eor	v6.16b, v21.16b, v18.16b
2197	ext	v18.16b, v19.16b, v19.16b, #12
2198	tbl	v19.16b, { v6.16b }, v0.16b
2199	ext	v6.16b, v17.16b, v17.16b, #12
2200	add	v18.4s, v18.4s, v19.4s
2201	ext	v6.16b, v17.16b, v6.16b, #12
2202	mov	v17.16b, v4.16b
2203	eor	v7.16b, v7.16b, v18.16b
2204	rev64	v6.4s, v6.4s
2205	mov	v17.s[1], v16.s[2]
2206	ushr	v22.4s, v7.4s, #12
2207	shl	v7.4s, v7.4s, #20
2208	trn2	v6.4s, v6.4s, v17.4s
2209	orr	v7.16b, v7.16b, v22.16b
2210	add	v17.4s, v7.4s, v6.4s
2211	add	v17.4s, v17.4s, v21.4s
2212	zip1	v21.2d, v4.2d, v5.2d
2213	zip2	v4.4s, v5.4s, v4.4s
2214	eor	v19.16b, v19.16b, v17.16b
2215	mov	v21.s[3], v16.s[3]
2216	ext	v17.16b, v17.16b, v17.16b, #12
2217	tbl	v19.16b, { v19.16b }, v2.16b
2218	ext	v22.16b, v21.16b, v21.16b, #12
2219	add	v18.4s, v18.4s, v19.4s
2220	ext	v19.16b, v19.16b, v19.16b, #8
2221	eor	v7.16b, v7.16b, v18.16b
2222	ext	v18.16b, v18.16b, v18.16b, #4
2223	ushr	v23.4s, v7.4s, #7
2224	shl	v24.4s, v7.4s, #25
2225	uzp1	v7.4s, v21.4s, v22.4s
2226	orr	v21.16b, v24.16b, v23.16b
2227	add	v22.4s, v21.4s, v7.4s
2228	add	v17.4s, v22.4s, v17.4s
2229	eor	v19.16b, v17.16b, v19.16b
2230	tbl	v19.16b, { v19.16b }, v0.16b
2231	add	v18.4s, v18.4s, v19.4s
2232	eor	v5.16b, v21.16b, v18.16b
2233	zip1	v21.4s, v4.4s, v16.4s
2234	zip1	v4.4s, v16.4s, v4.4s
2235	ushr	v16.4s, v5.4s, #12
2236	shl	v5.4s, v5.4s, #20
2237	ext	v21.16b, v4.16b, v21.16b, #8
2238	orr	v4.16b, v5.16b, v16.16b
2239	ext	v16.16b, v20.16b, v20.16b, #4
2240	mov	v23.16b, v21.16b
2241	add	v5.4s, v4.4s, v21.4s
2242	mov	v23.s[1], v7.s[2]
2243	add	v5.4s, v5.4s, v17.4s
2244	eor	v17.16b, v19.16b, v5.16b
2245	uzp1	v19.4s, v16.4s, v16.4s
2246	tbl	v17.16b, { v17.16b }, v2.16b
2247	ext	v19.16b, v19.16b, v16.16b, #8
2248	add	v18.4s, v18.4s, v17.4s
2249	uzp2	v19.4s, v19.4s, v6.4s
2250	eor	v4.16b, v4.16b, v18.16b
2251	add	v5.4s, v5.4s, v19.4s
2252	ext	v19.16b, v19.16b, v19.16b, #4
2253	ushr	v20.4s, v4.4s, #7
2254	shl	v4.4s, v4.4s, #25
2255	ext	v5.16b, v5.16b, v5.16b, #4
2256	orr	v20.16b, v4.16b, v20.16b
2257	ext	v4.16b, v17.16b, v17.16b, #8
2258	add	v17.4s, v5.4s, v20.4s
2259	ext	v5.16b, v18.16b, v18.16b, #12
2260	eor	v4.16b, v17.16b, v4.16b
2261	tbl	v18.16b, { v4.16b }, v0.16b
2262	ext	v4.16b, v16.16b, v16.16b, #12
2263	add	v22.4s, v5.4s, v18.4s
2264	ext	v4.16b, v16.16b, v4.16b, #12
2265	eor	v5.16b, v20.16b, v22.16b
2266	rev64	v16.4s, v4.4s
2267	ushr	v20.4s, v5.4s, #12
2268	shl	v24.4s, v5.4s, #20
2269	trn2	v5.4s, v16.4s, v23.4s
2270	orr	v16.16b, v24.16b, v20.16b
2271	add	v20.4s, v16.4s, v5.4s
2272	add	v17.4s, v20.4s, v17.4s
2273	zip1	v20.2d, v21.2d, v6.2d
2274	zip2	v6.4s, v6.4s, v21.4s
2275	eor	v18.16b, v18.16b, v17.16b
2276	mov	v20.s[3], v7.s[3]
2277	ext	v17.16b, v17.16b, v17.16b, #12
2278	zip1	v21.4s, v6.4s, v7.4s
2279	tbl	v18.16b, { v18.16b }, v2.16b
2280	ext	v24.16b, v20.16b, v20.16b, #12
2281	zip1	v6.4s, v7.4s, v6.4s
2282	add	v22.4s, v22.4s, v18.4s
2283	ext	v18.16b, v18.16b, v18.16b, #8
2284	ext	v6.16b, v6.16b, v21.16b, #8
2285	eor	v16.16b, v16.16b, v22.16b
2286	ext	v22.16b, v22.16b, v22.16b, #4
2287	zip1	v5.2d, v6.2d, v5.2d
2288	zip2	v4.4s, v4.4s, v6.4s
2289	ushr	v25.4s, v16.4s, #7
2290	shl	v26.4s, v16.4s, #25
2291	uzp1	v16.4s, v20.4s, v24.4s
2292	orr	v20.16b, v26.16b, v25.16b
2293	mov	v5.s[3], v16.s[3]
2294	add	v24.4s, v20.4s, v16.4s
2295	add	v17.4s, v24.4s, v17.4s
2296	eor	v18.16b, v17.16b, v18.16b
2297	tbl	v18.16b, { v18.16b }, v0.16b
2298	add	v22.4s, v22.4s, v18.4s
2299	eor	v20.16b, v20.16b, v22.16b
2300	ushr	v7.4s, v20.4s, #12
2301	shl	v20.4s, v20.4s, #20
2302	orr	v7.16b, v20.16b, v7.16b
2303	add	v20.4s, v7.4s, v6.4s
2304	add	v17.4s, v20.4s, v17.4s
2305	ext	v20.16b, v19.16b, v19.16b, #8
2306	eor	v18.16b, v18.16b, v17.16b
2307	ext	v17.16b, v17.16b, v17.16b, #4
2308	tbl	v18.16b, { v18.16b }, v2.16b
2309	add	v21.4s, v22.4s, v18.4s
2310	uzp2	v22.4s, v20.4s, v23.4s
2311	ext	v18.16b, v18.16b, v18.16b, #8
2312	eor	v7.16b, v7.16b, v21.16b
2313	ext	v20.16b, v22.16b, v20.16b, #4
2314	ushr	v22.4s, v7.4s, #7
2315	shl	v7.4s, v7.4s, #25
2316	add	v17.4s, v17.4s, v20.4s
2317	ext	v20.16b, v21.16b, v21.16b, #12
2318	ext	v21.16b, v19.16b, v19.16b, #12
2319	orr	v7.16b, v7.16b, v22.16b
2320	ext	v19.16b, v19.16b, v21.16b, #12
2321	add	v17.4s, v17.4s, v7.4s
2322	mov	v21.16b, v6.16b
2323	rev64	v19.4s, v19.4s
2324	eor	v18.16b, v17.16b, v18.16b
2325	mov	v21.s[1], v16.s[2]
2326	tbl	v18.16b, { v18.16b }, v0.16b
2327	trn2	v19.4s, v19.4s, v21.4s
2328	add	v20.4s, v20.4s, v18.4s
2329	eor	v7.16b, v7.16b, v20.16b
2330	ushr	v22.4s, v7.4s, #12
2331	shl	v7.4s, v7.4s, #20
2332	orr	v7.16b, v7.16b, v22.16b
2333	add	v19.4s, v7.4s, v19.4s
2334	add	v17.4s, v19.4s, v17.4s
2335	eor	v18.16b, v18.16b, v17.16b
2336	ext	v17.16b, v17.16b, v17.16b, #12
2337	tbl	v18.16b, { v18.16b }, v2.16b
2338	add	v19.4s, v20.4s, v18.4s
2339	ext	v20.16b, v5.16b, v5.16b, #12
2340	ext	v18.16b, v18.16b, v18.16b, #8
2341	eor	v7.16b, v7.16b, v19.16b
2342	uzp1	v5.4s, v5.4s, v20.4s
2343	ushr	v21.4s, v7.4s, #7
2344	shl	v7.4s, v7.4s, #25
2345	orr	v7.16b, v7.16b, v21.16b
2346	add	v5.4s, v7.4s, v5.4s
2347	add	v5.4s, v5.4s, v17.4s
2348	eor	v17.16b, v5.16b, v18.16b
2349	ext	v18.16b, v19.16b, v19.16b, #4
2350	tbl	v17.16b, { v17.16b }, v0.16b
2351	add	v18.4s, v18.4s, v17.4s
2352	eor	v6.16b, v7.16b, v18.16b
2353	zip1	v7.4s, v4.4s, v16.4s
2354	zip1	v4.4s, v16.4s, v4.4s
2355	ushr	v16.4s, v6.4s, #12
2356	shl	v6.4s, v6.4s, #20
2357	ext	v4.16b, v4.16b, v7.16b, #8
2358	orr	v6.16b, v6.16b, v16.16b
2359	add	v4.4s, v6.4s, v4.4s
2360	add	v4.4s, v4.4s, v5.4s
2361	eor	v5.16b, v17.16b, v4.16b
2362	ext	v4.16b, v4.16b, v4.16b, #4
2363	tbl	v5.16b, { v5.16b }, v2.16b
2364	add	v7.4s, v18.4s, v5.4s
2365	eor	v6.16b, v6.16b, v7.16b
2366	ext	v7.16b, v7.16b, v7.16b, #12
2367	ushr	v16.4s, v6.4s, #7
2368	shl	v6.4s, v6.4s, #25
2369	orr	v6.16b, v6.16b, v16.16b
2370	ext	v16.16b, v5.16b, v5.16b, #8
2371	eor	v5.16b, v4.16b, v7.16b
2372	eor	v4.16b, v6.16b, v16.16b
2373.LBB3_11:	/* Dispatch on x15, then run the per-iteration tail (only reached when x15 == 0). */
2374	subs	x13, x15, #1	/* x13 = x15 - 1; the flags feed the b.eq below */
2375	b.eq	.LBB3_9	/* x15 == 1: go to .LBB3_9 (label is outside this view) */
2376	cbnz	x15, .LBB3_10	/* any other non-zero x15: go to .LBB3_10 (label is outside this view) */
2377	add	x4, x4, x12	/* x15 == 0 from here on; presumably counter += increment - TODO confirm against the prologue */
2378	add	x0, x0, #8	/* advance x0 by 8 bytes; presumably steps to the next input pointer - TODO confirm */
2379	subs	x1, x1, #1	/* decrement x1; these flags are consumed by the b.ne below */
2380	stp	q5, q4, [x8], #32	/* store q5 and q4 (32 bytes) at x8, then x8 += 32; does not alter the flags */
2381	b.ne	.LBB3_8	/* x1 != 0: branch back to .LBB3_8; otherwise fall through into .LBB3_14 */
2382.LBB3_14:	/* Exit path: release the stack frame, reload the saved registers and return. */
2383	add	sp, sp, #368	/* drop 368 bytes, leaving sp at the base of the 144-byte register save area */
2384	ldp	x20, x19, [sp, #128]	/* reload general registers; presumably mirrors the prologue stores (not visible here) */
2385	ldp	x22, x21, [sp, #112]
2386	ldp	x24, x23, [sp, #96]
2387	ldp	x26, x25, [sp, #80]
2388	ldp	x29, x27, [sp, #64]	/* x29 is paired with x27; x28 and x30 are not reloaded on this path */
2389	ldp	d9, d8, [sp, #48]	/* reload d8-d15, i.e. the low 64 bits of v8-v15 */
2390	ldp	d11, d10, [sp, #32]
2391	ldp	d13, d12, [sp, #16]
2392	ldp	d15, d14, [sp], #144	/* final reload also pops the save area: sp += 144 */
2393	ret
2394.Lfunc_end3:
2395	.size	zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2396	.cfi_endproc
2397	.section	".note.GNU-stack","",@progbits
2398#endif