1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 *
27 * This is converted assembly: SSE4.1 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
29 */
30
31#if defined(__aarch64__)
32	.text
33	.section	.rodata.cst16,"aM",@progbits,16
34	.p2align	4
35.LCPI0_0:
36	.byte	2
37	.byte	3
38	.byte	0
39	.byte	1
40	.byte	6
41	.byte	7
42	.byte	4
43	.byte	5
44	.byte	10
45	.byte	11
46	.byte	8
47	.byte	9
48	.byte	14
49	.byte	15
50	.byte	12
51	.byte	13
52.LCPI0_1:
53	.word	1779033703
54	.word	3144134277
55	.word	1013904242
56	.word	2773480762
57.LCPI0_2:
58	.byte	1
59	.byte	2
60	.byte	3
61	.byte	0
62	.byte	5
63	.byte	6
64	.byte	7
65	.byte	4
66	.byte	9
67	.byte	10
68	.byte	11
69	.byte	8
70	.byte	13
71	.byte	14
72	.byte	15
73	.byte	12
74.LCPI0_3:
75	.byte	0
76	.byte	1
77	.byte	2
78	.byte	3
79	.byte	20
80	.byte	21
81	.byte	22
82	.byte	23
83	.byte	8
84	.byte	9
85	.byte	10
86	.byte	11
87	.byte	28
88	.byte	29
89	.byte	30
90	.byte	31
91.LCPI0_4:
92	.byte	0
93	.byte	1
94	.byte	2
95	.byte	3
96	.byte	4
97	.byte	5
98	.byte	6
99	.byte	7
100	.byte	8
101	.byte	9
102	.byte	10
103	.byte	11
104	.byte	28
105	.byte	29
106	.byte	30
107	.byte	31
108	.text
109	.globl	zfs_blake3_compress_in_place_sse41
110	.p2align	2
111	.type	zfs_blake3_compress_in_place_sse41,@function
112zfs_blake3_compress_in_place_sse41:
113	.cfi_startproc
114	ldp	q7, q6, [x0]
115	ldp	q17, q18, [x1]
116	add	x12, x1, #32
117	ld2	{ v4.4s, v5.4s }, [x12]
118	lsr	x10, x3, #32
119	fmov	s16, w3
120	adrp	x13, .LCPI0_0
121	adrp	x11, .LCPI0_1
122	and	w8, w2, #0xff
123	mov	v16.s[1], w10
124	ldr	q0, [x13, :lo12:.LCPI0_0]
125	ldr	q20, [x11, :lo12:.LCPI0_1]
126	adrp	x11, .LCPI0_4
127	and	w9, w4, #0xff
128	ldr	q2, [x11, :lo12:.LCPI0_4]
129	mov	v16.s[2], w8
130	uzp1	v21.4s, v17.4s, v18.4s
131	add	v7.4s, v6.4s, v7.4s
132	adrp	x12, .LCPI0_3
133	mov	v16.s[3], w9
134	uzp2	v18.4s, v17.4s, v18.4s
135	add	v7.4s, v7.4s, v21.4s
136	ext	v17.16b, v5.16b, v5.16b, #12
137	ldr	q3, [x12, :lo12:.LCPI0_3]
138	ext	v24.16b, v4.16b, v4.16b, #12
139	eor	v16.16b, v7.16b, v16.16b
140	mov	v27.16b, v17.16b
141	uzp1	v19.4s, v21.4s, v21.4s
142	ext	v25.16b, v21.16b, v21.16b, #12
143	zip2	v28.4s, v18.4s, v17.4s
144	tbl	v29.16b, { v16.16b }, v0.16b
145	mov	v27.s[1], v24.s[2]
146	zip1	v23.2d, v17.2d, v18.2d
147	ext	v19.16b, v19.16b, v21.16b, #8
148	add	v22.4s, v29.4s, v20.4s
149	ext	v26.16b, v21.16b, v25.16b, #12
150	tbl	v20.16b, { v23.16b, v24.16b }, v2.16b
151	zip1	v21.4s, v28.4s, v24.4s
152	zip1	v23.4s, v24.4s, v28.4s
153	uzp2	v19.4s, v19.4s, v18.4s
154	eor	v24.16b, v22.16b, v6.16b
155	ext	v25.16b, v20.16b, v20.16b, #12
156	ext	v6.16b, v23.16b, v21.16b, #8
157	add	v7.4s, v7.4s, v18.4s
158	ext	v18.16b, v19.16b, v19.16b, #4
159	tbl	v16.16b, { v26.16b, v27.16b }, v3.16b
160	uzp1	v21.4s, v20.4s, v25.4s
161	mov	v26.16b, v6.16b
162	ext	v23.16b, v18.16b, v18.16b, #12
163	mov	v26.s[1], v21.s[2]
164	adrp	x10, .LCPI0_2
165	ext	v25.16b, v18.16b, v23.16b, #12
166	uzp1	v23.4s, v18.4s, v18.4s
167	ldr	q1, [x10, :lo12:.LCPI0_2]
168	ext	v18.16b, v23.16b, v18.16b, #8
169	ushr	v23.4s, v24.4s, #12
170	shl	v24.4s, v24.4s, #20
171	orr	v23.16b, v24.16b, v23.16b
172	add	v7.4s, v7.4s, v23.4s
173	eor	v27.16b, v29.16b, v7.16b
174	add	v4.4s, v7.4s, v4.4s
175	tbl	v7.16b, { v25.16b, v26.16b }, v3.16b
176	tbl	v26.16b, { v27.16b }, v1.16b
177	add	v22.4s, v22.4s, v26.4s
178	uzp2	v18.4s, v18.4s, v16.4s
179	eor	v23.16b, v23.16b, v22.16b
180	ext	v5.16b, v18.16b, v18.16b, #4
181	ushr	v27.4s, v23.4s, #7
182	shl	v23.4s, v23.4s, #25
183	uzp1	v25.4s, v5.4s, v5.4s
184	orr	v23.16b, v23.16b, v27.16b
185	ext	v28.16b, v4.16b, v4.16b, #12
186	ext	v4.16b, v25.16b, v5.16b, #8
187	ext	v25.16b, v26.16b, v26.16b, #8
188	add	v26.4s, v28.4s, v23.4s
189	eor	v25.16b, v26.16b, v25.16b
190	ext	v22.16b, v22.16b, v22.16b, #4
191	tbl	v25.16b, { v25.16b }, v0.16b
192	add	v22.4s, v22.4s, v25.4s
193	eor	v23.16b, v23.16b, v22.16b
194	add	v17.4s, v26.4s, v17.4s
195	ushr	v26.4s, v23.4s, #12
196	shl	v23.4s, v23.4s, #20
197	orr	v23.16b, v23.16b, v26.16b
198	add	v17.4s, v17.4s, v23.4s
199	eor	v25.16b, v25.16b, v17.16b
200	add	v17.4s, v17.4s, v19.4s
201	tbl	v19.16b, { v25.16b }, v1.16b
202	add	v22.4s, v22.4s, v19.4s
203	eor	v23.16b, v23.16b, v22.16b
204	ushr	v25.4s, v23.4s, #7
205	shl	v23.4s, v23.4s, #25
206	ext	v17.16b, v17.16b, v17.16b, #4
207	orr	v23.16b, v23.16b, v25.16b
208	ext	v19.16b, v19.16b, v19.16b, #8
209	add	v17.4s, v17.4s, v23.4s
210	eor	v19.16b, v17.16b, v19.16b
211	ext	v22.16b, v22.16b, v22.16b, #12
212	tbl	v19.16b, { v19.16b }, v0.16b
213	add	v22.4s, v22.4s, v19.4s
214	eor	v23.16b, v23.16b, v22.16b
215	ushr	v25.4s, v23.4s, #12
216	shl	v23.4s, v23.4s, #20
217	add	v17.4s, v17.4s, v16.4s
218	orr	v23.16b, v23.16b, v25.16b
219	add	v17.4s, v17.4s, v23.4s
220	ext	v25.16b, v17.16b, v17.16b, #12
221	eor	v17.16b, v19.16b, v17.16b
222	tbl	v17.16b, { v17.16b }, v1.16b
223	add	v19.4s, v22.4s, v17.4s
224	eor	v22.16b, v23.16b, v19.16b
225	add	v25.4s, v25.4s, v21.4s
226	zip1	v20.2d, v6.2d, v16.2d
227	ushr	v23.4s, v22.4s, #7
228	shl	v22.4s, v22.4s, #25
229	zip2	v24.4s, v16.4s, v6.4s
230	tbl	v26.16b, { v20.16b, v21.16b }, v2.16b
231	orr	v22.16b, v22.16b, v23.16b
232	zip1	v16.4s, v24.4s, v21.4s
233	zip1	v20.4s, v21.4s, v24.4s
234	ext	v21.16b, v26.16b, v26.16b, #12
235	ext	v17.16b, v17.16b, v17.16b, #8
236	add	v25.4s, v25.4s, v22.4s
237	ext	v16.16b, v20.16b, v16.16b, #8
238	uzp1	v21.4s, v26.4s, v21.4s
239	eor	v26.16b, v25.16b, v17.16b
240	ext	v19.16b, v19.16b, v19.16b, #4
241	tbl	v26.16b, { v26.16b }, v0.16b
242	mov	v29.16b, v16.16b
243	add	v19.4s, v19.4s, v26.4s
244	ext	v27.16b, v5.16b, v5.16b, #12
245	mov	v29.s[1], v21.s[2]
246	eor	v22.16b, v22.16b, v19.16b
247	ext	v28.16b, v5.16b, v27.16b, #12
248	ushr	v27.4s, v22.4s, #12
249	shl	v22.4s, v22.4s, #20
250	add	v6.4s, v25.4s, v6.4s
251	orr	v22.16b, v22.16b, v27.16b
252	add	v6.4s, v6.4s, v22.4s
253	eor	v26.16b, v26.16b, v6.16b
254	add	v6.4s, v6.4s, v18.4s
255	tbl	v18.16b, { v26.16b }, v1.16b
256	add	v19.4s, v19.4s, v18.4s
257	eor	v22.16b, v22.16b, v19.16b
258	ushr	v26.4s, v22.4s, #7
259	shl	v22.4s, v22.4s, #25
260	ext	v6.16b, v6.16b, v6.16b, #4
261	orr	v22.16b, v22.16b, v26.16b
262	ext	v18.16b, v18.16b, v18.16b, #8
263	add	v6.4s, v6.4s, v22.4s
264	eor	v18.16b, v6.16b, v18.16b
265	ext	v19.16b, v19.16b, v19.16b, #12
266	tbl	v18.16b, { v18.16b }, v0.16b
267	add	v19.4s, v19.4s, v18.4s
268	eor	v22.16b, v22.16b, v19.16b
269	ushr	v26.4s, v22.4s, #12
270	shl	v22.4s, v22.4s, #20
271	add	v6.4s, v6.4s, v7.4s
272	orr	v22.16b, v22.16b, v26.16b
273	add	v6.4s, v6.4s, v22.4s
274	ext	v26.16b, v6.16b, v6.16b, #12
275	eor	v6.16b, v18.16b, v6.16b
276	uzp2	v4.4s, v4.4s, v7.4s
277	zip2	v25.4s, v7.4s, v16.4s
278	add	v26.4s, v26.4s, v21.4s
279	zip1	v20.2d, v16.2d, v7.2d
280	tbl	v6.16b, { v6.16b }, v1.16b
281	ext	v24.16b, v4.16b, v4.16b, #4
282	tbl	v27.16b, { v20.16b, v21.16b }, v2.16b
283	zip1	v7.4s, v25.4s, v21.4s
284	zip1	v20.4s, v21.4s, v25.4s
285	add	v18.4s, v19.4s, v6.4s
286	uzp1	v5.4s, v24.4s, v24.4s
287	ext	v21.16b, v27.16b, v27.16b, #12
288	ext	v7.16b, v20.16b, v7.16b, #8
289	eor	v19.16b, v22.16b, v18.16b
290	ext	v5.16b, v5.16b, v24.16b, #8
291	tbl	v17.16b, { v28.16b, v29.16b }, v3.16b
292	uzp1	v21.4s, v27.4s, v21.4s
293	mov	v28.16b, v7.16b
294	ushr	v22.4s, v19.4s, #7
295	shl	v19.4s, v19.4s, #25
296	ext	v23.16b, v24.16b, v24.16b, #12
297	uzp2	v5.4s, v5.4s, v17.4s
298	mov	v28.s[1], v21.s[2]
299	orr	v19.16b, v19.16b, v22.16b
300	ext	v27.16b, v24.16b, v23.16b, #12
301	ext	v23.16b, v5.16b, v5.16b, #4
302	ext	v6.16b, v6.16b, v6.16b, #8
303	ext	v25.16b, v18.16b, v18.16b, #4
304	add	v18.4s, v26.4s, v19.4s
305	uzp1	v24.4s, v23.4s, v23.4s
306	eor	v6.16b, v18.16b, v6.16b
307	ext	v24.16b, v24.16b, v23.16b, #8
308	add	v16.4s, v18.4s, v16.4s
309	tbl	v18.16b, { v27.16b, v28.16b }, v3.16b
310	tbl	v27.16b, { v6.16b }, v0.16b
311	uzp2	v6.4s, v24.4s, v18.4s
312	add	v24.4s, v25.4s, v27.4s
313	eor	v19.16b, v19.16b, v24.16b
314	ushr	v25.4s, v19.4s, #12
315	shl	v19.4s, v19.4s, #20
316	orr	v19.16b, v19.16b, v25.16b
317	add	v16.4s, v16.4s, v19.4s
318	eor	v25.16b, v27.16b, v16.16b
319	add	v4.4s, v16.4s, v4.4s
320	tbl	v16.16b, { v25.16b }, v1.16b
321	add	v24.4s, v24.4s, v16.4s
322	eor	v19.16b, v19.16b, v24.16b
323	ushr	v25.4s, v19.4s, #7
324	shl	v19.4s, v19.4s, #25
325	ext	v4.16b, v4.16b, v4.16b, #4
326	orr	v19.16b, v19.16b, v25.16b
327	ext	v16.16b, v16.16b, v16.16b, #8
328	add	v4.4s, v4.4s, v19.4s
329	eor	v16.16b, v4.16b, v16.16b
330	ext	v24.16b, v24.16b, v24.16b, #12
331	tbl	v25.16b, { v16.16b }, v0.16b
332	add	v24.4s, v24.4s, v25.4s
333	eor	v16.16b, v19.16b, v24.16b
334	ushr	v19.4s, v16.4s, #12
335	shl	v16.4s, v16.4s, #20
336	add	v4.4s, v4.4s, v17.4s
337	orr	v19.16b, v16.16b, v19.16b
338	add	v27.4s, v4.4s, v19.4s
339	eor	v25.16b, v25.16b, v27.16b
340	tbl	v25.16b, { v25.16b }, v1.16b
341	add	v24.4s, v24.4s, v25.4s
342	zip2	v26.4s, v17.4s, v7.4s
343	ext	v4.16b, v27.16b, v27.16b, #12
344	eor	v19.16b, v19.16b, v24.16b
345	add	v28.4s, v4.4s, v21.4s
346	zip1	v20.2d, v7.2d, v17.2d
347	zip1	v4.4s, v26.4s, v21.4s
348	zip1	v17.4s, v21.4s, v26.4s
349	ushr	v26.4s, v19.4s, #7
350	shl	v19.4s, v19.4s, #25
351	orr	v19.16b, v19.16b, v26.16b
352	ext	v25.16b, v25.16b, v25.16b, #8
353	add	v27.4s, v28.4s, v19.4s
354	eor	v25.16b, v27.16b, v25.16b
355	ext	v24.16b, v24.16b, v24.16b, #4
356	tbl	v25.16b, { v25.16b }, v0.16b
357	add	v24.4s, v24.4s, v25.4s
358	eor	v19.16b, v19.16b, v24.16b
359	add	v7.4s, v27.4s, v7.4s
360	ushr	v27.4s, v19.4s, #12
361	shl	v19.4s, v19.4s, #20
362	orr	v19.16b, v19.16b, v27.16b
363	add	v7.4s, v7.4s, v19.4s
364	eor	v25.16b, v25.16b, v7.16b
365	add	v5.4s, v7.4s, v5.4s
366	tbl	v7.16b, { v25.16b }, v1.16b
367	add	v24.4s, v24.4s, v7.4s
368	eor	v19.16b, v19.16b, v24.16b
369	ushr	v25.4s, v19.4s, #7
370	shl	v19.4s, v19.4s, #25
371	ext	v5.16b, v5.16b, v5.16b, #4
372	orr	v19.16b, v19.16b, v25.16b
373	ext	v7.16b, v7.16b, v7.16b, #8
374	add	v5.4s, v5.4s, v19.4s
375	eor	v7.16b, v5.16b, v7.16b
376	ext	v24.16b, v24.16b, v24.16b, #12
377	tbl	v7.16b, { v7.16b }, v0.16b
378	add	v24.4s, v24.4s, v7.4s
379	eor	v19.16b, v19.16b, v24.16b
380	ushr	v25.4s, v19.4s, #12
381	shl	v19.4s, v19.4s, #20
382	tbl	v16.16b, { v20.16b, v21.16b }, v2.16b
383	add	v5.4s, v5.4s, v18.4s
384	orr	v19.16b, v19.16b, v25.16b
385	ext	v20.16b, v16.16b, v16.16b, #12
386	ext	v4.16b, v17.16b, v4.16b, #8
387	add	v5.4s, v5.4s, v19.4s
388	uzp1	v21.4s, v16.4s, v20.4s
389	mov	v17.16b, v4.16b
390	ext	v25.16b, v5.16b, v5.16b, #12
391	mov	v17.s[1], v21.s[2]
392	add	v25.4s, v25.4s, v21.4s
393	zip1	v20.2d, v4.2d, v18.2d
394	ext	v22.16b, v23.16b, v23.16b, #12
395	zip2	v26.4s, v18.4s, v4.4s
396	tbl	v18.16b, { v20.16b, v21.16b }, v2.16b
397	eor	v5.16b, v7.16b, v5.16b
398	ext	v16.16b, v23.16b, v22.16b, #12
399	ext	v22.16b, v6.16b, v6.16b, #4
400	zip1	v27.4s, v26.4s, v21.4s
401	zip1	v20.4s, v21.4s, v26.4s
402	ext	v21.16b, v18.16b, v18.16b, #12
403	tbl	v5.16b, { v5.16b }, v1.16b
404	ext	v20.16b, v20.16b, v27.16b, #8
405	uzp1	v27.4s, v18.4s, v21.4s
406	uzp1	v18.4s, v22.4s, v22.4s
407	add	v21.4s, v24.4s, v5.4s
408	ext	v18.16b, v18.16b, v22.16b, #8
409	eor	v19.16b, v19.16b, v21.16b
410	tbl	v7.16b, { v16.16b, v17.16b }, v3.16b
411	uzp2	v18.4s, v18.4s, v17.4s
412	zip2	v16.4s, v16.4s, v20.4s
413	ushr	v17.4s, v19.4s, #7
414	shl	v19.4s, v19.4s, #25
415	orr	v17.16b, v19.16b, v17.16b
416	ext	v5.16b, v5.16b, v5.16b, #8
417	add	v19.4s, v25.4s, v17.4s
418	eor	v5.16b, v19.16b, v5.16b
419	ext	v21.16b, v21.16b, v21.16b, #4
420	tbl	v5.16b, { v5.16b }, v0.16b
421	add	v4.4s, v19.4s, v4.4s
422	add	v19.4s, v21.4s, v5.4s
423	eor	v17.16b, v17.16b, v19.16b
424	ushr	v21.4s, v17.4s, #12
425	shl	v17.4s, v17.4s, #20
426	orr	v17.16b, v17.16b, v21.16b
427	add	v4.4s, v4.4s, v17.4s
428	eor	v5.16b, v5.16b, v4.16b
429	tbl	v5.16b, { v5.16b }, v1.16b
430	add	v4.4s, v4.4s, v6.4s
431	add	v6.4s, v19.4s, v5.4s
432	eor	v17.16b, v17.16b, v6.16b
433	ushr	v19.4s, v17.4s, #7
434	shl	v17.4s, v17.4s, #25
435	ext	v4.16b, v4.16b, v4.16b, #4
436	orr	v17.16b, v17.16b, v19.16b
437	ext	v5.16b, v5.16b, v5.16b, #8
438	add	v4.4s, v4.4s, v17.4s
439	eor	v5.16b, v4.16b, v5.16b
440	ext	v6.16b, v6.16b, v6.16b, #12
441	tbl	v5.16b, { v5.16b }, v0.16b
442	add	v6.4s, v6.4s, v5.4s
443	eor	v17.16b, v17.16b, v6.16b
444	ushr	v19.4s, v17.4s, #12
445	shl	v17.4s, v17.4s, #20
446	add	v4.4s, v4.4s, v7.4s
447	orr	v17.16b, v17.16b, v19.16b
448	add	v4.4s, v4.4s, v17.4s
449	eor	v5.16b, v5.16b, v4.16b
450	tbl	v5.16b, { v5.16b }, v1.16b
451	mov	v29.16b, v20.16b
452	ext	v4.16b, v4.16b, v4.16b, #12
453	add	v6.4s, v6.4s, v5.4s
454	mov	v29.s[1], v27.s[2]
455	add	v4.4s, v4.4s, v27.4s
456	zip1	v26.2d, v20.2d, v7.2d
457	zip1	v7.4s, v16.4s, v27.4s
458	zip1	v16.4s, v27.4s, v16.4s
459	eor	v17.16b, v17.16b, v6.16b
460	ext	v7.16b, v16.16b, v7.16b, #8
461	ushr	v16.4s, v17.4s, #7
462	shl	v17.4s, v17.4s, #25
463	orr	v16.16b, v17.16b, v16.16b
464	ext	v5.16b, v5.16b, v5.16b, #8
465	add	v4.4s, v4.4s, v16.4s
466	eor	v5.16b, v4.16b, v5.16b
467	ext	v6.16b, v6.16b, v6.16b, #4
468	tbl	v5.16b, { v5.16b }, v0.16b
469	add	v6.4s, v6.4s, v5.4s
470	eor	v16.16b, v16.16b, v6.16b
471	ushr	v17.4s, v16.4s, #12
472	shl	v16.4s, v16.4s, #20
473	add	v4.4s, v4.4s, v20.4s
474	orr	v16.16b, v16.16b, v17.16b
475	add	v4.4s, v4.4s, v16.4s
476	eor	v5.16b, v5.16b, v4.16b
477	tbl	v5.16b, { v5.16b }, v1.16b
478	add	v6.4s, v6.4s, v5.4s
479	eor	v16.16b, v16.16b, v6.16b
480	add	v4.4s, v4.4s, v18.4s
481	ushr	v17.4s, v16.4s, #7
482	shl	v16.4s, v16.4s, #25
483	ext	v23.16b, v22.16b, v22.16b, #12
484	ext	v4.16b, v4.16b, v4.16b, #4
485	orr	v16.16b, v16.16b, v17.16b
486	ext	v28.16b, v22.16b, v23.16b, #12
487	ext	v5.16b, v5.16b, v5.16b, #8
488	add	v4.4s, v16.4s, v4.4s
489	tbl	v3.16b, { v28.16b, v29.16b }, v3.16b
490	eor	v5.16b, v4.16b, v5.16b
491	ext	v6.16b, v6.16b, v6.16b, #12
492	add	v3.4s, v4.4s, v3.4s
493	tbl	v4.16b, { v5.16b }, v0.16b
494	add	v5.4s, v6.4s, v4.4s
495	eor	v6.16b, v16.16b, v5.16b
496	ushr	v16.4s, v6.4s, #12
497	shl	v6.4s, v6.4s, #20
498	orr	v6.16b, v6.16b, v16.16b
499	tbl	v2.16b, { v26.16b, v27.16b }, v2.16b
500	add	v3.4s, v3.4s, v6.4s
501	ext	v19.16b, v2.16b, v2.16b, #12
502	eor	v4.16b, v4.16b, v3.16b
503	uzp1	v2.4s, v2.4s, v19.4s
504	ext	v3.16b, v3.16b, v3.16b, #12
505	tbl	v4.16b, { v4.16b }, v1.16b
506	add	v2.4s, v3.4s, v2.4s
507	add	v3.4s, v5.4s, v4.4s
508	eor	v5.16b, v6.16b, v3.16b
509	ushr	v6.4s, v5.4s, #7
510	shl	v5.4s, v5.4s, #25
511	orr	v5.16b, v5.16b, v6.16b
512	ext	v4.16b, v4.16b, v4.16b, #8
513	add	v2.4s, v2.4s, v5.4s
514	eor	v4.16b, v2.16b, v4.16b
515	ext	v3.16b, v3.16b, v3.16b, #4
516	tbl	v0.16b, { v4.16b }, v0.16b
517	add	v3.4s, v3.4s, v0.4s
518	eor	v4.16b, v5.16b, v3.16b
519	ushr	v5.4s, v4.4s, #12
520	shl	v4.4s, v4.4s, #20
521	add	v2.4s, v2.4s, v7.4s
522	orr	v4.16b, v4.16b, v5.16b
523	add	v2.4s, v2.4s, v4.4s
524	eor	v0.16b, v0.16b, v2.16b
525	tbl	v0.16b, { v0.16b }, v1.16b
526	add	v1.4s, v3.4s, v0.4s
527	eor	v3.16b, v4.16b, v1.16b
528	ext	v2.16b, v2.16b, v2.16b, #4
529	ext	v1.16b, v1.16b, v1.16b, #12
530	ushr	v4.4s, v3.4s, #7
531	shl	v3.4s, v3.4s, #25
532	ext	v0.16b, v0.16b, v0.16b, #8
533	eor	v1.16b, v2.16b, v1.16b
534	orr	v2.16b, v3.16b, v4.16b
535	eor	v0.16b, v2.16b, v0.16b
536	stp	q1, q0, [x0]
537	ret
538.Lfunc_end0:
539	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
540	.cfi_endproc
541
542	.section	.rodata.cst16,"aM",@progbits,16
543	.p2align	4
544.LCPI1_0:
545	.byte	2
546	.byte	3
547	.byte	0
548	.byte	1
549	.byte	6
550	.byte	7
551	.byte	4
552	.byte	5
553	.byte	10
554	.byte	11
555	.byte	8
556	.byte	9
557	.byte	14
558	.byte	15
559	.byte	12
560	.byte	13
561.LCPI1_1:
562	.word	1779033703
563	.word	3144134277
564	.word	1013904242
565	.word	2773480762
566.LCPI1_2:
567	.byte	1
568	.byte	2
569	.byte	3
570	.byte	0
571	.byte	5
572	.byte	6
573	.byte	7
574	.byte	4
575	.byte	9
576	.byte	10
577	.byte	11
578	.byte	8
579	.byte	13
580	.byte	14
581	.byte	15
582	.byte	12
583.LCPI1_3:
584	.byte	0
585	.byte	1
586	.byte	2
587	.byte	3
588	.byte	20
589	.byte	21
590	.byte	22
591	.byte	23
592	.byte	8
593	.byte	9
594	.byte	10
595	.byte	11
596	.byte	28
597	.byte	29
598	.byte	30
599	.byte	31
600.LCPI1_4:
601	.byte	0
602	.byte	1
603	.byte	2
604	.byte	3
605	.byte	4
606	.byte	5
607	.byte	6
608	.byte	7
609	.byte	8
610	.byte	9
611	.byte	10
612	.byte	11
613	.byte	28
614	.byte	29
615	.byte	30
616	.byte	31
617	.text
618	.globl	zfs_blake3_compress_xof_sse41
619	.p2align	2
620	.type	zfs_blake3_compress_xof_sse41,@function
621zfs_blake3_compress_xof_sse41:
622	.cfi_startproc
623	ldp	q7, q6, [x0]
624	ldp	q17, q18, [x1]
625	add	x12, x1, #32
626	ld2	{ v4.4s, v5.4s }, [x12]
627	lsr	x10, x3, #32
628	fmov	s16, w3
629	adrp	x13, .LCPI1_0
630	adrp	x11, .LCPI1_1
631	and	w8, w2, #0xff
632	mov	v16.s[1], w10
633	ldr	q0, [x13, :lo12:.LCPI1_0]
634	ldr	q20, [x11, :lo12:.LCPI1_1]
635	adrp	x11, .LCPI1_4
636	and	w9, w4, #0xff
637	ldr	q2, [x11, :lo12:.LCPI1_4]
638	mov	v16.s[2], w8
639	uzp1	v21.4s, v17.4s, v18.4s
640	add	v7.4s, v6.4s, v7.4s
641	adrp	x12, .LCPI1_3
642	mov	v16.s[3], w9
643	uzp2	v18.4s, v17.4s, v18.4s
644	add	v7.4s, v7.4s, v21.4s
645	ext	v17.16b, v5.16b, v5.16b, #12
646	ldr	q3, [x12, :lo12:.LCPI1_3]
647	ext	v24.16b, v4.16b, v4.16b, #12
648	eor	v16.16b, v7.16b, v16.16b
649	mov	v27.16b, v17.16b
650	uzp1	v19.4s, v21.4s, v21.4s
651	ext	v25.16b, v21.16b, v21.16b, #12
652	zip2	v28.4s, v18.4s, v17.4s
653	tbl	v29.16b, { v16.16b }, v0.16b
654	mov	v27.s[1], v24.s[2]
655	zip1	v23.2d, v17.2d, v18.2d
656	ext	v19.16b, v19.16b, v21.16b, #8
657	add	v22.4s, v29.4s, v20.4s
658	ext	v26.16b, v21.16b, v25.16b, #12
659	tbl	v20.16b, { v23.16b, v24.16b }, v2.16b
660	zip1	v21.4s, v28.4s, v24.4s
661	zip1	v23.4s, v24.4s, v28.4s
662	uzp2	v19.4s, v19.4s, v18.4s
663	eor	v24.16b, v22.16b, v6.16b
664	ext	v25.16b, v20.16b, v20.16b, #12
665	ext	v6.16b, v23.16b, v21.16b, #8
666	add	v7.4s, v7.4s, v18.4s
667	ext	v18.16b, v19.16b, v19.16b, #4
668	tbl	v16.16b, { v26.16b, v27.16b }, v3.16b
669	uzp1	v21.4s, v20.4s, v25.4s
670	mov	v26.16b, v6.16b
671	ext	v23.16b, v18.16b, v18.16b, #12
672	mov	v26.s[1], v21.s[2]
673	adrp	x10, .LCPI1_2
674	ext	v25.16b, v18.16b, v23.16b, #12
675	uzp1	v23.4s, v18.4s, v18.4s
676	ldr	q1, [x10, :lo12:.LCPI1_2]
677	ext	v18.16b, v23.16b, v18.16b, #8
678	ushr	v23.4s, v24.4s, #12
679	shl	v24.4s, v24.4s, #20
680	orr	v23.16b, v24.16b, v23.16b
681	add	v7.4s, v7.4s, v23.4s
682	eor	v27.16b, v29.16b, v7.16b
683	add	v4.4s, v7.4s, v4.4s
684	tbl	v7.16b, { v25.16b, v26.16b }, v3.16b
685	tbl	v26.16b, { v27.16b }, v1.16b
686	add	v22.4s, v22.4s, v26.4s
687	uzp2	v18.4s, v18.4s, v16.4s
688	eor	v23.16b, v23.16b, v22.16b
689	ext	v5.16b, v18.16b, v18.16b, #4
690	ushr	v27.4s, v23.4s, #7
691	shl	v23.4s, v23.4s, #25
692	uzp1	v25.4s, v5.4s, v5.4s
693	orr	v23.16b, v23.16b, v27.16b
694	ext	v28.16b, v4.16b, v4.16b, #12
695	ext	v4.16b, v25.16b, v5.16b, #8
696	ext	v25.16b, v26.16b, v26.16b, #8
697	add	v26.4s, v28.4s, v23.4s
698	eor	v25.16b, v26.16b, v25.16b
699	ext	v22.16b, v22.16b, v22.16b, #4
700	tbl	v25.16b, { v25.16b }, v0.16b
701	add	v22.4s, v22.4s, v25.4s
702	eor	v23.16b, v23.16b, v22.16b
703	add	v17.4s, v26.4s, v17.4s
704	ushr	v26.4s, v23.4s, #12
705	shl	v23.4s, v23.4s, #20
706	orr	v23.16b, v23.16b, v26.16b
707	add	v17.4s, v17.4s, v23.4s
708	eor	v25.16b, v25.16b, v17.16b
709	add	v17.4s, v17.4s, v19.4s
710	tbl	v19.16b, { v25.16b }, v1.16b
711	add	v22.4s, v22.4s, v19.4s
712	eor	v23.16b, v23.16b, v22.16b
713	ushr	v25.4s, v23.4s, #7
714	shl	v23.4s, v23.4s, #25
715	ext	v17.16b, v17.16b, v17.16b, #4
716	orr	v23.16b, v23.16b, v25.16b
717	ext	v19.16b, v19.16b, v19.16b, #8
718	add	v17.4s, v17.4s, v23.4s
719	eor	v19.16b, v17.16b, v19.16b
720	ext	v22.16b, v22.16b, v22.16b, #12
721	tbl	v19.16b, { v19.16b }, v0.16b
722	add	v22.4s, v22.4s, v19.4s
723	eor	v23.16b, v23.16b, v22.16b
724	ushr	v25.4s, v23.4s, #12
725	shl	v23.4s, v23.4s, #20
726	add	v17.4s, v17.4s, v16.4s
727	orr	v23.16b, v23.16b, v25.16b
728	add	v17.4s, v17.4s, v23.4s
729	ext	v25.16b, v17.16b, v17.16b, #12
730	eor	v17.16b, v19.16b, v17.16b
731	tbl	v17.16b, { v17.16b }, v1.16b
732	add	v19.4s, v22.4s, v17.4s
733	eor	v22.16b, v23.16b, v19.16b
734	add	v25.4s, v25.4s, v21.4s
735	zip1	v20.2d, v6.2d, v16.2d
736	ushr	v23.4s, v22.4s, #7
737	shl	v22.4s, v22.4s, #25
738	zip2	v24.4s, v16.4s, v6.4s
739	tbl	v26.16b, { v20.16b, v21.16b }, v2.16b
740	orr	v22.16b, v22.16b, v23.16b
741	zip1	v16.4s, v24.4s, v21.4s
742	zip1	v20.4s, v21.4s, v24.4s
743	ext	v21.16b, v26.16b, v26.16b, #12
744	ext	v17.16b, v17.16b, v17.16b, #8
745	add	v25.4s, v25.4s, v22.4s
746	ext	v16.16b, v20.16b, v16.16b, #8
747	uzp1	v21.4s, v26.4s, v21.4s
748	eor	v26.16b, v25.16b, v17.16b
749	ext	v19.16b, v19.16b, v19.16b, #4
750	tbl	v26.16b, { v26.16b }, v0.16b
751	mov	v29.16b, v16.16b
752	add	v19.4s, v19.4s, v26.4s
753	ext	v27.16b, v5.16b, v5.16b, #12
754	mov	v29.s[1], v21.s[2]
755	eor	v22.16b, v22.16b, v19.16b
756	ext	v28.16b, v5.16b, v27.16b, #12
757	ushr	v27.4s, v22.4s, #12
758	shl	v22.4s, v22.4s, #20
759	add	v6.4s, v25.4s, v6.4s
760	orr	v22.16b, v22.16b, v27.16b
761	add	v6.4s, v6.4s, v22.4s
762	eor	v26.16b, v26.16b, v6.16b
763	add	v6.4s, v6.4s, v18.4s
764	tbl	v18.16b, { v26.16b }, v1.16b
765	add	v19.4s, v19.4s, v18.4s
766	eor	v22.16b, v22.16b, v19.16b
767	ushr	v26.4s, v22.4s, #7
768	shl	v22.4s, v22.4s, #25
769	ext	v6.16b, v6.16b, v6.16b, #4
770	orr	v22.16b, v22.16b, v26.16b
771	ext	v18.16b, v18.16b, v18.16b, #8
772	add	v6.4s, v6.4s, v22.4s
773	eor	v18.16b, v6.16b, v18.16b
774	ext	v19.16b, v19.16b, v19.16b, #12
775	tbl	v18.16b, { v18.16b }, v0.16b
776	add	v19.4s, v19.4s, v18.4s
777	eor	v22.16b, v22.16b, v19.16b
778	ushr	v26.4s, v22.4s, #12
779	shl	v22.4s, v22.4s, #20
780	add	v6.4s, v6.4s, v7.4s
781	orr	v22.16b, v22.16b, v26.16b
782	add	v6.4s, v6.4s, v22.4s
783	ext	v26.16b, v6.16b, v6.16b, #12
784	eor	v6.16b, v18.16b, v6.16b
785	uzp2	v4.4s, v4.4s, v7.4s
786	zip2	v25.4s, v7.4s, v16.4s
787	add	v26.4s, v26.4s, v21.4s
788	zip1	v20.2d, v16.2d, v7.2d
789	tbl	v6.16b, { v6.16b }, v1.16b
790	ext	v24.16b, v4.16b, v4.16b, #4
791	tbl	v27.16b, { v20.16b, v21.16b }, v2.16b
792	zip1	v7.4s, v25.4s, v21.4s
793	zip1	v20.4s, v21.4s, v25.4s
794	add	v18.4s, v19.4s, v6.4s
795	uzp1	v5.4s, v24.4s, v24.4s
796	ext	v21.16b, v27.16b, v27.16b, #12
797	ext	v7.16b, v20.16b, v7.16b, #8
798	eor	v19.16b, v22.16b, v18.16b
799	ext	v5.16b, v5.16b, v24.16b, #8
800	tbl	v17.16b, { v28.16b, v29.16b }, v3.16b
801	uzp1	v21.4s, v27.4s, v21.4s
802	mov	v28.16b, v7.16b
803	ushr	v22.4s, v19.4s, #7
804	shl	v19.4s, v19.4s, #25
805	ext	v23.16b, v24.16b, v24.16b, #12
806	uzp2	v5.4s, v5.4s, v17.4s
807	mov	v28.s[1], v21.s[2]
808	orr	v19.16b, v19.16b, v22.16b
809	ext	v27.16b, v24.16b, v23.16b, #12
810	ext	v23.16b, v5.16b, v5.16b, #4
811	ext	v6.16b, v6.16b, v6.16b, #8
812	ext	v25.16b, v18.16b, v18.16b, #4
813	add	v18.4s, v26.4s, v19.4s
814	uzp1	v24.4s, v23.4s, v23.4s
815	eor	v6.16b, v18.16b, v6.16b
816	ext	v24.16b, v24.16b, v23.16b, #8
817	add	v16.4s, v18.4s, v16.4s
818	tbl	v18.16b, { v27.16b, v28.16b }, v3.16b
819	tbl	v27.16b, { v6.16b }, v0.16b
820	uzp2	v6.4s, v24.4s, v18.4s
821	add	v24.4s, v25.4s, v27.4s
822	eor	v19.16b, v19.16b, v24.16b
823	ushr	v25.4s, v19.4s, #12
824	shl	v19.4s, v19.4s, #20
825	orr	v19.16b, v19.16b, v25.16b
826	add	v16.4s, v16.4s, v19.4s
827	eor	v25.16b, v27.16b, v16.16b
828	add	v4.4s, v16.4s, v4.4s
829	tbl	v16.16b, { v25.16b }, v1.16b
830	add	v24.4s, v24.4s, v16.4s
831	eor	v19.16b, v19.16b, v24.16b
832	ushr	v25.4s, v19.4s, #7
833	shl	v19.4s, v19.4s, #25
834	ext	v4.16b, v4.16b, v4.16b, #4
835	orr	v19.16b, v19.16b, v25.16b
836	ext	v16.16b, v16.16b, v16.16b, #8
837	add	v4.4s, v4.4s, v19.4s
838	eor	v16.16b, v4.16b, v16.16b
839	ext	v24.16b, v24.16b, v24.16b, #12
840	tbl	v25.16b, { v16.16b }, v0.16b
841	add	v24.4s, v24.4s, v25.4s
842	eor	v16.16b, v19.16b, v24.16b
843	ushr	v19.4s, v16.4s, #12
844	shl	v16.4s, v16.4s, #20
845	add	v4.4s, v4.4s, v17.4s
846	orr	v19.16b, v16.16b, v19.16b
847	add	v27.4s, v4.4s, v19.4s
848	eor	v25.16b, v25.16b, v27.16b
849	tbl	v25.16b, { v25.16b }, v1.16b
850	add	v24.4s, v24.4s, v25.4s
851	zip2	v26.4s, v17.4s, v7.4s
852	ext	v4.16b, v27.16b, v27.16b, #12
853	eor	v19.16b, v19.16b, v24.16b
854	add	v28.4s, v4.4s, v21.4s
855	zip1	v20.2d, v7.2d, v17.2d
856	zip1	v4.4s, v26.4s, v21.4s
857	zip1	v17.4s, v21.4s, v26.4s
858	ushr	v26.4s, v19.4s, #7
859	shl	v19.4s, v19.4s, #25
860	orr	v19.16b, v19.16b, v26.16b
861	ext	v25.16b, v25.16b, v25.16b, #8
862	add	v27.4s, v28.4s, v19.4s
863	eor	v25.16b, v27.16b, v25.16b
864	ext	v24.16b, v24.16b, v24.16b, #4
865	tbl	v25.16b, { v25.16b }, v0.16b
866	add	v24.4s, v24.4s, v25.4s
867	eor	v19.16b, v19.16b, v24.16b
868	add	v7.4s, v27.4s, v7.4s
869	ushr	v27.4s, v19.4s, #12
870	shl	v19.4s, v19.4s, #20
871	orr	v19.16b, v19.16b, v27.16b
872	add	v7.4s, v7.4s, v19.4s
873	eor	v25.16b, v25.16b, v7.16b
874	add	v5.4s, v7.4s, v5.4s
875	tbl	v7.16b, { v25.16b }, v1.16b
876	add	v24.4s, v24.4s, v7.4s
877	eor	v19.16b, v19.16b, v24.16b
878	ushr	v25.4s, v19.4s, #7
879	shl	v19.4s, v19.4s, #25
880	ext	v5.16b, v5.16b, v5.16b, #4
881	orr	v19.16b, v19.16b, v25.16b
882	ext	v7.16b, v7.16b, v7.16b, #8
883	add	v5.4s, v5.4s, v19.4s
884	eor	v7.16b, v5.16b, v7.16b
885	ext	v24.16b, v24.16b, v24.16b, #12
886	tbl	v7.16b, { v7.16b }, v0.16b
887	add	v24.4s, v24.4s, v7.4s
888	eor	v19.16b, v19.16b, v24.16b
889	ushr	v25.4s, v19.4s, #12
890	shl	v19.4s, v19.4s, #20
891	tbl	v16.16b, { v20.16b, v21.16b }, v2.16b
892	add	v5.4s, v5.4s, v18.4s
893	orr	v19.16b, v19.16b, v25.16b
894	ext	v20.16b, v16.16b, v16.16b, #12
895	ext	v4.16b, v17.16b, v4.16b, #8
896	add	v5.4s, v5.4s, v19.4s
897	uzp1	v21.4s, v16.4s, v20.4s
898	mov	v17.16b, v4.16b
899	ext	v25.16b, v5.16b, v5.16b, #12
900	mov	v17.s[1], v21.s[2]
901	add	v25.4s, v25.4s, v21.4s
902	zip1	v20.2d, v4.2d, v18.2d
903	ext	v22.16b, v23.16b, v23.16b, #12
904	zip2	v26.4s, v18.4s, v4.4s
905	tbl	v18.16b, { v20.16b, v21.16b }, v2.16b
906	eor	v5.16b, v7.16b, v5.16b
907	ext	v16.16b, v23.16b, v22.16b, #12
908	ext	v22.16b, v6.16b, v6.16b, #4
909	zip1	v27.4s, v26.4s, v21.4s
910	zip1	v20.4s, v21.4s, v26.4s
911	ext	v21.16b, v18.16b, v18.16b, #12
912	tbl	v5.16b, { v5.16b }, v1.16b
913	ext	v20.16b, v20.16b, v27.16b, #8
914	uzp1	v27.4s, v18.4s, v21.4s
915	uzp1	v18.4s, v22.4s, v22.4s
916	add	v21.4s, v24.4s, v5.4s
917	ext	v18.16b, v18.16b, v22.16b, #8
918	eor	v19.16b, v19.16b, v21.16b
919	tbl	v7.16b, { v16.16b, v17.16b }, v3.16b
920	uzp2	v18.4s, v18.4s, v17.4s
921	zip2	v16.4s, v16.4s, v20.4s
922	ushr	v17.4s, v19.4s, #7
923	shl	v19.4s, v19.4s, #25
924	orr	v17.16b, v19.16b, v17.16b
925	ext	v5.16b, v5.16b, v5.16b, #8
926	add	v19.4s, v25.4s, v17.4s
927	eor	v5.16b, v19.16b, v5.16b
928	ext	v21.16b, v21.16b, v21.16b, #4
929	tbl	v5.16b, { v5.16b }, v0.16b
930	add	v4.4s, v19.4s, v4.4s
931	add	v19.4s, v21.4s, v5.4s
932	eor	v17.16b, v17.16b, v19.16b
933	ushr	v21.4s, v17.4s, #12
934	shl	v17.4s, v17.4s, #20
935	orr	v17.16b, v17.16b, v21.16b
936	add	v4.4s, v4.4s, v17.4s
937	eor	v5.16b, v5.16b, v4.16b
938	tbl	v5.16b, { v5.16b }, v1.16b
939	add	v4.4s, v4.4s, v6.4s
940	add	v6.4s, v19.4s, v5.4s
941	eor	v17.16b, v17.16b, v6.16b
942	ushr	v19.4s, v17.4s, #7
943	shl	v17.4s, v17.4s, #25
944	ext	v4.16b, v4.16b, v4.16b, #4
945	orr	v17.16b, v17.16b, v19.16b
946	ext	v5.16b, v5.16b, v5.16b, #8
947	add	v4.4s, v4.4s, v17.4s
948	eor	v5.16b, v4.16b, v5.16b
949	ext	v6.16b, v6.16b, v6.16b, #12
950	tbl	v5.16b, { v5.16b }, v0.16b
951	add	v6.4s, v6.4s, v5.4s
952	eor	v17.16b, v17.16b, v6.16b
953	ushr	v19.4s, v17.4s, #12
954	shl	v17.4s, v17.4s, #20
955	add	v4.4s, v4.4s, v7.4s
956	orr	v17.16b, v17.16b, v19.16b
957	add	v4.4s, v4.4s, v17.4s
958	eor	v5.16b, v5.16b, v4.16b
959	tbl	v5.16b, { v5.16b }, v1.16b
960	mov	v29.16b, v20.16b
961	ext	v4.16b, v4.16b, v4.16b, #12
962	add	v6.4s, v6.4s, v5.4s
963	mov	v29.s[1], v27.s[2]
964	add	v4.4s, v4.4s, v27.4s
965	zip1	v26.2d, v20.2d, v7.2d
966	zip1	v7.4s, v16.4s, v27.4s
967	zip1	v16.4s, v27.4s, v16.4s
968	eor	v17.16b, v17.16b, v6.16b
969	ext	v7.16b, v16.16b, v7.16b, #8
970	ushr	v16.4s, v17.4s, #7
971	shl	v17.4s, v17.4s, #25
972	orr	v16.16b, v17.16b, v16.16b
973	ext	v5.16b, v5.16b, v5.16b, #8
974	add	v4.4s, v4.4s, v16.4s
975	eor	v5.16b, v4.16b, v5.16b
976	ext	v6.16b, v6.16b, v6.16b, #4
977	tbl	v5.16b, { v5.16b }, v0.16b
978	add	v6.4s, v6.4s, v5.4s
979	eor	v16.16b, v16.16b, v6.16b
980	ushr	v17.4s, v16.4s, #12
981	shl	v16.4s, v16.4s, #20
982	add	v4.4s, v4.4s, v20.4s
983	orr	v16.16b, v16.16b, v17.16b
984	add	v4.4s, v4.4s, v16.4s
985	eor	v5.16b, v5.16b, v4.16b
986	tbl	v5.16b, { v5.16b }, v1.16b
987	add	v6.4s, v6.4s, v5.4s
988	eor	v16.16b, v16.16b, v6.16b
989	add	v4.4s, v4.4s, v18.4s
990	ushr	v17.4s, v16.4s, #7
991	shl	v16.4s, v16.4s, #25
992	ext	v23.16b, v22.16b, v22.16b, #12
993	ext	v4.16b, v4.16b, v4.16b, #4
994	orr	v16.16b, v16.16b, v17.16b
995	ext	v28.16b, v22.16b, v23.16b, #12
996	ext	v5.16b, v5.16b, v5.16b, #8
997	add	v4.4s, v16.4s, v4.4s
998	tbl	v3.16b, { v28.16b, v29.16b }, v3.16b
999	eor	v5.16b, v4.16b, v5.16b
1000	ext	v6.16b, v6.16b, v6.16b, #12
1001	add	v3.4s, v4.4s, v3.4s
1002	tbl	v4.16b, { v5.16b }, v0.16b
1003	add	v5.4s, v6.4s, v4.4s
1004	eor	v6.16b, v16.16b, v5.16b
1005	ushr	v16.4s, v6.4s, #12
1006	shl	v6.4s, v6.4s, #20
1007	orr	v6.16b, v6.16b, v16.16b
1008	tbl	v2.16b, { v26.16b, v27.16b }, v2.16b
1009	add	v3.4s, v3.4s, v6.4s
1010	ext	v19.16b, v2.16b, v2.16b, #12
1011	eor	v4.16b, v4.16b, v3.16b
1012	uzp1	v2.4s, v2.4s, v19.4s
1013	ext	v3.16b, v3.16b, v3.16b, #12
1014	tbl	v4.16b, { v4.16b }, v1.16b
1015	add	v2.4s, v3.4s, v2.4s
1016	add	v3.4s, v5.4s, v4.4s
1017	eor	v5.16b, v6.16b, v3.16b
1018	ushr	v6.4s, v5.4s, #7
1019	shl	v5.4s, v5.4s, #25
1020	orr	v5.16b, v5.16b, v6.16b
1021	ext	v4.16b, v4.16b, v4.16b, #8
1022	add	v2.4s, v2.4s, v5.4s
1023	eor	v4.16b, v2.16b, v4.16b
1024	ext	v3.16b, v3.16b, v3.16b, #4
1025	tbl	v0.16b, { v4.16b }, v0.16b
1026	add	v3.4s, v3.4s, v0.4s
1027	eor	v4.16b, v5.16b, v3.16b
1028	ushr	v5.4s, v4.4s, #12
1029	shl	v4.4s, v4.4s, #20
1030	add	v2.4s, v2.4s, v7.4s
1031	orr	v4.16b, v4.16b, v5.16b
1032	add	v2.4s, v2.4s, v4.4s
1033	eor	v0.16b, v0.16b, v2.16b
1034	tbl	v0.16b, { v0.16b }, v1.16b
1035	add	v1.4s, v3.4s, v0.4s
1036	eor	v3.16b, v4.16b, v1.16b
1037	ushr	v4.4s, v3.4s, #7
1038	shl	v3.4s, v3.4s, #25
1039	ext	v2.16b, v2.16b, v2.16b, #4
1040	ext	v0.16b, v0.16b, v0.16b, #8
1041	ext	v1.16b, v1.16b, v1.16b, #12
1042	orr	v3.16b, v3.16b, v4.16b
1043	eor	v2.16b, v2.16b, v1.16b
1044	eor	v3.16b, v3.16b, v0.16b
1045	stp	q2, q3, [x5]
1046	ldr	q2, [x0]
1047	eor	v1.16b, v2.16b, v1.16b
1048	str	q1, [x5, #32]
1049	ldr	q1, [x0, #16]
1050	eor	v0.16b, v1.16b, v0.16b
1051	str	q0, [x5, #48]
1052	ret
1053.Lfunc_end1:
1054	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end1-zfs_blake3_compress_xof_sse41
1055	.cfi_endproc
1056
1057	.section	.rodata.cst16,"aM",@progbits,16
1058	.p2align	4
1059.LCPI2_0:
1060	.word	0
1061	.word	1
1062	.word	2
1063	.word	3
1064.LCPI2_1:
1065	.byte	2
1066	.byte	3
1067	.byte	0
1068	.byte	1
1069	.byte	6
1070	.byte	7
1071	.byte	4
1072	.byte	5
1073	.byte	10
1074	.byte	11
1075	.byte	8
1076	.byte	9
1077	.byte	14
1078	.byte	15
1079	.byte	12
1080	.byte	13
1081.LCPI2_2:
1082	.byte	1
1083	.byte	2
1084	.byte	3
1085	.byte	0
1086	.byte	5
1087	.byte	6
1088	.byte	7
1089	.byte	4
1090	.byte	9
1091	.byte	10
1092	.byte	11
1093	.byte	8
1094	.byte	13
1095	.byte	14
1096	.byte	15
1097	.byte	12
1098	.text
1099	.globl	zfs_blake3_hash_many_sse41
1100	.p2align	2
1101	.type	zfs_blake3_hash_many_sse41,@function
1102zfs_blake3_hash_many_sse41:
1103	.cfi_startproc
1104	stp	d15, d14, [sp, #-160]!
1105	stp	d13, d12, [sp, #16]
1106	stp	d11, d10, [sp, #32]
1107	stp	d9, d8, [sp, #48]
1108	stp	x29, x30, [sp, #64]
1109	stp	x28, x27, [sp, #80]
1110	stp	x26, x25, [sp, #96]
1111	stp	x24, x23, [sp, #112]
1112	stp	x22, x21, [sp, #128]
1113	stp	x20, x19, [sp, #144]
1114	mov	x29, sp
1115	sub	sp, sp, #448
1116	.cfi_def_cfa w29, 160
1117	.cfi_offset w19, -8
1118	.cfi_offset w20, -16
1119	.cfi_offset w21, -24
1120	.cfi_offset w22, -32
1121	.cfi_offset w23, -40
1122	.cfi_offset w24, -48
1123	.cfi_offset w25, -56
1124	.cfi_offset w26, -64
1125	.cfi_offset w27, -72
1126	.cfi_offset w28, -80
1127	.cfi_offset w30, -88
1128	.cfi_offset w29, -96
1129	.cfi_offset b8, -104
1130	.cfi_offset b9, -112
1131	.cfi_offset b10, -120
1132	.cfi_offset b11, -128
1133	.cfi_offset b12, -136
1134	.cfi_offset b13, -144
1135	.cfi_offset b14, -152
1136	.cfi_offset b15, -160
1137	ldr	x26, [x29, #168]
1138	ldrb	w27, [x29, #160]
1139	mov	w19, w6
1140	mov	x20, x4
1141	mov	x22, x2
1142	mov	x28, x1
1143	cmp	x1, #4
1144	mov	x24, x0
1145	str	x3, [sp, #40]
1146	b.lo	.LBB2_8
1147	adrp	x11, .LCPI2_0
1148	ldr	q0, [x11, :lo12:.LCPI2_0]
1149	sbfx	w13, w5, #0, #1
1150	dup	v1.4s, w13
1151	mov	w10, #58983
1152	mov	w11, #44677
1153	mov	w12, #62322
1154	and	v0.16b, v1.16b, v0.16b
1155	mov	w13, #62778
1156	orr	w8, w7, w19
1157	adrp	x9, .LCPI2_1
1158	movk	w10, #27145, lsl #16
1159	movk	w11, #47975, lsl #16
1160	movk	w12, #15470, lsl #16
1161	movk	w13, #42319, lsl #16
1162	str	q0, [sp, #16]
1163	orr	v0.4s, #128, lsl #24
1164	adrp	x14, .LCPI2_2
1165	str	q0, [sp]
1166.LBB2_2:
1167	ldr	x2, [sp, #40]
1168	mov	x15, x2
1169	ld1r	{ v7.4s }, [x15], #4
1170	add	x16, x2, #8
1171	add	x17, x2, #12
1172	add	x18, x2, #16
1173	add	x0, x2, #20
1174	add	x3, x2, #24
1175	add	x2, x2, #28
1176	ld1r	{ v6.4s }, [x16]
1177	ld1r	{ v17.4s }, [x17]
1178	ld1r	{ v10.4s }, [x18]
1179	ld1r	{ v11.4s }, [x0]
1180	ld1r	{ v19.4s }, [x3]
1181	ld1r	{ v18.4s }, [x15]
1182	ld1r	{ v16.4s }, [x2]
1183	cbz	x22, .LBB2_7
1184	ldr	q1, [sp, #16]
1185	dup	v0.4s, w20
1186	ldp	x15, x16, [x24]
1187	ldp	x17, x18, [x24, #16]
1188	add	v1.4s, v0.4s, v1.4s
1189	movi	v0.4s, #128, lsl #24
1190	str	q1, [sp, #64]
1191	eor	v0.16b, v1.16b, v0.16b
1192	ldr	q1, [sp]
1193	lsr	x2, x20, #32
1194	mov	x0, xzr
1195	mov	w6, w8
1196	cmgt	v0.4s, v1.4s, v0.4s
1197	dup	v1.4s, w2
1198	sub	v0.4s, v1.4s, v0.4s
1199	str	q0, [sp, #48]
1200.LBB2_4:
1201	mov	w4, #16
1202	stp	q16, q17, [sp, #192]
1203	bfi	x4, x0, #6, #58
1204	ldr	q1, [x15, x4]
1205	ldr	q3, [x16, x4]
1206	ldr	q2, [x17, x4]
1207	ldr	q4, [x18, x4]
1208	mov	w4, #32
1209	bfi	x4, x0, #6, #58
1210	ldr	q5, [x15, x4]
1211	ldr	q20, [x16, x4]
1212	ldr	q21, [x17, x4]
1213	ldr	q22, [x18, x4]
1214	mov	w4, #48
1215	lsl	x3, x0, #6
1216	bfi	x4, x0, #6, #58
1217	add	x0, x0, #1
1218	ldr	q0, [x15, x3]
1219	ldr	q23, [x16, x3]
1220	ldr	q16, [x17, x3]
1221	ldr	q17, [x18, x3]
1222	cmp	x0, x22
1223	ldr	q25, [x15, x4]
1224	ldr	q14, [x16, x4]
1225	ldr	q28, [x17, x4]
1226	ldr	q31, [x18, x4]
1227	csel	w4, w27, wzr, eq
1228	orr	w4, w4, w6
1229	mov	x2, xzr
1230	and	w6, w4, #0xff
1231	add	x3, x3, #256
1232.LBB2_5:
1233	ldr	x4, [x24, x2]
1234	add	x2, x2, #8
1235	cmp	x2, #32
1236	add	x4, x4, x3
1237	prfm	pldl1keep, [x4]
1238	b.ne	.LBB2_5
1239	zip1	v29.4s, v0.4s, v23.4s
1240	zip2	v23.4s, v0.4s, v23.4s
1241	zip1	v0.4s, v16.4s, v17.4s
1242	zip2	v24.4s, v16.4s, v17.4s
1243	zip1	v9.4s, v1.4s, v3.4s
1244	zip2	v26.4s, v1.4s, v3.4s
1245	zip1	v27.4s, v2.4s, v4.4s
1246	zip2	v17.4s, v2.4s, v4.4s
1247	zip1	v12.4s, v21.4s, v22.4s
1248	zip2	v13.4s, v21.4s, v22.4s
1249	add	v2.4s, v7.4s, v10.4s
1250	add	v1.4s, v18.4s, v11.4s
1251	ext	v7.16b, v0.16b, v29.16b, #8
1252	ext	v22.16b, v24.16b, v23.16b, #8
1253	zip1	v30.4s, v5.4s, v20.4s
1254	zip2	v20.4s, v5.4s, v20.4s
1255	stp	q1, q2, [sp, #112]
1256	ext	v2.16b, v29.16b, v7.16b, #8
1257	mov	v29.d[1], v0.d[0]
1258	ext	v18.16b, v23.16b, v22.16b, #8
1259	mov	v23.d[1], v24.d[0]
1260	zip1	v21.4s, v25.4s, v14.4s
1261	zip2	v4.4s, v25.4s, v14.4s
1262	zip1	v14.4s, v28.4s, v31.4s
1263	zip2	v15.4s, v28.4s, v31.4s
1264	add	v8.4s, v6.4s, v19.4s
1265	ext	v28.16b, v27.16b, v9.16b, #8
1266	ext	v31.16b, v17.16b, v26.16b, #8
1267	stur	q2, [x29, #-208]
1268	mov	v7.16b, v29.16b
1269	ext	v0.16b, v12.16b, v30.16b, #8
1270	stp	q23, q29, [x29, #-80]
1271	mov	v2.16b, v19.16b
1272	ext	v19.16b, v13.16b, v20.16b, #8
1273	mov	v29.16b, v9.16b
1274	ext	v25.16b, v9.16b, v28.16b, #8
1275	mov	v29.d[1], v27.d[0]
1276	ext	v24.16b, v26.16b, v31.16b, #8
1277	mov	v26.d[1], v17.d[0]
1278	ext	v17.16b, v15.16b, v4.16b, #8
1279	ext	v27.16b, v30.16b, v0.16b, #8
1280	ext	v0.16b, v20.16b, v19.16b, #8
1281	stp	q0, q25, [sp, #80]
1282	ext	v0.16b, v4.16b, v17.16b, #8
1283	str	q0, [sp, #224]
1284	ldr	q0, [sp, #128]
1285	mov	v6.16b, v23.16b
1286	mov	v22.16b, v4.16b
1287	ldr	q16, [x9, :lo12:.LCPI2_1]
1288	add	v17.4s, v0.4s, v7.4s
1289	ldr	q0, [sp, #112]
1290	mov	v30.d[1], v12.d[0]
1291	add	v7.4s, v8.4s, v29.4s
1292	mov	v20.d[1], v13.d[0]
1293	add	v4.4s, v0.4s, v6.4s
1294	ldr	q0, [sp, #64]
1295	dup	v3.4s, w12
1296	ext	v28.16b, v14.16b, v21.16b, #8
1297	dup	v1.4s, w10
1298	eor	v19.16b, v17.16b, v0.16b
1299	ldr	q0, [sp, #48]
1300	ext	v23.16b, v21.16b, v28.16b, #8
1301	mov	v21.d[1], v14.d[0]
1302	tbl	v14.16b, { v19.16b }, v16.16b
1303	eor	v12.16b, v4.16b, v0.16b
1304	movi	v0.4s, #64
1305	eor	v13.16b, v7.16b, v0.16b
1306	tbl	v13.16b, { v13.16b }, v16.16b
1307	add	v6.4s, v13.4s, v3.4s
1308	dup	v5.4s, w11
1309	tbl	v12.16b, { v12.16b }, v16.16b
1310	add	v1.4s, v14.4s, v1.4s
1311	eor	v9.16b, v6.16b, v2.16b
1312	ldp	q2, q0, [sp, #192]
1313	add	v5.4s, v12.4s, v5.4s
1314	eor	v19.16b, v1.16b, v10.16b
1315	eor	v10.16b, v5.16b, v11.16b
1316	ushr	v11.4s, v19.4s, #12
1317	shl	v19.4s, v19.4s, #20
1318	orr	v11.16b, v19.16b, v11.16b
1319	ushr	v19.4s, v10.4s, #12
1320	shl	v10.4s, v10.4s, #20
1321	mov	v22.d[1], v15.d[0]
1322	orr	v10.16b, v10.16b, v19.16b
1323	ushr	v19.4s, v9.4s, #12
1324	shl	v9.4s, v9.4s, #20
1325	add	v15.4s, v0.4s, v2.4s
1326	orr	v9.16b, v9.16b, v19.16b
1327	dup	v19.4s, w6
1328	add	v15.4s, v15.4s, v26.4s
1329	eor	v19.16b, v15.16b, v19.16b
1330	tbl	v3.16b, { v19.16b }, v16.16b
1331	dup	v19.4s, w13
1332	add	v8.4s, v3.4s, v19.4s
1333	ldur	q31, [x29, #-208]
1334	eor	v19.16b, v8.16b, v2.16b
1335	ushr	v0.4s, v19.4s, #12
1336	shl	v19.4s, v19.4s, #20
1337	orr	v2.16b, v19.16b, v0.16b
1338	ldr	q19, [x14, :lo12:.LCPI2_2]
1339	add	v17.4s, v17.4s, v31.4s
1340	add	v17.4s, v17.4s, v11.4s
1341	eor	v14.16b, v14.16b, v17.16b
1342	tbl	v14.16b, { v14.16b }, v19.16b
1343	add	v1.4s, v1.4s, v14.4s
1344	eor	v11.16b, v1.16b, v11.16b
1345	add	v4.4s, v4.4s, v18.4s
1346	ushr	v0.4s, v11.4s, #7
1347	shl	v11.4s, v11.4s, #25
1348	add	v4.4s, v4.4s, v10.4s
1349	orr	v0.16b, v11.16b, v0.16b
1350	eor	v11.16b, v12.16b, v4.16b
1351	tbl	v11.16b, { v11.16b }, v19.16b
1352	add	v5.4s, v5.4s, v11.4s
1353	eor	v10.16b, v5.16b, v10.16b
1354	add	v7.4s, v7.4s, v25.4s
1355	ushr	v12.4s, v10.4s, #7
1356	shl	v10.4s, v10.4s, #25
1357	add	v7.4s, v7.4s, v9.4s
1358	orr	v10.16b, v10.16b, v12.16b
1359	eor	v12.16b, v13.16b, v7.16b
1360	tbl	v12.16b, { v12.16b }, v19.16b
1361	add	v6.4s, v6.4s, v12.4s
1362	eor	v9.16b, v6.16b, v9.16b
1363	ushr	v13.4s, v9.4s, #7
1364	shl	v9.4s, v9.4s, #25
1365	orr	v9.16b, v9.16b, v13.16b
1366	add	v13.4s, v15.4s, v24.4s
1367	add	v13.4s, v13.4s, v2.4s
1368	eor	v3.16b, v3.16b, v13.16b
1369	tbl	v3.16b, { v3.16b }, v19.16b
1370	add	v8.4s, v8.4s, v3.4s
1371	eor	v2.16b, v8.16b, v2.16b
1372	add	v17.4s, v17.4s, v30.4s
1373	ushr	v15.4s, v2.4s, #7
1374	shl	v2.4s, v2.4s, #25
1375	add	v17.4s, v17.4s, v10.4s
1376	add	v4.4s, v4.4s, v20.4s
1377	orr	v2.16b, v2.16b, v15.16b
1378	eor	v3.16b, v3.16b, v17.16b
1379	add	v4.4s, v4.4s, v9.4s
1380	add	v7.4s, v7.4s, v21.4s
1381	tbl	v3.16b, { v3.16b }, v16.16b
1382	eor	v14.16b, v14.16b, v4.16b
1383	add	v7.4s, v7.4s, v2.4s
1384	add	v13.4s, v13.4s, v22.4s
1385	mov	v28.16b, v26.16b
1386	stur	q26, [x29, #-112]
1387	mov	v26.16b, v18.16b
1388	mov	v18.16b, v24.16b
1389	stur	q24, [x29, #-160]
1390	add	v6.4s, v6.4s, v3.4s
1391	mov	v24.16b, v20.16b
1392	tbl	v14.16b, { v14.16b }, v16.16b
1393	eor	v11.16b, v11.16b, v7.16b
1394	add	v13.4s, v13.4s, v0.4s
1395	ldr	q20, [sp, #80]
1396	eor	v10.16b, v6.16b, v10.16b
1397	add	v8.4s, v8.4s, v14.4s
1398	tbl	v11.16b, { v11.16b }, v16.16b
1399	eor	v12.16b, v12.16b, v13.16b
1400	stp	q30, q22, [x29, #-192]
1401	ushr	v15.4s, v10.4s, #12
1402	shl	v10.4s, v10.4s, #20
1403	eor	v9.16b, v8.16b, v9.16b
1404	add	v1.4s, v1.4s, v11.4s
1405	tbl	v12.16b, { v12.16b }, v16.16b
1406	mov	v30.16b, v27.16b
1407	add	v17.4s, v17.4s, v27.4s
1408	ldr	q27, [sp, #224]
1409	orr	v10.16b, v10.16b, v15.16b
1410	ushr	v15.4s, v9.4s, #12
1411	shl	v9.4s, v9.4s, #20
1412	eor	v2.16b, v1.16b, v2.16b
1413	add	v5.4s, v5.4s, v12.4s
1414	orr	v9.16b, v9.16b, v15.16b
1415	ushr	v15.4s, v2.4s, #12
1416	shl	v2.4s, v2.4s, #20
1417	eor	v0.16b, v5.16b, v0.16b
1418	add	v17.4s, v17.4s, v10.4s
1419	add	v4.4s, v4.4s, v20.4s
1420	orr	v2.16b, v2.16b, v15.16b
1421	ushr	v15.4s, v0.4s, #12
1422	shl	v0.4s, v0.4s, #20
1423	eor	v3.16b, v3.16b, v17.16b
1424	add	v4.4s, v4.4s, v9.4s
1425	add	v7.4s, v7.4s, v23.4s
1426	orr	v0.16b, v0.16b, v15.16b
1427	tbl	v3.16b, { v3.16b }, v19.16b
1428	eor	v14.16b, v14.16b, v4.16b
1429	add	v7.4s, v7.4s, v2.4s
1430	add	v13.4s, v13.4s, v27.4s
1431	add	v6.4s, v6.4s, v3.4s
1432	tbl	v14.16b, { v14.16b }, v19.16b
1433	eor	v11.16b, v11.16b, v7.16b
1434	add	v13.4s, v13.4s, v0.4s
1435	eor	v10.16b, v6.16b, v10.16b
1436	add	v8.4s, v8.4s, v14.4s
1437	tbl	v11.16b, { v11.16b }, v19.16b
1438	eor	v12.16b, v12.16b, v13.16b
1439	stur	q21, [x29, #-144]
1440	ushr	v15.4s, v10.4s, #7
1441	shl	v10.4s, v10.4s, #25
1442	eor	v9.16b, v8.16b, v9.16b
1443	add	v1.4s, v1.4s, v11.4s
1444	tbl	v12.16b, { v12.16b }, v19.16b
1445	ldur	q21, [x29, #-80]
1446	orr	v10.16b, v10.16b, v15.16b
1447	ushr	v15.4s, v9.4s, #7
1448	shl	v9.4s, v9.4s, #25
1449	eor	v2.16b, v1.16b, v2.16b
1450	add	v5.4s, v5.4s, v12.4s
1451	orr	v9.16b, v9.16b, v15.16b
1452	ushr	v15.4s, v2.4s, #7
1453	shl	v2.4s, v2.4s, #25
1454	eor	v0.16b, v5.16b, v0.16b
1455	orr	v2.16b, v2.16b, v15.16b
1456	ushr	v15.4s, v0.4s, #7
1457	shl	v0.4s, v0.4s, #25
1458	orr	v0.16b, v0.16b, v15.16b
1459	add	v17.4s, v17.4s, v21.4s
1460	add	v17.4s, v17.4s, v0.4s
1461	add	v4.4s, v4.4s, v26.4s
1462	eor	v14.16b, v14.16b, v17.16b
1463	add	v4.4s, v4.4s, v10.4s
1464	add	v7.4s, v7.4s, v18.4s
1465	tbl	v14.16b, { v14.16b }, v16.16b
1466	eor	v11.16b, v11.16b, v4.16b
1467	add	v7.4s, v7.4s, v9.4s
1468	add	v13.4s, v13.4s, v29.4s
1469	add	v1.4s, v1.4s, v14.4s
1470	tbl	v11.16b, { v11.16b }, v16.16b
1471	eor	v12.16b, v12.16b, v7.16b
1472	add	v13.4s, v13.4s, v2.4s
1473	eor	v0.16b, v0.16b, v1.16b
1474	add	v5.4s, v5.4s, v11.4s
1475	tbl	v12.16b, { v12.16b }, v16.16b
1476	eor	v3.16b, v3.16b, v13.16b
1477	ldur	q22, [x29, #-64]
1478	ushr	v15.4s, v0.4s, #12
1479	shl	v0.4s, v0.4s, #20
1480	eor	v10.16b, v5.16b, v10.16b
1481	add	v6.4s, v6.4s, v12.4s
1482	tbl	v3.16b, { v3.16b }, v16.16b
1483	orr	v0.16b, v0.16b, v15.16b
1484	ushr	v15.4s, v10.4s, #12
1485	shl	v10.4s, v10.4s, #20
1486	eor	v9.16b, v6.16b, v9.16b
1487	add	v8.4s, v8.4s, v3.4s
1488	add	v17.4s, v17.4s, v28.4s
1489	orr	v10.16b, v10.16b, v15.16b
1490	ushr	v15.4s, v9.4s, #12
1491	shl	v9.4s, v9.4s, #20
1492	eor	v2.16b, v8.16b, v2.16b
1493	add	v17.4s, v17.4s, v0.4s
1494	add	v4.4s, v4.4s, v24.4s
1495	orr	v9.16b, v9.16b, v15.16b
1496	ushr	v15.4s, v2.4s, #12
1497	shl	v2.4s, v2.4s, #20
1498	eor	v14.16b, v14.16b, v17.16b
1499	add	v4.4s, v4.4s, v10.4s
1500	add	v7.4s, v7.4s, v22.4s
1501	orr	v2.16b, v2.16b, v15.16b
1502	tbl	v14.16b, { v14.16b }, v19.16b
1503	eor	v11.16b, v11.16b, v4.16b
1504	add	v7.4s, v7.4s, v9.4s
1505	add	v13.4s, v13.4s, v23.4s
1506	add	v1.4s, v1.4s, v14.4s
1507	tbl	v11.16b, { v11.16b }, v19.16b
1508	eor	v12.16b, v12.16b, v7.16b
1509	add	v13.4s, v13.4s, v2.4s
1510	eor	v0.16b, v0.16b, v1.16b
1511	add	v5.4s, v5.4s, v11.4s
1512	tbl	v12.16b, { v12.16b }, v19.16b
1513	eor	v3.16b, v3.16b, v13.16b
1514	ldur	q22, [x29, #-144]
1515	ushr	v15.4s, v0.4s, #7
1516	shl	v0.4s, v0.4s, #25
1517	eor	v10.16b, v5.16b, v10.16b
1518	add	v6.4s, v6.4s, v12.4s
1519	tbl	v3.16b, { v3.16b }, v19.16b
1520	orr	v0.16b, v0.16b, v15.16b
1521	ushr	v15.4s, v10.4s, #7
1522	shl	v10.4s, v10.4s, #25
1523	eor	v9.16b, v6.16b, v9.16b
1524	add	v8.4s, v8.4s, v3.4s
1525	orr	v10.16b, v10.16b, v15.16b
1526	ushr	v15.4s, v9.4s, #7
1527	shl	v9.4s, v9.4s, #25
1528	eor	v2.16b, v8.16b, v2.16b
1529	add	v17.4s, v17.4s, v31.4s
1530	orr	v9.16b, v9.16b, v15.16b
1531	ushr	v15.4s, v2.4s, #7
1532	shl	v2.4s, v2.4s, #25
1533	add	v17.4s, v17.4s, v10.4s
1534	add	v4.4s, v4.4s, v22.4s
1535	orr	v2.16b, v2.16b, v15.16b
1536	eor	v3.16b, v3.16b, v17.16b
1537	add	v4.4s, v4.4s, v9.4s
1538	add	v7.4s, v7.4s, v30.4s
1539	tbl	v3.16b, { v3.16b }, v16.16b
1540	eor	v14.16b, v14.16b, v4.16b
1541	add	v7.4s, v7.4s, v2.4s
1542	add	v13.4s, v13.4s, v27.4s
1543	add	v6.4s, v6.4s, v3.4s
1544	tbl	v14.16b, { v14.16b }, v16.16b
1545	eor	v11.16b, v11.16b, v7.16b
1546	add	v13.4s, v13.4s, v0.4s
1547	ldr	q27, [sp, #96]
1548	mov	v21.16b, v26.16b
1549	stur	q26, [x29, #-96]
1550	mov	v28.16b, v31.16b
1551	eor	v10.16b, v6.16b, v10.16b
1552	add	v8.4s, v8.4s, v14.4s
1553	tbl	v11.16b, { v11.16b }, v16.16b
1554	eor	v12.16b, v12.16b, v13.16b
1555	ldp	q31, q26, [x29, #-192]
1556	ushr	v15.4s, v10.4s, #12
1557	shl	v10.4s, v10.4s, #20
1558	eor	v9.16b, v8.16b, v9.16b
1559	add	v1.4s, v1.4s, v11.4s
1560	tbl	v12.16b, { v12.16b }, v16.16b
1561	orr	v10.16b, v10.16b, v15.16b
1562	ushr	v15.4s, v9.4s, #12
1563	shl	v9.4s, v9.4s, #20
1564	eor	v2.16b, v1.16b, v2.16b
1565	add	v5.4s, v5.4s, v12.4s
1566	add	v17.4s, v17.4s, v20.4s
1567	orr	v9.16b, v9.16b, v15.16b
1568	ushr	v15.4s, v2.4s, #12
1569	shl	v2.4s, v2.4s, #20
1570	eor	v0.16b, v5.16b, v0.16b
1571	add	v17.4s, v17.4s, v10.4s
1572	add	v4.4s, v4.4s, v27.4s
1573	orr	v2.16b, v2.16b, v15.16b
1574	ushr	v15.4s, v0.4s, #12
1575	shl	v0.4s, v0.4s, #20
1576	eor	v3.16b, v3.16b, v17.16b
1577	add	v4.4s, v4.4s, v9.4s
1578	add	v7.4s, v7.4s, v26.4s
1579	orr	v0.16b, v0.16b, v15.16b
1580	tbl	v3.16b, { v3.16b }, v19.16b
1581	eor	v14.16b, v14.16b, v4.16b
1582	add	v7.4s, v7.4s, v2.4s
1583	add	v13.4s, v13.4s, v31.4s
1584	add	v6.4s, v6.4s, v3.4s
1585	tbl	v14.16b, { v14.16b }, v19.16b
1586	eor	v11.16b, v11.16b, v7.16b
1587	add	v13.4s, v13.4s, v0.4s
1588	eor	v10.16b, v6.16b, v10.16b
1589	add	v8.4s, v8.4s, v14.4s
1590	tbl	v11.16b, { v11.16b }, v19.16b
1591	eor	v12.16b, v12.16b, v13.16b
1592	ushr	v15.4s, v10.4s, #7
1593	shl	v10.4s, v10.4s, #25
1594	eor	v9.16b, v8.16b, v9.16b
1595	add	v1.4s, v1.4s, v11.4s
1596	tbl	v12.16b, { v12.16b }, v19.16b
1597	orr	v10.16b, v10.16b, v15.16b
1598	ushr	v15.4s, v9.4s, #7
1599	shl	v9.4s, v9.4s, #25
1600	eor	v2.16b, v1.16b, v2.16b
1601	add	v5.4s, v5.4s, v12.4s
1602	orr	v9.16b, v9.16b, v15.16b
1603	ushr	v15.4s, v2.4s, #7
1604	shl	v2.4s, v2.4s, #25
1605	eor	v0.16b, v5.16b, v0.16b
1606	mov	v18.16b, v24.16b
1607	mov	v24.16b, v20.16b
1608	orr	v2.16b, v2.16b, v15.16b
1609	ushr	v15.4s, v0.4s, #7
1610	shl	v0.4s, v0.4s, #25
1611	ldur	q20, [x29, #-160]
1612	orr	v0.16b, v0.16b, v15.16b
1613	add	v17.4s, v17.4s, v21.4s
1614	add	v17.4s, v17.4s, v0.4s
1615	add	v4.4s, v4.4s, v18.4s
1616	eor	v14.16b, v14.16b, v17.16b
1617	add	v4.4s, v4.4s, v10.4s
1618	add	v7.4s, v7.4s, v23.4s
1619	tbl	v14.16b, { v14.16b }, v16.16b
1620	eor	v11.16b, v11.16b, v4.16b
1621	add	v7.4s, v7.4s, v9.4s
1622	add	v13.4s, v13.4s, v20.4s
1623	add	v1.4s, v1.4s, v14.4s
1624	tbl	v11.16b, { v11.16b }, v16.16b
1625	eor	v12.16b, v12.16b, v7.16b
1626	add	v13.4s, v13.4s, v2.4s
1627	eor	v0.16b, v0.16b, v1.16b
1628	add	v5.4s, v5.4s, v11.4s
1629	tbl	v12.16b, { v12.16b }, v16.16b
1630	eor	v3.16b, v3.16b, v13.16b
1631	ldur	q25, [x29, #-80]
1632	ushr	v15.4s, v0.4s, #12
1633	shl	v0.4s, v0.4s, #20
1634	eor	v10.16b, v5.16b, v10.16b
1635	add	v6.4s, v6.4s, v12.4s
1636	tbl	v3.16b, { v3.16b }, v16.16b
1637	orr	v0.16b, v0.16b, v15.16b
1638	ushr	v15.4s, v10.4s, #12
1639	shl	v10.4s, v10.4s, #20
1640	eor	v9.16b, v6.16b, v9.16b
1641	add	v8.4s, v8.4s, v3.4s
1642	add	v17.4s, v17.4s, v29.4s
1643	orr	v10.16b, v10.16b, v15.16b
1644	ushr	v15.4s, v9.4s, #12
1645	shl	v9.4s, v9.4s, #20
1646	eor	v2.16b, v8.16b, v2.16b
1647	add	v17.4s, v17.4s, v0.4s
1648	add	v4.4s, v4.4s, v22.4s
1649	orr	v9.16b, v9.16b, v15.16b
1650	ushr	v15.4s, v2.4s, #12
1651	shl	v2.4s, v2.4s, #20
1652	eor	v14.16b, v14.16b, v17.16b
1653	add	v4.4s, v4.4s, v10.4s
1654	add	v7.4s, v7.4s, v25.4s
1655	orr	v2.16b, v2.16b, v15.16b
1656	tbl	v14.16b, { v14.16b }, v19.16b
1657	eor	v11.16b, v11.16b, v4.16b
1658	add	v7.4s, v7.4s, v9.4s
1659	add	v13.4s, v13.4s, v26.4s
1660	add	v1.4s, v1.4s, v14.4s
1661	tbl	v11.16b, { v11.16b }, v19.16b
1662	eor	v12.16b, v12.16b, v7.16b
1663	add	v13.4s, v13.4s, v2.4s
1664	ldur	q25, [x29, #-112]
1665	eor	v0.16b, v0.16b, v1.16b
1666	add	v5.4s, v5.4s, v11.4s
1667	tbl	v12.16b, { v12.16b }, v19.16b
1668	eor	v3.16b, v3.16b, v13.16b
1669	ushr	v15.4s, v0.4s, #7
1670	shl	v0.4s, v0.4s, #25
1671	eor	v10.16b, v5.16b, v10.16b
1672	add	v6.4s, v6.4s, v12.4s
1673	tbl	v3.16b, { v3.16b }, v19.16b
1674	orr	v0.16b, v0.16b, v15.16b
1675	ushr	v15.4s, v10.4s, #7
1676	shl	v10.4s, v10.4s, #25
1677	eor	v9.16b, v6.16b, v9.16b
1678	add	v8.4s, v8.4s, v3.4s
1679	orr	v10.16b, v10.16b, v15.16b
1680	ushr	v15.4s, v9.4s, #7
1681	shl	v9.4s, v9.4s, #25
1682	eor	v2.16b, v8.16b, v2.16b
1683	add	v17.4s, v17.4s, v25.4s
1684	orr	v9.16b, v9.16b, v15.16b
1685	ushr	v15.4s, v2.4s, #7
1686	shl	v2.4s, v2.4s, #25
1687	add	v17.4s, v17.4s, v10.4s
1688	add	v4.4s, v4.4s, v30.4s
1689	orr	v2.16b, v2.16b, v15.16b
1690	eor	v3.16b, v3.16b, v17.16b
1691	add	v4.4s, v4.4s, v9.4s
1692	add	v7.4s, v7.4s, v24.4s
1693	tbl	v3.16b, { v3.16b }, v16.16b
1694	eor	v14.16b, v14.16b, v4.16b
1695	add	v7.4s, v7.4s, v2.4s
1696	add	v13.4s, v13.4s, v31.4s
1697	add	v6.4s, v6.4s, v3.4s
1698	tbl	v14.16b, { v14.16b }, v16.16b
1699	eor	v11.16b, v11.16b, v7.16b
1700	add	v13.4s, v13.4s, v0.4s
1701	ldur	q25, [x29, #-64]
1702	eor	v10.16b, v6.16b, v10.16b
1703	add	v8.4s, v8.4s, v14.4s
1704	tbl	v11.16b, { v11.16b }, v16.16b
1705	eor	v12.16b, v12.16b, v13.16b
1706	ldr	q31, [sp, #224]
1707	ushr	v15.4s, v10.4s, #12
1708	shl	v10.4s, v10.4s, #20
1709	eor	v9.16b, v8.16b, v9.16b
1710	add	v1.4s, v1.4s, v11.4s
1711	tbl	v12.16b, { v12.16b }, v16.16b
1712	orr	v10.16b, v10.16b, v15.16b
1713	ushr	v15.4s, v9.4s, #12
1714	shl	v9.4s, v9.4s, #20
1715	eor	v2.16b, v1.16b, v2.16b
1716	add	v5.4s, v5.4s, v12.4s
1717	add	v17.4s, v17.4s, v27.4s
1718	orr	v9.16b, v9.16b, v15.16b
1719	ushr	v15.4s, v2.4s, #12
1720	shl	v2.4s, v2.4s, #20
1721	eor	v0.16b, v5.16b, v0.16b
1722	add	v17.4s, v17.4s, v10.4s
1723	add	v4.4s, v4.4s, v25.4s
1724	orr	v2.16b, v2.16b, v15.16b
1725	ushr	v15.4s, v0.4s, #12
1726	shl	v0.4s, v0.4s, #20
1727	eor	v3.16b, v3.16b, v17.16b
1728	add	v4.4s, v4.4s, v9.4s
1729	add	v7.4s, v7.4s, v31.4s
1730	orr	v0.16b, v0.16b, v15.16b
1731	tbl	v3.16b, { v3.16b }, v19.16b
1732	eor	v14.16b, v14.16b, v4.16b
1733	add	v7.4s, v7.4s, v2.4s
1734	add	v13.4s, v13.4s, v28.4s
1735	add	v6.4s, v6.4s, v3.4s
1736	tbl	v14.16b, { v14.16b }, v19.16b
1737	eor	v11.16b, v11.16b, v7.16b
1738	add	v13.4s, v13.4s, v0.4s
1739	eor	v10.16b, v6.16b, v10.16b
1740	add	v8.4s, v8.4s, v14.4s
1741	tbl	v11.16b, { v11.16b }, v19.16b
1742	eor	v12.16b, v12.16b, v13.16b
1743	ushr	v15.4s, v10.4s, #7
1744	shl	v10.4s, v10.4s, #25
1745	eor	v9.16b, v8.16b, v9.16b
1746	add	v1.4s, v1.4s, v11.4s
1747	tbl	v12.16b, { v12.16b }, v19.16b
1748	orr	v10.16b, v10.16b, v15.16b
1749	ushr	v15.4s, v9.4s, #7
1750	shl	v9.4s, v9.4s, #25
1751	eor	v2.16b, v1.16b, v2.16b
1752	add	v5.4s, v5.4s, v12.4s
1753	orr	v9.16b, v9.16b, v15.16b
1754	ushr	v15.4s, v2.4s, #7
1755	shl	v2.4s, v2.4s, #25
1756	eor	v0.16b, v5.16b, v0.16b
1757	orr	v2.16b, v2.16b, v15.16b
1758	ushr	v15.4s, v0.4s, #7
1759	shl	v0.4s, v0.4s, #25
1760	orr	v0.16b, v0.16b, v15.16b
1761	add	v17.4s, v17.4s, v18.4s
1762	add	v17.4s, v17.4s, v0.4s
1763	add	v4.4s, v4.4s, v22.4s
1764	eor	v14.16b, v14.16b, v17.16b
1765	add	v4.4s, v4.4s, v10.4s
1766	add	v7.4s, v7.4s, v26.4s
1767	tbl	v14.16b, { v14.16b }, v16.16b
1768	eor	v11.16b, v11.16b, v4.16b
1769	add	v7.4s, v7.4s, v9.4s
1770	add	v13.4s, v13.4s, v23.4s
1771	add	v1.4s, v1.4s, v14.4s
1772	tbl	v11.16b, { v11.16b }, v16.16b
1773	eor	v12.16b, v12.16b, v7.16b
1774	add	v13.4s, v13.4s, v2.4s
1775	mov	v21.16b, v29.16b
1776	stur	q29, [x29, #-128]
1777	mov	v29.16b, v30.16b
1778	mov	v30.16b, v27.16b
1779	mov	v27.16b, v18.16b
1780	str	q18, [sp, #176]
1781	eor	v0.16b, v0.16b, v1.16b
1782	mov	v18.16b, v22.16b
1783	add	v5.4s, v5.4s, v11.4s
1784	tbl	v12.16b, { v12.16b }, v16.16b
1785	eor	v3.16b, v3.16b, v13.16b
1786	ldur	q22, [x29, #-96]
1787	ushr	v15.4s, v0.4s, #12
1788	shl	v0.4s, v0.4s, #20
1789	eor	v10.16b, v5.16b, v10.16b
1790	add	v6.4s, v6.4s, v12.4s
1791	tbl	v3.16b, { v3.16b }, v16.16b
1792	orr	v0.16b, v0.16b, v15.16b
1793	ushr	v15.4s, v10.4s, #12
1794	shl	v10.4s, v10.4s, #20
1795	eor	v9.16b, v6.16b, v9.16b
1796	add	v8.4s, v8.4s, v3.4s
1797	add	v17.4s, v17.4s, v20.4s
1798	orr	v10.16b, v10.16b, v15.16b
1799	ushr	v15.4s, v9.4s, #12
1800	shl	v9.4s, v9.4s, #20
1801	eor	v2.16b, v8.16b, v2.16b
1802	add	v17.4s, v17.4s, v0.4s
1803	add	v4.4s, v4.4s, v29.4s
1804	orr	v9.16b, v9.16b, v15.16b
1805	ushr	v15.4s, v2.4s, #12
1806	shl	v2.4s, v2.4s, #20
1807	eor	v14.16b, v14.16b, v17.16b
1808	add	v4.4s, v4.4s, v10.4s
1809	add	v7.4s, v7.4s, v22.4s
1810	orr	v2.16b, v2.16b, v15.16b
1811	tbl	v14.16b, { v14.16b }, v19.16b
1812	eor	v11.16b, v11.16b, v4.16b
1813	add	v7.4s, v7.4s, v9.4s
1814	add	v13.4s, v13.4s, v31.4s
1815	add	v1.4s, v1.4s, v14.4s
1816	tbl	v11.16b, { v11.16b }, v19.16b
1817	eor	v12.16b, v12.16b, v7.16b
1818	add	v13.4s, v13.4s, v2.4s
1819	eor	v0.16b, v0.16b, v1.16b
1820	add	v5.4s, v5.4s, v11.4s
1821	tbl	v12.16b, { v12.16b }, v19.16b
1822	eor	v3.16b, v3.16b, v13.16b
1823	ushr	v15.4s, v0.4s, #7
1824	shl	v0.4s, v0.4s, #25
1825	eor	v10.16b, v5.16b, v10.16b
1826	add	v6.4s, v6.4s, v12.4s
1827	tbl	v3.16b, { v3.16b }, v19.16b
1828	orr	v0.16b, v0.16b, v15.16b
1829	ushr	v15.4s, v10.4s, #7
1830	shl	v10.4s, v10.4s, #25
1831	eor	v9.16b, v6.16b, v9.16b
1832	add	v8.4s, v8.4s, v3.4s
1833	orr	v10.16b, v10.16b, v15.16b
1834	ushr	v15.4s, v9.4s, #7
1835	shl	v9.4s, v9.4s, #25
1836	eor	v2.16b, v8.16b, v2.16b
1837	add	v17.4s, v17.4s, v21.4s
1838	orr	v9.16b, v9.16b, v15.16b
1839	ushr	v15.4s, v2.4s, #7
1840	shl	v2.4s, v2.4s, #25
1841	add	v17.4s, v17.4s, v10.4s
1842	add	v4.4s, v4.4s, v24.4s
1843	orr	v2.16b, v2.16b, v15.16b
1844	eor	v3.16b, v3.16b, v17.16b
1845	add	v4.4s, v4.4s, v9.4s
1846	add	v7.4s, v7.4s, v30.4s
1847	tbl	v3.16b, { v3.16b }, v16.16b
1848	eor	v14.16b, v14.16b, v4.16b
1849	add	v7.4s, v7.4s, v2.4s
1850	add	v13.4s, v13.4s, v28.4s
1851	add	v6.4s, v6.4s, v3.4s
1852	mov	v22.16b, v24.16b
1853	tbl	v14.16b, { v14.16b }, v16.16b
1854	eor	v11.16b, v11.16b, v7.16b
1855	add	v13.4s, v13.4s, v0.4s
1856	ldur	q24, [x29, #-80]
1857	eor	v10.16b, v6.16b, v10.16b
1858	add	v8.4s, v8.4s, v14.4s
1859	mov	v21.16b, v30.16b
1860	tbl	v11.16b, { v11.16b }, v16.16b
1861	eor	v12.16b, v12.16b, v13.16b
1862	ldur	q30, [x29, #-192]
1863	mov	v20.16b, v29.16b
1864	ushr	v15.4s, v10.4s, #12
1865	shl	v10.4s, v10.4s, #20
1866	eor	v9.16b, v8.16b, v9.16b
1867	add	v1.4s, v1.4s, v11.4s
1868	tbl	v12.16b, { v12.16b }, v16.16b
1869	ldur	q29, [x29, #-112]
1870	orr	v10.16b, v10.16b, v15.16b
1871	ushr	v15.4s, v9.4s, #12
1872	shl	v9.4s, v9.4s, #20
1873	eor	v2.16b, v1.16b, v2.16b
1874	add	v5.4s, v5.4s, v12.4s
1875	add	v17.4s, v17.4s, v25.4s
1876	orr	v9.16b, v9.16b, v15.16b
1877	ushr	v15.4s, v2.4s, #12
1878	shl	v2.4s, v2.4s, #20
1879	eor	v0.16b, v5.16b, v0.16b
1880	add	v17.4s, v17.4s, v10.4s
1881	add	v4.4s, v4.4s, v24.4s
1882	orr	v2.16b, v2.16b, v15.16b
1883	ushr	v15.4s, v0.4s, #12
1884	shl	v0.4s, v0.4s, #20
1885	eor	v3.16b, v3.16b, v17.16b
1886	add	v4.4s, v4.4s, v9.4s
1887	add	v7.4s, v7.4s, v30.4s
1888	orr	v0.16b, v0.16b, v15.16b
1889	tbl	v3.16b, { v3.16b }, v19.16b
1890	eor	v14.16b, v14.16b, v4.16b
1891	add	v7.4s, v7.4s, v2.4s
1892	add	v13.4s, v13.4s, v29.4s
1893	add	v6.4s, v6.4s, v3.4s
1894	tbl	v14.16b, { v14.16b }, v19.16b
1895	eor	v11.16b, v11.16b, v7.16b
1896	add	v13.4s, v13.4s, v0.4s
1897	eor	v10.16b, v6.16b, v10.16b
1898	add	v8.4s, v8.4s, v14.4s
1899	tbl	v11.16b, { v11.16b }, v19.16b
1900	eor	v12.16b, v12.16b, v13.16b
1901	ushr	v15.4s, v10.4s, #7
1902	shl	v10.4s, v10.4s, #25
1903	eor	v9.16b, v8.16b, v9.16b
1904	add	v1.4s, v1.4s, v11.4s
1905	tbl	v12.16b, { v12.16b }, v19.16b
1906	orr	v10.16b, v10.16b, v15.16b
1907	ushr	v15.4s, v9.4s, #7
1908	shl	v9.4s, v9.4s, #25
1909	eor	v2.16b, v1.16b, v2.16b
1910	add	v5.4s, v5.4s, v12.4s
1911	orr	v9.16b, v9.16b, v15.16b
1912	ushr	v15.4s, v2.4s, #7
1913	shl	v2.4s, v2.4s, #25
1914	eor	v0.16b, v5.16b, v0.16b
1915	orr	v2.16b, v2.16b, v15.16b
1916	ushr	v15.4s, v0.4s, #7
1917	shl	v0.4s, v0.4s, #25
1918	orr	v0.16b, v0.16b, v15.16b
1919	add	v17.4s, v17.4s, v18.4s
1920	add	v17.4s, v17.4s, v0.4s
1921	add	v4.4s, v4.4s, v20.4s
1922	eor	v14.16b, v14.16b, v17.16b
1923	add	v4.4s, v4.4s, v10.4s
1924	add	v7.4s, v7.4s, v31.4s
1925	tbl	v14.16b, { v14.16b }, v16.16b
1926	eor	v11.16b, v11.16b, v4.16b
1927	add	v7.4s, v7.4s, v9.4s
1928	add	v13.4s, v13.4s, v26.4s
1929	add	v1.4s, v1.4s, v14.4s
1930	tbl	v11.16b, { v11.16b }, v16.16b
1931	eor	v12.16b, v12.16b, v7.16b
1932	add	v13.4s, v13.4s, v2.4s
1933	eor	v0.16b, v0.16b, v1.16b
1934	add	v5.4s, v5.4s, v11.4s
1935	tbl	v12.16b, { v12.16b }, v16.16b
1936	eor	v3.16b, v3.16b, v13.16b
1937	ushr	v15.4s, v0.4s, #12
1938	shl	v0.4s, v0.4s, #20
1939	eor	v10.16b, v5.16b, v10.16b
1940	add	v6.4s, v6.4s, v12.4s
1941	tbl	v3.16b, { v3.16b }, v16.16b
1942	orr	v0.16b, v0.16b, v15.16b
1943	ushr	v15.4s, v10.4s, #12
1944	shl	v10.4s, v10.4s, #20
1945	eor	v9.16b, v6.16b, v9.16b
1946	add	v8.4s, v8.4s, v3.4s
1947	add	v17.4s, v17.4s, v23.4s
1948	orr	v10.16b, v10.16b, v15.16b
1949	ushr	v15.4s, v9.4s, #12
1950	shl	v9.4s, v9.4s, #20
1951	eor	v2.16b, v8.16b, v2.16b
1952	add	v17.4s, v17.4s, v0.4s
1953	add	v4.4s, v4.4s, v22.4s
1954	orr	v9.16b, v9.16b, v15.16b
1955	ushr	v15.4s, v2.4s, #12
1956	shl	v2.4s, v2.4s, #20
1957	eor	v14.16b, v14.16b, v17.16b
1958	add	v4.4s, v4.4s, v10.4s
1959	add	v7.4s, v7.4s, v27.4s
1960	orr	v2.16b, v2.16b, v15.16b
1961	tbl	v14.16b, { v14.16b }, v19.16b
1962	eor	v11.16b, v11.16b, v4.16b
1963	add	v7.4s, v7.4s, v9.4s
1964	add	v13.4s, v13.4s, v30.4s
1965	add	v1.4s, v1.4s, v14.4s
1966	tbl	v11.16b, { v11.16b }, v19.16b
1967	eor	v12.16b, v12.16b, v7.16b
1968	add	v13.4s, v13.4s, v2.4s
1969	ldur	q27, [x29, #-160]
1970	eor	v0.16b, v0.16b, v1.16b
1971	add	v5.4s, v5.4s, v11.4s
1972	tbl	v12.16b, { v12.16b }, v19.16b
1973	eor	v3.16b, v3.16b, v13.16b
1974	ushr	v15.4s, v0.4s, #7
1975	shl	v0.4s, v0.4s, #25
1976	eor	v10.16b, v5.16b, v10.16b
1977	add	v6.4s, v6.4s, v12.4s
1978	tbl	v3.16b, { v3.16b }, v19.16b
1979	orr	v0.16b, v0.16b, v15.16b
1980	ushr	v15.4s, v10.4s, #7
1981	shl	v10.4s, v10.4s, #25
1982	eor	v9.16b, v6.16b, v9.16b
1983	add	v8.4s, v8.4s, v3.4s
1984	orr	v10.16b, v10.16b, v15.16b
1985	ushr	v15.4s, v9.4s, #7
1986	shl	v9.4s, v9.4s, #25
1987	eor	v2.16b, v8.16b, v2.16b
1988	add	v17.4s, v17.4s, v27.4s
1989	mov	v28.16b, v25.16b
1990	orr	v9.16b, v9.16b, v15.16b
1991	ushr	v15.4s, v2.4s, #7
1992	shl	v2.4s, v2.4s, #25
1993	add	v17.4s, v17.4s, v10.4s
1994	add	v4.4s, v4.4s, v21.4s
1995	orr	v2.16b, v2.16b, v15.16b
1996	eor	v3.16b, v3.16b, v17.16b
1997	add	v4.4s, v4.4s, v9.4s
1998	add	v7.4s, v7.4s, v28.4s
1999	tbl	v3.16b, { v3.16b }, v16.16b
2000	eor	v14.16b, v14.16b, v4.16b
2001	add	v7.4s, v7.4s, v2.4s
2002	add	v13.4s, v13.4s, v29.4s
2003	mov	v25.16b, v31.16b
2004	add	v6.4s, v6.4s, v3.4s
2005	tbl	v14.16b, { v14.16b }, v16.16b
2006	eor	v11.16b, v11.16b, v7.16b
2007	add	v13.4s, v13.4s, v0.4s
2008	ldur	q31, [x29, #-96]
2009	eor	v10.16b, v6.16b, v10.16b
2010	add	v8.4s, v8.4s, v14.4s
2011	tbl	v11.16b, { v11.16b }, v16.16b
2012	eor	v12.16b, v12.16b, v13.16b
2013	ldur	q28, [x29, #-208]
2014	mov	v18.16b, v20.16b
2015	str	q20, [sp, #144]
2016	ushr	v15.4s, v10.4s, #12
2017	shl	v10.4s, v10.4s, #20
2018	eor	v9.16b, v8.16b, v9.16b
2019	add	v1.4s, v1.4s, v11.4s
2020	tbl	v12.16b, { v12.16b }, v16.16b
2021	ldur	q20, [x29, #-128]
2022	orr	v10.16b, v10.16b, v15.16b
2023	ushr	v15.4s, v9.4s, #12
2024	shl	v9.4s, v9.4s, #20
2025	eor	v2.16b, v1.16b, v2.16b
2026	add	v5.4s, v5.4s, v12.4s
2027	add	v17.4s, v17.4s, v24.4s
2028	orr	v9.16b, v9.16b, v15.16b
2029	ushr	v15.4s, v2.4s, #12
2030	shl	v2.4s, v2.4s, #20
2031	eor	v0.16b, v5.16b, v0.16b
2032	add	v17.4s, v17.4s, v10.4s
2033	add	v4.4s, v4.4s, v31.4s
2034	orr	v2.16b, v2.16b, v15.16b
2035	ushr	v15.4s, v0.4s, #12
2036	shl	v0.4s, v0.4s, #20
2037	eor	v3.16b, v3.16b, v17.16b
2038	add	v4.4s, v4.4s, v9.4s
2039	add	v7.4s, v7.4s, v28.4s
2040	orr	v0.16b, v0.16b, v15.16b
2041	tbl	v3.16b, { v3.16b }, v19.16b
2042	eor	v14.16b, v14.16b, v4.16b
2043	add	v7.4s, v7.4s, v2.4s
2044	add	v13.4s, v13.4s, v20.4s
2045	add	v6.4s, v6.4s, v3.4s
2046	tbl	v14.16b, { v14.16b }, v19.16b
2047	eor	v11.16b, v11.16b, v7.16b
2048	add	v13.4s, v13.4s, v0.4s
2049	eor	v10.16b, v6.16b, v10.16b
2050	add	v8.4s, v8.4s, v14.4s
2051	tbl	v11.16b, { v11.16b }, v19.16b
2052	eor	v12.16b, v12.16b, v13.16b
2053	ushr	v15.4s, v10.4s, #7
2054	shl	v10.4s, v10.4s, #25
2055	eor	v9.16b, v8.16b, v9.16b
2056	add	v1.4s, v1.4s, v11.4s
2057	tbl	v12.16b, { v12.16b }, v19.16b
2058	orr	v10.16b, v10.16b, v15.16b
2059	ushr	v15.4s, v9.4s, #7
2060	shl	v9.4s, v9.4s, #25
2061	eor	v2.16b, v1.16b, v2.16b
2062	add	v5.4s, v5.4s, v12.4s
2063	orr	v9.16b, v9.16b, v15.16b
2064	ushr	v15.4s, v2.4s, #7
2065	shl	v2.4s, v2.4s, #25
2066	eor	v0.16b, v5.16b, v0.16b
2067	orr	v2.16b, v2.16b, v15.16b
2068	ushr	v15.4s, v0.4s, #7
2069	shl	v0.4s, v0.4s, #25
2070	orr	v0.16b, v0.16b, v15.16b
2071	add	v17.4s, v17.4s, v18.4s
2072	add	v17.4s, v17.4s, v0.4s
2073	add	v4.4s, v4.4s, v22.4s
2074	eor	v14.16b, v14.16b, v17.16b
2075	add	v4.4s, v4.4s, v10.4s
2076	add	v7.4s, v7.4s, v30.4s
2077	tbl	v14.16b, { v14.16b }, v16.16b
2078	eor	v11.16b, v11.16b, v4.16b
2079	add	v7.4s, v7.4s, v9.4s
2080	add	v13.4s, v13.4s, v25.4s
2081	add	v1.4s, v1.4s, v14.4s
2082	tbl	v11.16b, { v11.16b }, v16.16b
2083	eor	v12.16b, v12.16b, v7.16b
2084	add	v13.4s, v13.4s, v2.4s
2085	eor	v0.16b, v0.16b, v1.16b
2086	add	v5.4s, v5.4s, v11.4s
2087	tbl	v12.16b, { v12.16b }, v16.16b
2088	eor	v3.16b, v3.16b, v13.16b
2089	add	v17.4s, v17.4s, v26.4s
2090	mov	v26.16b, v21.16b
2091	add	v4.4s, v4.4s, v21.4s
2092	ldur	q21, [x29, #-144]
2093	ushr	v15.4s, v0.4s, #12
2094	shl	v0.4s, v0.4s, #20
2095	eor	v10.16b, v5.16b, v10.16b
2096	add	v6.4s, v6.4s, v12.4s
2097	tbl	v3.16b, { v3.16b }, v16.16b
2098	orr	v0.16b, v0.16b, v15.16b
2099	ushr	v15.4s, v10.4s, #12
2100	shl	v10.4s, v10.4s, #20
2101	eor	v9.16b, v6.16b, v9.16b
2102	add	v8.4s, v8.4s, v3.4s
2103	orr	v10.16b, v10.16b, v15.16b
2104	ushr	v15.4s, v9.4s, #12
2105	shl	v9.4s, v9.4s, #20
2106	eor	v2.16b, v8.16b, v2.16b
2107	add	v17.4s, v17.4s, v0.4s
2108	orr	v9.16b, v9.16b, v15.16b
2109	ushr	v15.4s, v2.4s, #12
2110	shl	v2.4s, v2.4s, #20
2111	eor	v14.16b, v14.16b, v17.16b
2112	add	v4.4s, v4.4s, v10.4s
2113	add	v7.4s, v7.4s, v21.4s
2114	orr	v2.16b, v2.16b, v15.16b
2115	tbl	v14.16b, { v14.16b }, v19.16b
2116	eor	v11.16b, v11.16b, v4.16b
2117	add	v7.4s, v7.4s, v9.4s
2118	add	v13.4s, v13.4s, v28.4s
2119	add	v1.4s, v1.4s, v14.4s
2120	tbl	v11.16b, { v11.16b }, v19.16b
2121	eor	v12.16b, v12.16b, v7.16b
2122	add	v13.4s, v13.4s, v2.4s
2123	str	q23, [sp, #160]
2124	eor	v0.16b, v0.16b, v1.16b
2125	add	v5.4s, v5.4s, v11.4s
2126	tbl	v12.16b, { v12.16b }, v19.16b
2127	eor	v3.16b, v3.16b, v13.16b
2128	add	v17.4s, v17.4s, v23.4s
2129	ldur	q23, [x29, #-64]
2130	ushr	v15.4s, v0.4s, #7
2131	shl	v0.4s, v0.4s, #25
2132	eor	v10.16b, v5.16b, v10.16b
2133	add	v6.4s, v6.4s, v12.4s
2134	tbl	v3.16b, { v3.16b }, v19.16b
2135	orr	v0.16b, v0.16b, v15.16b
2136	ushr	v15.4s, v10.4s, #7
2137	shl	v10.4s, v10.4s, #25
2138	eor	v9.16b, v6.16b, v9.16b
2139	add	v8.4s, v8.4s, v3.4s
2140	orr	v10.16b, v10.16b, v15.16b
2141	ushr	v15.4s, v9.4s, #7
2142	shl	v9.4s, v9.4s, #25
2143	eor	v2.16b, v8.16b, v2.16b
2144	orr	v9.16b, v9.16b, v15.16b
2145	ushr	v15.4s, v2.4s, #7
2146	shl	v2.4s, v2.4s, #25
2147	add	v17.4s, v17.4s, v10.4s
2148	add	v4.4s, v4.4s, v23.4s
2149	orr	v2.16b, v2.16b, v15.16b
2150	eor	v3.16b, v3.16b, v17.16b
2151	add	v4.4s, v4.4s, v9.4s
2152	add	v7.4s, v7.4s, v24.4s
2153	tbl	v3.16b, { v3.16b }, v16.16b
2154	eor	v14.16b, v14.16b, v4.16b
2155	add	v7.4s, v7.4s, v2.4s
2156	add	v6.4s, v6.4s, v3.4s
2157	tbl	v14.16b, { v14.16b }, v16.16b
2158	eor	v11.16b, v11.16b, v7.16b
2159	add	v13.4s, v13.4s, v20.4s
2160	eor	v10.16b, v6.16b, v10.16b
2161	add	v8.4s, v8.4s, v14.4s
2162	tbl	v11.16b, { v11.16b }, v16.16b
2163	add	v13.4s, v13.4s, v0.4s
2164	ldr	q20, [sp, #176]
2165	ushr	v15.4s, v10.4s, #12
2166	shl	v10.4s, v10.4s, #20
2167	eor	v9.16b, v8.16b, v9.16b
2168	add	v1.4s, v1.4s, v11.4s
2169	eor	v12.16b, v12.16b, v13.16b
2170	orr	v10.16b, v10.16b, v15.16b
2171	ushr	v15.4s, v9.4s, #12
2172	shl	v9.4s, v9.4s, #20
2173	eor	v2.16b, v1.16b, v2.16b
2174	tbl	v12.16b, { v12.16b }, v16.16b
2175	orr	v9.16b, v9.16b, v15.16b
2176	ushr	v15.4s, v2.4s, #12
2177	shl	v2.4s, v2.4s, #20
2178	add	v5.4s, v5.4s, v12.4s
2179	add	v17.4s, v17.4s, v31.4s
2180	orr	v2.16b, v2.16b, v15.16b
2181	eor	v0.16b, v5.16b, v0.16b
2182	add	v17.4s, v17.4s, v10.4s
2183	add	v4.4s, v4.4s, v20.4s
2184	add	v7.4s, v7.4s, v29.4s
2185	ushr	v15.4s, v0.4s, #12
2186	shl	v0.4s, v0.4s, #20
2187	eor	v3.16b, v3.16b, v17.16b
2188	add	v4.4s, v4.4s, v9.4s
2189	add	v7.4s, v7.4s, v2.4s
2190	orr	v0.16b, v0.16b, v15.16b
2191	mov	v15.16b, v31.16b
2192	add	v17.4s, v17.4s, v22.4s
2193	eor	v31.16b, v14.16b, v4.16b
2194	eor	v22.16b, v11.16b, v7.16b
2195	add	v11.4s, v13.4s, v27.4s
2196	tbl	v3.16b, { v3.16b }, v19.16b
2197	add	v11.4s, v11.4s, v0.4s
2198	tbl	v31.16b, { v31.16b }, v19.16b
2199	add	v6.4s, v6.4s, v3.4s
2200	eor	v12.16b, v12.16b, v11.16b
2201	tbl	v22.16b, { v22.16b }, v19.16b
2202	add	v8.4s, v8.4s, v31.4s
2203	eor	v10.16b, v6.16b, v10.16b
2204	add	v30.4s, v11.4s, v30.4s
2205	tbl	v11.16b, { v12.16b }, v19.16b
2206	add	v1.4s, v1.4s, v22.4s
2207	eor	v9.16b, v8.16b, v9.16b
2208	ushr	v12.4s, v10.4s, #7
2209	shl	v10.4s, v10.4s, #25
2210	add	v5.4s, v5.4s, v11.4s
2211	eor	v2.16b, v1.16b, v2.16b
2212	orr	v10.16b, v10.16b, v12.16b
2213	ushr	v12.4s, v9.4s, #7
2214	shl	v9.4s, v9.4s, #25
2215	eor	v0.16b, v5.16b, v0.16b
2216	orr	v9.16b, v9.16b, v12.16b
2217	ushr	v12.4s, v2.4s, #7
2218	shl	v2.4s, v2.4s, #25
2219	orr	v2.16b, v2.16b, v12.16b
2220	ushr	v12.4s, v0.4s, #7
2221	shl	v0.4s, v0.4s, #25
2222	orr	v0.16b, v0.16b, v12.16b
2223	add	v4.4s, v4.4s, v26.4s
2224	add	v17.4s, v17.4s, v0.4s
2225	add	v7.4s, v7.4s, v28.4s
2226	mov	v18.16b, v27.16b
2227	eor	v31.16b, v31.16b, v17.16b
2228	add	v4.4s, v4.4s, v10.4s
2229	add	v27.4s, v30.4s, v2.4s
2230	eor	v22.16b, v22.16b, v4.16b
2231	add	v7.4s, v7.4s, v9.4s
2232	eor	v3.16b, v3.16b, v27.16b
2233	add	v26.4s, v27.4s, v29.4s
2234	tbl	v27.16b, { v31.16b }, v16.16b
2235	eor	v28.16b, v11.16b, v7.16b
2236	tbl	v22.16b, { v22.16b }, v16.16b
2237	add	v1.4s, v1.4s, v27.4s
2238	add	v4.4s, v4.4s, v23.4s
2239	ldr	q23, [sp, #144]
2240	tbl	v28.16b, { v28.16b }, v16.16b
2241	tbl	v3.16b, { v3.16b }, v16.16b
2242	add	v5.4s, v5.4s, v22.4s
2243	eor	v0.16b, v0.16b, v1.16b
2244	add	v6.4s, v6.4s, v28.4s
2245	add	v29.4s, v8.4s, v3.4s
2246	eor	v30.16b, v5.16b, v10.16b
2247	ushr	v8.4s, v0.4s, #12
2248	shl	v0.4s, v0.4s, #20
2249	eor	v31.16b, v6.16b, v9.16b
2250	orr	v0.16b, v0.16b, v8.16b
2251	ushr	v8.4s, v30.4s, #12
2252	shl	v30.4s, v30.4s, #20
2253	eor	v2.16b, v29.16b, v2.16b
2254	orr	v30.16b, v30.16b, v8.16b
2255	ushr	v8.4s, v31.4s, #12
2256	shl	v31.4s, v31.4s, #20
2257	add	v17.4s, v17.4s, v25.4s
2258	add	v7.4s, v7.4s, v23.4s
2259	orr	v31.16b, v31.16b, v8.16b
2260	ushr	v8.4s, v2.4s, #12
2261	shl	v2.4s, v2.4s, #20
2262	ldur	q23, [x29, #-176]
2263	orr	v2.16b, v2.16b, v8.16b
2264	add	v17.4s, v17.4s, v0.4s
2265	eor	v27.16b, v27.16b, v17.16b
2266	add	v4.4s, v4.4s, v30.4s
2267	add	v25.4s, v26.4s, v2.4s
2268	eor	v22.16b, v22.16b, v4.16b
2269	add	v4.4s, v4.4s, v24.4s
2270	add	v7.4s, v7.4s, v31.4s
2271	eor	v3.16b, v3.16b, v25.16b
2272	add	v24.4s, v25.4s, v18.4s
2273	tbl	v25.16b, { v27.16b }, v19.16b
2274	add	v17.4s, v17.4s, v23.4s
2275	eor	v23.16b, v28.16b, v7.16b
2276	tbl	v22.16b, { v22.16b }, v19.16b
2277	add	v1.4s, v1.4s, v25.4s
2278	tbl	v23.16b, { v23.16b }, v19.16b
2279	tbl	v3.16b, { v3.16b }, v19.16b
2280	add	v5.4s, v5.4s, v22.4s
2281	eor	v0.16b, v0.16b, v1.16b
2282	add	v6.4s, v6.4s, v23.4s
2283	add	v26.4s, v29.4s, v3.4s
2284	eor	v27.16b, v5.16b, v30.16b
2285	ushr	v29.4s, v0.4s, #7
2286	shl	v0.4s, v0.4s, #25
2287	eor	v28.16b, v6.16b, v31.16b
2288	orr	v0.16b, v0.16b, v29.16b
2289	ushr	v29.4s, v27.4s, #7
2290	shl	v27.4s, v27.4s, #25
2291	eor	v2.16b, v26.16b, v2.16b
2292	orr	v27.16b, v27.16b, v29.16b
2293	ushr	v29.4s, v28.4s, #7
2294	shl	v28.4s, v28.4s, #25
2295	ldur	q18, [x29, #-128]
2296	orr	v28.16b, v28.16b, v29.16b
2297	ushr	v29.4s, v2.4s, #7
2298	shl	v2.4s, v2.4s, #25
2299	add	v7.4s, v7.4s, v15.4s
2300	orr	v2.16b, v2.16b, v29.16b
2301	add	v17.4s, v17.4s, v27.4s
2302	add	v4.4s, v4.4s, v28.4s
2303	add	v7.4s, v7.4s, v2.4s
2304	eor	v3.16b, v3.16b, v17.16b
2305	add	v17.4s, v17.4s, v20.4s
2306	eor	v20.16b, v25.16b, v4.16b
2307	add	v4.4s, v4.4s, v21.4s
2308	eor	v21.16b, v22.16b, v7.16b
2309	add	v7.4s, v7.4s, v18.4s
2310	add	v18.4s, v24.4s, v0.4s
2311	eor	v22.16b, v23.16b, v18.16b
2312	ldr	q23, [sp, #160]
2313	tbl	v3.16b, { v3.16b }, v16.16b
2314	tbl	v20.16b, { v20.16b }, v16.16b
2315	add	v6.4s, v6.4s, v3.4s
2316	add	v18.4s, v18.4s, v23.4s
2317	tbl	v21.16b, { v21.16b }, v16.16b
2318	tbl	v16.16b, { v22.16b }, v16.16b
2319	add	v22.4s, v26.4s, v20.4s
2320	eor	v23.16b, v6.16b, v27.16b
2321	add	v1.4s, v1.4s, v21.4s
2322	eor	v24.16b, v22.16b, v28.16b
2323	ushr	v25.4s, v23.4s, #12
2324	shl	v23.4s, v23.4s, #20
2325	add	v5.4s, v5.4s, v16.4s
2326	eor	v2.16b, v1.16b, v2.16b
2327	orr	v23.16b, v23.16b, v25.16b
2328	ushr	v25.4s, v24.4s, #12
2329	shl	v24.4s, v24.4s, #20
2330	eor	v0.16b, v5.16b, v0.16b
2331	orr	v24.16b, v24.16b, v25.16b
2332	ushr	v25.4s, v2.4s, #12
2333	shl	v2.4s, v2.4s, #20
2334	orr	v2.16b, v2.16b, v25.16b
2335	ushr	v25.4s, v0.4s, #12
2336	shl	v0.4s, v0.4s, #20
2337	orr	v0.16b, v0.16b, v25.16b
2338	add	v25.4s, v7.4s, v2.4s
2339	add	v26.4s, v18.4s, v0.4s
2340	eor	v18.16b, v21.16b, v25.16b
2341	add	v17.4s, v17.4s, v23.4s
2342	add	v4.4s, v4.4s, v24.4s
2343	eor	v16.16b, v16.16b, v26.16b
2344	tbl	v21.16b, { v18.16b }, v19.16b
2345	eor	v3.16b, v3.16b, v17.16b
2346	eor	v7.16b, v20.16b, v4.16b
2347	tbl	v16.16b, { v16.16b }, v19.16b
2348	add	v1.4s, v1.4s, v21.4s
2349	tbl	v3.16b, { v3.16b }, v19.16b
2350	tbl	v20.16b, { v7.16b }, v19.16b
2351	eor	v2.16b, v1.16b, v2.16b
2352	eor	v7.16b, v1.16b, v17.16b
2353	add	v1.4s, v5.4s, v16.4s
2354	eor	v0.16b, v1.16b, v0.16b
2355	eor	v18.16b, v1.16b, v4.16b
2356	add	v1.4s, v6.4s, v3.4s
2357	eor	v4.16b, v1.16b, v23.16b
2358	eor	v6.16b, v25.16b, v1.16b
2359	add	v1.4s, v22.4s, v20.4s
2360	eor	v5.16b, v1.16b, v24.16b
2361	eor	v17.16b, v26.16b, v1.16b
2362	ushr	v1.4s, v4.4s, #7
2363	shl	v4.4s, v4.4s, #25
2364	orr	v1.16b, v4.16b, v1.16b
2365	ushr	v4.4s, v5.4s, #7
2366	shl	v5.4s, v5.4s, #25
2367	orr	v4.16b, v5.16b, v4.16b
2368	ushr	v5.4s, v2.4s, #7
2369	shl	v2.4s, v2.4s, #25
2370	orr	v2.16b, v2.16b, v5.16b
2371	ushr	v5.4s, v0.4s, #7
2372	shl	v0.4s, v0.4s, #25
2373	orr	v0.16b, v0.16b, v5.16b
2374	eor	v10.16b, v0.16b, v20.16b
2375	eor	v11.16b, v1.16b, v21.16b
2376	eor	v19.16b, v4.16b, v16.16b
2377	cmp	x0, x22
2378	eor	v16.16b, v2.16b, v3.16b
2379	mov	w6, w19
2380	b.ne	.LBB2_4
2381.LBB2_7:
2382	zip1	v0.4s, v7.4s, v18.4s
2383	zip2	v1.4s, v7.4s, v18.4s
2384	zip1	v2.4s, v6.4s, v17.4s
2385	zip2	v3.4s, v6.4s, v17.4s
2386	zip1	v4.4s, v10.4s, v11.4s
2387	zip2	v5.4s, v10.4s, v11.4s
2388	zip1	v6.4s, v19.4s, v16.4s
2389	zip2	v7.4s, v19.4s, v16.4s
2390	add	x15, x20, #4
2391	tst	w5, #0x1
2392	sub	x28, x28, #4
2393	zip1	v16.2d, v0.2d, v2.2d
2394	zip2	v0.2d, v0.2d, v2.2d
2395	zip1	v2.2d, v1.2d, v3.2d
2396	zip2	v1.2d, v1.2d, v3.2d
2397	zip1	v3.2d, v4.2d, v6.2d
2398	zip2	v4.2d, v4.2d, v6.2d
2399	zip1	v6.2d, v5.2d, v7.2d
2400	zip2	v5.2d, v5.2d, v7.2d
2401	add	x24, x24, #32
2402	csel	x20, x15, x20, ne
2403	cmp	x28, #3
2404	stp	q16, q3, [x26]
2405	stp	q0, q4, [x26, #32]
2406	stp	q2, q6, [x26, #64]
2407	stp	q1, q5, [x26, #96]
2408	add	x26, x26, #128
2409	b.hi	.LBB2_2
2410.LBB2_8:
2411	cbz	x28, .LBB2_16
2412	orr	w8, w7, w19
2413	and	x21, x5, #0x1
2414	stur	w8, [x29, #-64]
2415.LBB2_10:
2416	ldr	x8, [sp, #40]
2417	ldr	x25, [x24]
2418	ldur	w4, [x29, #-64]
2419	ldp	q1, q0, [x8]
2420	mov	x8, x22
2421	stp	q1, q0, [x29, #-48]
2422.LBB2_11:
2423	subs	x23, x8, #1
2424	b.eq	.LBB2_13
2425	cbnz	x8, .LBB2_14
2426	b	.LBB2_15
2427.LBB2_13:
2428	orr	w4, w4, w27
2429.LBB2_14:
2430	sub	x0, x29, #48
2431	mov	w2, #64
2432	mov	x1, x25
2433	mov	x3, x20
2434	bl	zfs_blake3_compress_in_place_sse41
2435	add	x25, x25, #64
2436	mov	x8, x23
2437	mov	w4, w19
2438	b	.LBB2_11
2439.LBB2_15:
2440	ldp	q0, q1, [x29, #-48]
2441	add	x20, x20, x21
2442	add	x24, x24, #8
2443	subs	x28, x28, #1
2444	stp	q0, q1, [x26], #32
2445	b.ne	.LBB2_10
2446.LBB2_16:
2447	add	sp, sp, #448
2448	ldp	x20, x19, [sp, #144]
2449	ldp	x22, x21, [sp, #128]
2450	ldp	x24, x23, [sp, #112]
2451	ldp	x26, x25, [sp, #96]
2452	ldp	x28, x27, [sp, #80]
2453	ldp	x29, x30, [sp, #64]
2454	ldp	d9, d8, [sp, #48]
2455	ldp	d11, d10, [sp, #32]
2456	ldp	d13, d12, [sp, #16]
2457	ldp	d15, d14, [sp], #160
2458	ret
2459.Lfunc_end2:
2460	.size	zfs_blake3_hash_many_sse41, .Lfunc_end2-zfs_blake3_hash_many_sse41
2461	.cfi_endproc
2462	.section	".note.GNU-stack","",@progbits
2463#endif
2464