1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
26 *
27 * This is converted assembly: SSE2 -> ARMv8-A
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
29 */
30
31#if defined(__aarch64__)
32	.text
/*
 * 16-byte constants loaded by zfs_blake3_compress_in_place_sse2.
 * They live in a mergeable read-only section with 16-byte entities.
 *
 * .LCPI0_0: the words 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
 *           (these equal the first four IV words of BLAKE3).
 * .LCPI0_1: on a little-endian target this is the 32-bit lane mask
 *           { 0, 0, 0, 0xFFFFFFFF }, i.e. only lane 3 is selected.
 * .LCPI0_2: on a little-endian target this is the 32-bit lane mask
 *           { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 }, i.e. lanes 0-2.
 */
33	.section	.rodata.cst16,"aM",@progbits,16
34	.p2align	4
35.LCPI0_0:
36	.word	1779033703
37	.word	3144134277
38	.word	1013904242
39	.word	2773480762
40.LCPI0_1:
41	.xword	0
42	.xword	-4294967296
43.LCPI0_2:
44	.xword	-1
45	.xword	4294967295
/*
 * zfs_blake3_compress_in_place_sse2
 *
 * Straight-line NEON code (per the file header, converted from the SSE2
 * implementation). Register usage as it can be read from the code below:
 *
 *   x0 - pointer to 32 bytes that are loaded on entry (q3, q2) and
 *        overwritten with the result by the final stp
 *   x1 - pointer to 64 bytes of input; only read
 *   w2 - only the low 8 bits are used (lane 2 of the parameter vector v4)
 *   x3 - 64-bit value; low half goes to lane 0, high half to lane 1 of v4
 *   w4 - only the low 8 bits are used (lane 3 of v4)
 *
 * NOTE(review): these presumably correspond to cv, block, block_len,
 * counter and flags of the BLAKE3 compress function - confirm against
 * the C prototype.
 *
 * The function is a leaf: it makes no calls, does not touch the stack and
 * writes only x8-x12, v0-v7 and v16-v26.
 *
 * The body repeats one pattern many times: vector add, eor, then a
 * rotation of every 32-bit lane by 16, 12, 8 or 7 bits, with ext used to
 * rotate whole lanes between steps. The zip/uzp/ext/bsl/bit/bif shuffles
 * interleaved with it rearrange the input words (presumably the per-round
 * message permutation - TODO confirm against the reference code).
 */
46	.text
47	.globl	zfs_blake3_compress_in_place_sse2
48	.p2align	2
49	.type	zfs_blake3_compress_in_place_sse2,@function
50zfs_blake3_compress_in_place_sse2:
51	.cfi_startproc
/*
 * Load the 32 bytes at x0 and the 64 bytes at x1. ld2 and uzp1/uzp2 split
 * the input into its even- and odd-indexed 32-bit words. v4 is assembled
 * lane by lane from x3 (low, high), w2 & 0xff and w4 & 0xff.
 */
52	ldp	q3, q2, [x0]
53	ldp	q5, q6, [x1]
54	add	x10, x1, #32
55	lsr	x11, x3, #32
56	fmov	s4, w3
57	ld2	{ v17.4s, v18.4s }, [x10]
58	adrp	x10, .LCPI0_2
59	and	w8, w2, #0xff
60	mov	v4.s[1], w11
61	ldr	q1, [x10, :lo12:.LCPI0_2]
62	and	w9, w4, #0xff
63	adrp	x12, .LCPI0_0
64	mov	v4.s[2], w8
65	uzp1	v19.4s, v5.4s, v6.4s
66	add	v3.4s, v2.4s, v3.4s
67	ldr	q7, [x12, :lo12:.LCPI0_0]
68	mov	v4.s[3], w9
69	add	v3.4s, v3.4s, v19.4s
70	uzp2	v5.4s, v5.4s, v6.4s
71	ext	v21.16b, v18.16b, v18.16b, #12
72	uzp1	v6.4s, v19.4s, v19.4s
73	ext	v22.16b, v19.16b, v19.16b, #12
74	eor	v4.16b, v3.16b, v4.16b
75	ext	v20.16b, v17.16b, v17.16b, #12
76	ext	v6.16b, v6.16b, v19.16b, #8
77	ext	v19.16b, v19.16b, v22.16b, #12
78	zip1	v22.2d, v21.2d, v5.2d
/* rev32 on .8h swaps the 16-bit halves of each word: rotate by 16 */
79	rev32	v24.8h, v4.8h
/* bsl overwrites its mask operand, so the mask in v1 is copied first */
80	mov	v4.16b, v1.16b
81	zip2	v23.4s, v5.4s, v21.4s
82	uzp2	v6.4s, v6.4s, v5.4s
83	bsl	v4.16b, v22.16b, v20.16b
84	add	v3.4s, v3.4s, v5.4s
85	zip1	v5.4s, v23.4s, v20.4s
86	zip1	v22.4s, v20.4s, v23.4s
/* v7 holds the .LCPI0_0 constant words */
87	add	v23.4s, v24.4s, v7.4s
88	ext	v7.16b, v6.16b, v6.16b, #4
89	ext	v25.16b, v4.16b, v4.16b, #12
90	ext	v5.16b, v22.16b, v5.16b, #8
91	eor	v2.16b, v23.16b, v2.16b
92	uzp1	v4.4s, v4.4s, v25.4s
93	uzp1	v22.4s, v7.4s, v7.4s
94	ext	v25.16b, v7.16b, v7.16b, #12
95	ext	v22.16b, v22.16b, v7.16b, #8
96	ext	v7.16b, v7.16b, v25.16b, #12
/* ushr #12 + shl #20 + orr: rotate each 32-bit lane right by 12 */
97	ushr	v25.4s, v2.4s, #12
98	shl	v2.4s, v2.4s, #20
99	orr	v2.16b, v2.16b, v25.16b
100	add	v3.4s, v3.4s, v2.4s
101	eor	v24.16b, v3.16b, v24.16b
102	add	v3.4s, v3.4s, v17.4s
/* ushr #8 + shl #24 + orr: rotate each 32-bit lane right by 8 */
103	ushr	v17.4s, v24.4s, #8
104	shl	v18.4s, v24.4s, #24
105	orr	v17.16b, v18.16b, v17.16b
106	add	v18.4s, v17.4s, v23.4s
107	eor	v2.16b, v18.16b, v2.16b
/* ushr #7 + shl #25 + orr: rotate each 32-bit lane right by 7 */
108	ushr	v23.4s, v2.4s, #7
109	shl	v2.4s, v2.4s, #25
/* ext with the same register as both sources rotates the four lanes */
110	ext	v3.16b, v3.16b, v3.16b, #12
111	orr	v2.16b, v2.16b, v23.16b
112	ext	v17.16b, v17.16b, v17.16b, #8
113	add	v3.4s, v2.4s, v3.4s
114	adrp	x11, .LCPI0_1
115	eor	v17.16b, v3.16b, v17.16b
116	ldr	q16, [x11, :lo12:.LCPI0_1]
117	ext	v18.16b, v18.16b, v18.16b, #4
118	rev32	v24.8h, v17.8h
/* v0 = mask with 32-bit lanes 1 and 3 all-ones, used by bit/bsl below */
119	movi	v0.2d, #0xffffffff00000000
120	add	v23.4s, v3.4s, v21.4s
121	mov	v21.s[1], v20.s[2]
122	add	v20.4s, v18.4s, v24.4s
123	bit	v19.16b, v21.16b, v0.16b
124	eor	v3.16b, v20.16b, v2.16b
125	uzp2	v2.4s, v22.4s, v19.4s
126	zip1	v17.2d, v5.2d, v19.2d
127	zip2	v18.4s, v19.4s, v5.4s
128	ushr	v21.4s, v3.4s, #12
129	shl	v3.4s, v3.4s, #20
130	ext	v22.16b, v2.16b, v2.16b, #4
131	bsl	v16.16b, v4.16b, v17.16b
132	zip1	v17.4s, v18.4s, v4.4s
133	zip1	v18.4s, v4.4s, v18.4s
134	orr	v21.16b, v3.16b, v21.16b
135	ext	v25.16b, v16.16b, v16.16b, #12
136	ext	v3.16b, v18.16b, v17.16b, #8
137	uzp1	v18.4s, v22.4s, v22.4s
138	ext	v26.16b, v22.16b, v22.16b, #12
139	add	v23.4s, v23.4s, v21.4s
140	uzp1	v17.4s, v16.4s, v25.4s
141	ext	v16.16b, v18.16b, v22.16b, #8
142	ext	v18.16b, v22.16b, v26.16b, #12
143	eor	v22.16b, v23.16b, v24.16b
144	add	v6.4s, v23.4s, v6.4s
145	ushr	v23.4s, v22.4s, #8
146	shl	v22.4s, v22.4s, #24
147	orr	v22.16b, v22.16b, v23.16b
148	add	v20.4s, v22.4s, v20.4s
149	eor	v21.16b, v20.16b, v21.16b
150	ushr	v23.4s, v21.4s, #7
151	shl	v21.4s, v21.4s, #25
152	ext	v6.16b, v6.16b, v6.16b, #4
153	orr	v21.16b, v21.16b, v23.16b
154	ext	v22.16b, v22.16b, v22.16b, #8
155	add	v6.4s, v21.4s, v6.4s
156	eor	v22.16b, v6.16b, v22.16b
157	ext	v20.16b, v20.16b, v20.16b, #12
158	add	v6.4s, v6.4s, v19.4s
159	rev32	v19.8h, v22.8h
160	add	v20.4s, v20.4s, v19.4s
161	eor	v21.16b, v20.16b, v21.16b
162	ushr	v22.4s, v21.4s, #12
163	shl	v21.4s, v21.4s, #20
164	orr	v21.16b, v21.16b, v22.16b
165	add	v6.4s, v6.4s, v21.4s
166	eor	v19.16b, v6.16b, v19.16b
167	ushr	v22.4s, v19.4s, #8
168	shl	v19.4s, v19.4s, #24
169	orr	v19.16b, v19.16b, v22.16b
170	add	v20.4s, v19.4s, v20.4s
171	eor	v21.16b, v20.16b, v21.16b
172	ext	v6.16b, v6.16b, v6.16b, #12
173	ushr	v22.4s, v21.4s, #7
174	shl	v21.4s, v21.4s, #25
175	add	v6.4s, v6.4s, v4.4s
176	orr	v21.16b, v21.16b, v22.16b
177	ext	v19.16b, v19.16b, v19.16b, #8
178	add	v6.4s, v6.4s, v21.4s
179	eor	v19.16b, v6.16b, v19.16b
180	ext	v20.16b, v20.16b, v20.16b, #4
181	rev32	v19.8h, v19.8h
182	add	v20.4s, v20.4s, v19.4s
183	add	v6.4s, v6.4s, v5.4s
184	mov	v5.s[1], v4.s[2]
185	eor	v4.16b, v20.16b, v21.16b
186	ushr	v21.4s, v4.4s, #12
187	shl	v4.4s, v4.4s, #20
188	orr	v21.16b, v4.16b, v21.16b
189	add	v6.4s, v6.4s, v21.4s
190	eor	v19.16b, v6.16b, v19.16b
191	add	v2.4s, v6.4s, v2.4s
192	ushr	v6.4s, v19.4s, #8
193	shl	v19.4s, v19.4s, #24
194	orr	v6.16b, v19.16b, v6.16b
195	add	v19.4s, v6.4s, v20.4s
196	eor	v20.16b, v19.16b, v21.16b
197	ushr	v21.4s, v20.4s, #7
198	shl	v20.4s, v20.4s, #25
199	ext	v2.16b, v2.16b, v2.16b, #4
200	orr	v20.16b, v20.16b, v21.16b
201	ext	v6.16b, v6.16b, v6.16b, #8
202	add	v2.4s, v20.4s, v2.4s
203	eor	v6.16b, v2.16b, v6.16b
204	ext	v19.16b, v19.16b, v19.16b, #12
205	rev32	v6.8h, v6.8h
206	add	v19.4s, v19.4s, v6.4s
207	mov	v22.16b, v0.16b
208	eor	v20.16b, v19.16b, v20.16b
209	bsl	v22.16b, v5.16b, v7.16b
210	ushr	v21.4s, v20.4s, #12
211	shl	v20.4s, v20.4s, #20
212	add	v2.4s, v2.4s, v22.4s
213	orr	v20.16b, v20.16b, v21.16b
214	add	v2.4s, v2.4s, v20.4s
215	eor	v6.16b, v2.16b, v6.16b
216	ushr	v21.4s, v6.4s, #8
217	shl	v6.4s, v6.4s, #24
218	orr	v6.16b, v6.16b, v21.16b
219	add	v19.4s, v6.4s, v19.4s
220	eor	v20.16b, v19.16b, v20.16b
221	ext	v2.16b, v2.16b, v2.16b, #12
222	ushr	v21.4s, v20.4s, #7
223	shl	v20.4s, v20.4s, #25
224	add	v2.4s, v2.4s, v17.4s
225	orr	v20.16b, v20.16b, v21.16b
226	ext	v6.16b, v6.16b, v6.16b, #8
227	add	v2.4s, v2.4s, v20.4s
228	eor	v6.16b, v2.16b, v6.16b
229	uzp2	v5.4s, v16.4s, v22.4s
230	zip1	v7.2d, v3.2d, v22.2d
231	zip2	v16.4s, v22.4s, v3.4s
232	ext	v19.16b, v19.16b, v19.16b, #4
233	rev32	v22.8h, v6.8h
234	ext	v23.16b, v5.16b, v5.16b, #4
235	bif	v7.16b, v17.16b, v1.16b
236	zip1	v24.4s, v16.4s, v17.4s
237	zip1	v16.4s, v17.4s, v16.4s
238	add	v21.4s, v2.4s, v3.4s
239	mov	v3.s[1], v17.s[2]
240	add	v17.4s, v19.4s, v22.4s
241	mov	v19.16b, v0.16b
242	ext	v25.16b, v7.16b, v7.16b, #12
243	ext	v4.16b, v16.16b, v24.16b, #8
244	uzp1	v16.4s, v23.4s, v23.4s
245	bsl	v19.16b, v3.16b, v18.16b
246	eor	v2.16b, v17.16b, v20.16b
247	uzp1	v7.4s, v7.4s, v25.4s
248	ext	v25.16b, v16.16b, v23.16b, #8
249	zip1	v3.2d, v4.2d, v19.2d
250	ushr	v20.4s, v2.4s, #12
251	shl	v2.4s, v2.4s, #20
252	ext	v24.16b, v23.16b, v23.16b, #12
253	uzp2	v6.4s, v25.4s, v19.4s
254	zip2	v18.4s, v19.4s, v4.4s
255	bif	v3.16b, v7.16b, v1.16b
256	orr	v20.16b, v2.16b, v20.16b
257	ext	v16.16b, v23.16b, v24.16b, #12
258	ext	v23.16b, v6.16b, v6.16b, #4
259	zip1	v24.4s, v18.4s, v7.4s
260	zip1	v18.4s, v7.4s, v18.4s
261	ext	v25.16b, v3.16b, v3.16b, #12
262	add	v21.4s, v21.4s, v20.4s
263	ext	v2.16b, v18.16b, v24.16b, #8
264	uzp1	v18.4s, v23.4s, v23.4s
265	ext	v24.16b, v23.16b, v23.16b, #12
266	uzp1	v3.4s, v3.4s, v25.4s
267	eor	v22.16b, v21.16b, v22.16b
268	ext	v25.16b, v18.16b, v23.16b, #8
269	dup	v18.4s, v2.s[3]
270	ext	v23.16b, v23.16b, v24.16b, #12
271	add	v5.4s, v21.4s, v5.4s
272	trn1	v21.4s, v3.4s, v3.4s
273	ushr	v24.4s, v22.4s, #8
274	shl	v22.4s, v22.4s, #24
275	ext	v18.16b, v21.16b, v18.16b, #8
276	orr	v21.16b, v22.16b, v24.16b
277	add	v17.4s, v21.4s, v17.4s
278	eor	v20.16b, v17.16b, v20.16b
279	ushr	v22.4s, v20.4s, #7
280	shl	v20.4s, v20.4s, #25
281	ext	v5.16b, v5.16b, v5.16b, #4
282	orr	v20.16b, v20.16b, v22.16b
283	ext	v21.16b, v21.16b, v21.16b, #8
284	add	v5.4s, v20.4s, v5.4s
285	eor	v21.16b, v5.16b, v21.16b
286	ext	v17.16b, v17.16b, v17.16b, #12
287	add	v5.4s, v5.4s, v19.4s
288	rev32	v19.8h, v21.8h
289	add	v17.4s, v17.4s, v19.4s
290	eor	v20.16b, v17.16b, v20.16b
291	ushr	v21.4s, v20.4s, #12
292	shl	v20.4s, v20.4s, #20
293	orr	v20.16b, v20.16b, v21.16b
294	add	v5.4s, v5.4s, v20.4s
295	eor	v19.16b, v5.16b, v19.16b
296	ushr	v21.4s, v19.4s, #8
297	shl	v19.4s, v19.4s, #24
298	orr	v19.16b, v19.16b, v21.16b
299	add	v17.4s, v19.4s, v17.4s
300	eor	v20.16b, v17.16b, v20.16b
301	ext	v5.16b, v5.16b, v5.16b, #12
302	ushr	v21.4s, v20.4s, #7
303	shl	v20.4s, v20.4s, #25
304	add	v5.4s, v5.4s, v7.4s
305	orr	v20.16b, v20.16b, v21.16b
306	ext	v19.16b, v19.16b, v19.16b, #8
307	add	v5.4s, v5.4s, v20.4s
308	eor	v19.16b, v5.16b, v19.16b
309	ext	v17.16b, v17.16b, v17.16b, #4
310	rev32	v22.8h, v19.8h
311	add	v21.4s, v5.4s, v4.4s
312	mov	v4.s[1], v7.s[2]
313	add	v19.4s, v17.4s, v22.4s
314	bit	v16.16b, v4.16b, v0.16b
315	eor	v5.16b, v19.16b, v20.16b
316	uzp2	v4.4s, v25.4s, v16.4s
317	zip1	v7.2d, v2.2d, v16.2d
318	zip2	v17.4s, v16.4s, v2.4s
319	ushr	v20.4s, v5.4s, #12
320	shl	v5.4s, v5.4s, #20
321	ext	v24.16b, v4.16b, v4.16b, #4
322	bif	v7.16b, v3.16b, v1.16b
323	zip1	v25.4s, v17.4s, v3.4s
324	zip1	v17.4s, v3.4s, v17.4s
325	orr	v20.16b, v5.16b, v20.16b
326	ext	v26.16b, v7.16b, v7.16b, #12
327	ext	v5.16b, v17.16b, v25.16b, #8
328	uzp1	v17.4s, v24.4s, v24.4s
329	ext	v25.16b, v24.16b, v24.16b, #12
330	bit	v23.16b, v18.16b, v0.16b
331	add	v21.4s, v21.4s, v20.4s
332	uzp1	v7.4s, v7.4s, v26.4s
333	ext	v26.16b, v17.16b, v24.16b, #8
334	ext	v17.16b, v24.16b, v25.16b, #12
335	eor	v22.16b, v21.16b, v22.16b
336	add	v6.4s, v21.4s, v6.4s
337	zip1	v21.2d, v5.2d, v23.2d
338	zip2	v24.4s, v23.4s, v5.4s
339	bif	v21.16b, v7.16b, v1.16b
340	zip1	v1.4s, v24.4s, v7.4s
341	zip1	v24.4s, v7.4s, v24.4s
342	ext	v1.16b, v24.16b, v1.16b, #8
343	ushr	v24.4s, v22.4s, #8
344	shl	v22.4s, v22.4s, #24
345	orr	v22.16b, v22.16b, v24.16b
346	add	v19.4s, v22.4s, v19.4s
347	ext	v24.16b, v21.16b, v21.16b, #12
348	eor	v20.16b, v19.16b, v20.16b
349	uzp1	v21.4s, v21.4s, v24.4s
350	ushr	v24.4s, v20.4s, #7
351	shl	v20.4s, v20.4s, #25
352	orr	v20.16b, v20.16b, v24.16b
353	ext	v6.16b, v6.16b, v6.16b, #4
354	ext	v22.16b, v22.16b, v22.16b, #8
355	add	v6.4s, v20.4s, v6.4s
356	eor	v22.16b, v6.16b, v22.16b
357	ext	v19.16b, v19.16b, v19.16b, #12
358	add	v6.4s, v6.4s, v16.4s
359	rev32	v16.8h, v22.8h
360	add	v19.4s, v19.4s, v16.4s
361	eor	v20.16b, v19.16b, v20.16b
362	ushr	v22.4s, v20.4s, #12
363	shl	v20.4s, v20.4s, #20
364	orr	v20.16b, v20.16b, v22.16b
365	add	v6.4s, v6.4s, v20.4s
366	eor	v16.16b, v6.16b, v16.16b
367	ext	v6.16b, v6.16b, v6.16b, #12
368	add	v3.4s, v6.4s, v3.4s
369	ushr	v6.4s, v16.4s, #8
370	shl	v16.4s, v16.4s, #24
371	orr	v6.16b, v16.16b, v6.16b
372	add	v16.4s, v6.4s, v19.4s
373	eor	v19.16b, v16.16b, v20.16b
374	ushr	v20.4s, v19.4s, #7
375	shl	v19.4s, v19.4s, #25
376	orr	v19.16b, v19.16b, v20.16b
377	ext	v6.16b, v6.16b, v6.16b, #8
378	add	v3.4s, v3.4s, v19.4s
379	eor	v6.16b, v3.16b, v6.16b
380	ext	v16.16b, v16.16b, v16.16b, #4
381	add	v2.4s, v3.4s, v2.4s
382	rev32	v3.8h, v6.8h
383	add	v6.4s, v16.4s, v3.4s
384	eor	v16.16b, v6.16b, v19.16b
385	ushr	v19.4s, v16.4s, #12
386	shl	v16.4s, v16.4s, #20
387	orr	v16.16b, v16.16b, v19.16b
388	add	v2.4s, v2.4s, v16.4s
389	eor	v3.16b, v2.16b, v3.16b
390	add	v2.4s, v2.4s, v4.4s
391	ushr	v4.4s, v3.4s, #8
392	shl	v3.4s, v3.4s, #24
393	orr	v3.16b, v3.16b, v4.16b
394	add	v4.4s, v3.4s, v6.4s
395	eor	v6.16b, v4.16b, v16.16b
396	ushr	v16.4s, v6.4s, #7
397	shl	v6.4s, v6.4s, #25
398	ext	v2.16b, v2.16b, v2.16b, #4
399	orr	v6.16b, v6.16b, v16.16b
400	ext	v3.16b, v3.16b, v3.16b, #8
401	add	v2.4s, v6.4s, v2.4s
402	eor	v3.16b, v2.16b, v3.16b
403	ext	v4.16b, v4.16b, v4.16b, #12
404	rev32	v3.8h, v3.8h
405	add	v4.4s, v4.4s, v3.4s
406	eor	v6.16b, v4.16b, v6.16b
407	ushr	v16.4s, v6.4s, #12
408	shl	v6.4s, v6.4s, #20
409	add	v2.4s, v2.4s, v23.4s
410	orr	v6.16b, v6.16b, v16.16b
411	add	v2.4s, v2.4s, v6.4s
412	eor	v3.16b, v2.16b, v3.16b
413	ushr	v16.4s, v3.4s, #8
414	shl	v3.4s, v3.4s, #24
415	orr	v3.16b, v3.16b, v16.16b
416	add	v4.4s, v3.4s, v4.4s
417	eor	v6.16b, v4.16b, v6.16b
418	ext	v2.16b, v2.16b, v2.16b, #12
419	ushr	v16.4s, v6.4s, #7
420	shl	v6.4s, v6.4s, #25
421	add	v2.4s, v2.4s, v7.4s
422	orr	v6.16b, v6.16b, v16.16b
423	ext	v3.16b, v3.16b, v3.16b, #8
424	add	v2.4s, v2.4s, v6.4s
425	eor	v3.16b, v2.16b, v3.16b
426	ext	v4.16b, v4.16b, v4.16b, #4
427	rev32	v3.8h, v3.8h
428	add	v2.4s, v2.4s, v5.4s
429	mov	v5.s[1], v7.s[2]
430	add	v4.4s, v4.4s, v3.4s
/* last use of the v0 mask: bsl consumes it in place */
431	bsl	v0.16b, v5.16b, v17.16b
432	eor	v5.16b, v4.16b, v6.16b
433	ushr	v6.4s, v5.4s, #12
434	shl	v5.4s, v5.4s, #20
435	orr	v5.16b, v5.16b, v6.16b
436	add	v2.4s, v2.4s, v5.4s
437	eor	v3.16b, v2.16b, v3.16b
438	ushr	v6.4s, v3.4s, #8
439	shl	v3.4s, v3.4s, #24
440	orr	v3.16b, v3.16b, v6.16b
441	add	v4.4s, v3.4s, v4.4s
442	uzp2	v18.4s, v26.4s, v18.4s
443	eor	v5.16b, v4.16b, v5.16b
444	add	v2.4s, v2.4s, v18.4s
445	ushr	v6.4s, v5.4s, #7
446	shl	v5.4s, v5.4s, #25
447	ext	v2.16b, v2.16b, v2.16b, #4
448	orr	v5.16b, v5.16b, v6.16b
449	ext	v3.16b, v3.16b, v3.16b, #8
450	add	v2.4s, v5.4s, v2.4s
451	eor	v3.16b, v2.16b, v3.16b
452	ext	v4.16b, v4.16b, v4.16b, #12
453	add	v0.4s, v2.4s, v0.4s
454	rev32	v2.8h, v3.8h
455	add	v3.4s, v4.4s, v2.4s
456	eor	v4.16b, v3.16b, v5.16b
457	ushr	v5.4s, v4.4s, #12
458	shl	v4.4s, v4.4s, #20
459	orr	v4.16b, v4.16b, v5.16b
460	add	v0.4s, v0.4s, v4.4s
461	eor	v2.16b, v0.16b, v2.16b
462	ushr	v5.4s, v2.4s, #8
463	shl	v2.4s, v2.4s, #24
464	orr	v2.16b, v2.16b, v5.16b
465	add	v3.4s, v2.4s, v3.4s
466	eor	v4.16b, v3.16b, v4.16b
467	ext	v0.16b, v0.16b, v0.16b, #12
468	ushr	v5.4s, v4.4s, #7
469	shl	v4.4s, v4.4s, #25
470	add	v0.4s, v0.4s, v21.4s
471	orr	v4.16b, v4.16b, v5.16b
472	ext	v2.16b, v2.16b, v2.16b, #8
473	add	v0.4s, v0.4s, v4.4s
474	eor	v2.16b, v0.16b, v2.16b
475	ext	v3.16b, v3.16b, v3.16b, #4
476	add	v0.4s, v0.4s, v1.4s
477	rev32	v1.8h, v2.8h
478	add	v2.4s, v3.4s, v1.4s
479	eor	v3.16b, v2.16b, v4.16b
480	ushr	v4.4s, v3.4s, #12
481	shl	v3.4s, v3.4s, #20
482	orr	v3.16b, v3.16b, v4.16b
483	add	v0.4s, v0.4s, v3.4s
484	eor	v1.16b, v0.16b, v1.16b
485	ushr	v4.4s, v1.4s, #8
486	shl	v1.4s, v1.4s, #24
487	orr	v1.16b, v1.16b, v4.16b
488	add	v2.4s, v1.4s, v2.4s
489	eor	v3.16b, v2.16b, v3.16b
/*
 * Finish: undo the lane rotations with ext, complete the last rotate
 * right by 7, XOR the four working vectors pairwise down to two, and
 * store those 32 bytes back through x0.
 */
490	ext	v0.16b, v0.16b, v0.16b, #4
491	ext	v2.16b, v2.16b, v2.16b, #12
492	ushr	v4.4s, v3.4s, #7
493	shl	v3.4s, v3.4s, #25
494	ext	v1.16b, v1.16b, v1.16b, #8
495	eor	v0.16b, v2.16b, v0.16b
496	orr	v2.16b, v3.16b, v4.16b
497	eor	v1.16b, v2.16b, v1.16b
498	stp	q0, q1, [x0]
499	ret
500.Lfunc_end0:
501	.size	zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2
502	.cfi_endproc
503
/*
 * 16-byte constants loaded by zfs_blake3_compress_xof_sse2, placed in a
 * mergeable read-only section with 16-byte entities.
 *
 * .LCPI1_0: the words 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
 *           (these equal the first four IV words of BLAKE3).
 * .LCPI1_1: on a little-endian target this is the 32-bit lane mask
 *           { 0, 0, 0, 0xFFFFFFFF }, i.e. only lane 3 is selected.
 * .LCPI1_2: on a little-endian target this is the 32-bit lane mask
 *           { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 }, i.e. lanes 0-2.
 */
504	.section	.rodata.cst16,"aM",@progbits,16
505	.p2align	4
506.LCPI1_0:
507	.word	1779033703
508	.word	3144134277
509	.word	1013904242
510	.word	2773480762
511.LCPI1_1:
512	.xword	0
513	.xword	-4294967296
514.LCPI1_2:
515	.xword	-1
516	.xword	4294967295
/*
 * zfs_blake3_compress_xof_sse2
 *
 * Straight-line NEON code (per the file header, converted from the SSE2
 * implementation). Register usage as it can be read from the code below:
 *
 *   x0 - pointer to 32 bytes; read on entry and again at the end, never
 *        written
 *   x1 - pointer to 64 bytes of input; only read
 *   w2 - only the low 8 bits are used (lane 2 of the parameter vector v4)
 *   x3 - 64-bit value; low half goes to lane 0, high half to lane 1 of v4
 *   w4 - only the low 8 bits are used (lane 3 of v4)
 *   x5 - pointer to 64 bytes that receive the result
 *
 * NOTE(review): these presumably correspond to cv, block, block_len,
 * counter, flags and out of the BLAKE3 compress_xof function - confirm
 * against the C prototype.
 *
 * The function is a leaf: it makes no calls, does not touch the stack and
 * writes only x8-x12, v0-v7 and v16-v26.
 *
 * The body repeats one pattern many times: vector add, eor, then a
 * rotation of every 32-bit lane by 16 (rev32 on .8h), 12, 8 or 7 bits
 * (ushr + shl + orr), with ext used to rotate whole lanes between steps.
 * The zip/uzp/ext/bsl/bit/bif shuffles interleaved with it rearrange the
 * input words (presumably the per-round message permutation - TODO
 * confirm against the reference code).
 */
517	.text
518	.globl	zfs_blake3_compress_xof_sse2
519	.p2align	2
520	.type	zfs_blake3_compress_xof_sse2,@function
521zfs_blake3_compress_xof_sse2:
522	.cfi_startproc
/*
 * Load the 32 bytes at x0 and the 64 bytes at x1. ld2 and uzp1/uzp2 split
 * the input into its even- and odd-indexed 32-bit words. v4 is assembled
 * lane by lane from x3 (low, high), w2 & 0xff and w4 & 0xff.
 */
523	ldp	q3, q2, [x0]
524	ldp	q5, q6, [x1]
525	add	x10, x1, #32
526	lsr	x11, x3, #32
527	fmov	s4, w3
528	ld2	{ v17.4s, v18.4s }, [x10]
529	adrp	x10, .LCPI1_2
530	and	w8, w2, #0xff
531	mov	v4.s[1], w11
532	ldr	q1, [x10, :lo12:.LCPI1_2]
533	and	w9, w4, #0xff
534	adrp	x12, .LCPI1_0
535	mov	v4.s[2], w8
536	uzp1	v19.4s, v5.4s, v6.4s
537	add	v3.4s, v2.4s, v3.4s
538	ldr	q7, [x12, :lo12:.LCPI1_0]
539	mov	v4.s[3], w9
540	add	v3.4s, v3.4s, v19.4s
541	uzp2	v5.4s, v5.4s, v6.4s
542	ext	v21.16b, v18.16b, v18.16b, #12
543	uzp1	v6.4s, v19.4s, v19.4s
544	ext	v22.16b, v19.16b, v19.16b, #12
545	eor	v4.16b, v3.16b, v4.16b
546	ext	v20.16b, v17.16b, v17.16b, #12
547	ext	v6.16b, v6.16b, v19.16b, #8
548	ext	v19.16b, v19.16b, v22.16b, #12
549	zip1	v22.2d, v21.2d, v5.2d
/* rev32 on .8h swaps the 16-bit halves of each word: rotate by 16 */
550	rev32	v24.8h, v4.8h
/* bsl overwrites its mask operand, so the mask in v1 is copied first */
551	mov	v4.16b, v1.16b
552	zip2	v23.4s, v5.4s, v21.4s
553	uzp2	v6.4s, v6.4s, v5.4s
554	bsl	v4.16b, v22.16b, v20.16b
555	add	v3.4s, v3.4s, v5.4s
556	zip1	v5.4s, v23.4s, v20.4s
557	zip1	v22.4s, v20.4s, v23.4s
/* v7 holds the .LCPI1_0 constant words */
558	add	v23.4s, v24.4s, v7.4s
559	ext	v7.16b, v6.16b, v6.16b, #4
560	ext	v25.16b, v4.16b, v4.16b, #12
561	ext	v5.16b, v22.16b, v5.16b, #8
562	eor	v2.16b, v23.16b, v2.16b
563	uzp1	v4.4s, v4.4s, v25.4s
564	uzp1	v22.4s, v7.4s, v7.4s
565	ext	v25.16b, v7.16b, v7.16b, #12
566	ext	v22.16b, v22.16b, v7.16b, #8
567	ext	v7.16b, v7.16b, v25.16b, #12
/* ushr #12 + shl #20 + orr: rotate each 32-bit lane right by 12 */
568	ushr	v25.4s, v2.4s, #12
569	shl	v2.4s, v2.4s, #20
570	orr	v2.16b, v2.16b, v25.16b
571	add	v3.4s, v3.4s, v2.4s
572	eor	v24.16b, v3.16b, v24.16b
573	add	v3.4s, v3.4s, v17.4s
/* ushr #8 + shl #24 + orr: rotate each 32-bit lane right by 8 */
574	ushr	v17.4s, v24.4s, #8
575	shl	v18.4s, v24.4s, #24
576	orr	v17.16b, v18.16b, v17.16b
577	add	v18.4s, v17.4s, v23.4s
578	eor	v2.16b, v18.16b, v2.16b
/* ushr #7 + shl #25 + orr: rotate each 32-bit lane right by 7 */
579	ushr	v23.4s, v2.4s, #7
580	shl	v2.4s, v2.4s, #25
/* ext with the same register as both sources rotates the four lanes */
581	ext	v3.16b, v3.16b, v3.16b, #12
582	orr	v2.16b, v2.16b, v23.16b
583	ext	v17.16b, v17.16b, v17.16b, #8
584	add	v3.4s, v2.4s, v3.4s
585	adrp	x11, .LCPI1_1
586	eor	v17.16b, v3.16b, v17.16b
587	ldr	q16, [x11, :lo12:.LCPI1_1]
588	ext	v18.16b, v18.16b, v18.16b, #4
589	rev32	v24.8h, v17.8h
/* v0 = mask with 32-bit lanes 1 and 3 all-ones, used by bit/bsl below */
590	movi	v0.2d, #0xffffffff00000000
591	add	v23.4s, v3.4s, v21.4s
592	mov	v21.s[1], v20.s[2]
593	add	v20.4s, v18.4s, v24.4s
594	bit	v19.16b, v21.16b, v0.16b
595	eor	v3.16b, v20.16b, v2.16b
596	uzp2	v2.4s, v22.4s, v19.4s
597	zip1	v17.2d, v5.2d, v19.2d
598	zip2	v18.4s, v19.4s, v5.4s
599	ushr	v21.4s, v3.4s, #12
600	shl	v3.4s, v3.4s, #20
601	ext	v22.16b, v2.16b, v2.16b, #4
602	bsl	v16.16b, v4.16b, v17.16b
603	zip1	v17.4s, v18.4s, v4.4s
604	zip1	v18.4s, v4.4s, v18.4s
605	orr	v21.16b, v3.16b, v21.16b
606	ext	v25.16b, v16.16b, v16.16b, #12
607	ext	v3.16b, v18.16b, v17.16b, #8
608	uzp1	v18.4s, v22.4s, v22.4s
609	ext	v26.16b, v22.16b, v22.16b, #12
610	add	v23.4s, v23.4s, v21.4s
611	uzp1	v17.4s, v16.4s, v25.4s
612	ext	v16.16b, v18.16b, v22.16b, #8
613	ext	v18.16b, v22.16b, v26.16b, #12
614	eor	v22.16b, v23.16b, v24.16b
615	add	v6.4s, v23.4s, v6.4s
616	ushr	v23.4s, v22.4s, #8
617	shl	v22.4s, v22.4s, #24
618	orr	v22.16b, v22.16b, v23.16b
619	add	v20.4s, v22.4s, v20.4s
620	eor	v21.16b, v20.16b, v21.16b
621	ushr	v23.4s, v21.4s, #7
622	shl	v21.4s, v21.4s, #25
623	ext	v6.16b, v6.16b, v6.16b, #4
624	orr	v21.16b, v21.16b, v23.16b
625	ext	v22.16b, v22.16b, v22.16b, #8
626	add	v6.4s, v21.4s, v6.4s
627	eor	v22.16b, v6.16b, v22.16b
628	ext	v20.16b, v20.16b, v20.16b, #12
629	add	v6.4s, v6.4s, v19.4s
630	rev32	v19.8h, v22.8h
631	add	v20.4s, v20.4s, v19.4s
632	eor	v21.16b, v20.16b, v21.16b
633	ushr	v22.4s, v21.4s, #12
634	shl	v21.4s, v21.4s, #20
635	orr	v21.16b, v21.16b, v22.16b
636	add	v6.4s, v6.4s, v21.4s
637	eor	v19.16b, v6.16b, v19.16b
638	ushr	v22.4s, v19.4s, #8
639	shl	v19.4s, v19.4s, #24
640	orr	v19.16b, v19.16b, v22.16b
641	add	v20.4s, v19.4s, v20.4s
642	eor	v21.16b, v20.16b, v21.16b
643	ext	v6.16b, v6.16b, v6.16b, #12
644	ushr	v22.4s, v21.4s, #7
645	shl	v21.4s, v21.4s, #25
646	add	v6.4s, v6.4s, v4.4s
647	orr	v21.16b, v21.16b, v22.16b
648	ext	v19.16b, v19.16b, v19.16b, #8
649	add	v6.4s, v6.4s, v21.4s
650	eor	v19.16b, v6.16b, v19.16b
651	ext	v20.16b, v20.16b, v20.16b, #4
652	rev32	v19.8h, v19.8h
653	add	v20.4s, v20.4s, v19.4s
654	add	v6.4s, v6.4s, v5.4s
655	mov	v5.s[1], v4.s[2]
656	eor	v4.16b, v20.16b, v21.16b
657	ushr	v21.4s, v4.4s, #12
658	shl	v4.4s, v4.4s, #20
659	orr	v21.16b, v4.16b, v21.16b
660	add	v6.4s, v6.4s, v21.4s
661	eor	v19.16b, v6.16b, v19.16b
662	add	v2.4s, v6.4s, v2.4s
663	ushr	v6.4s, v19.4s, #8
664	shl	v19.4s, v19.4s, #24
665	orr	v6.16b, v19.16b, v6.16b
666	add	v19.4s, v6.4s, v20.4s
667	eor	v20.16b, v19.16b, v21.16b
668	ushr	v21.4s, v20.4s, #7
669	shl	v20.4s, v20.4s, #25
670	ext	v2.16b, v2.16b, v2.16b, #4
671	orr	v20.16b, v20.16b, v21.16b
672	ext	v6.16b, v6.16b, v6.16b, #8
673	add	v2.4s, v20.4s, v2.4s
674	eor	v6.16b, v2.16b, v6.16b
675	ext	v19.16b, v19.16b, v19.16b, #12
676	rev32	v6.8h, v6.8h
677	add	v19.4s, v19.4s, v6.4s
678	mov	v22.16b, v0.16b
679	eor	v20.16b, v19.16b, v20.16b
680	bsl	v22.16b, v5.16b, v7.16b
681	ushr	v21.4s, v20.4s, #12
682	shl	v20.4s, v20.4s, #20
683	add	v2.4s, v2.4s, v22.4s
684	orr	v20.16b, v20.16b, v21.16b
685	add	v2.4s, v2.4s, v20.4s
686	eor	v6.16b, v2.16b, v6.16b
687	ushr	v21.4s, v6.4s, #8
688	shl	v6.4s, v6.4s, #24
689	orr	v6.16b, v6.16b, v21.16b
690	add	v19.4s, v6.4s, v19.4s
691	eor	v20.16b, v19.16b, v20.16b
692	ext	v2.16b, v2.16b, v2.16b, #12
693	ushr	v21.4s, v20.4s, #7
694	shl	v20.4s, v20.4s, #25
695	add	v2.4s, v2.4s, v17.4s
696	orr	v20.16b, v20.16b, v21.16b
697	ext	v6.16b, v6.16b, v6.16b, #8
698	add	v2.4s, v2.4s, v20.4s
699	eor	v6.16b, v2.16b, v6.16b
700	uzp2	v5.4s, v16.4s, v22.4s
701	zip1	v7.2d, v3.2d, v22.2d
702	zip2	v16.4s, v22.4s, v3.4s
703	ext	v19.16b, v19.16b, v19.16b, #4
704	rev32	v22.8h, v6.8h
705	ext	v23.16b, v5.16b, v5.16b, #4
706	bif	v7.16b, v17.16b, v1.16b
707	zip1	v24.4s, v16.4s, v17.4s
708	zip1	v16.4s, v17.4s, v16.4s
709	add	v21.4s, v2.4s, v3.4s
710	mov	v3.s[1], v17.s[2]
711	add	v17.4s, v19.4s, v22.4s
712	mov	v19.16b, v0.16b
713	ext	v25.16b, v7.16b, v7.16b, #12
714	ext	v4.16b, v16.16b, v24.16b, #8
715	uzp1	v16.4s, v23.4s, v23.4s
716	bsl	v19.16b, v3.16b, v18.16b
717	eor	v2.16b, v17.16b, v20.16b
718	uzp1	v7.4s, v7.4s, v25.4s
719	ext	v25.16b, v16.16b, v23.16b, #8
720	zip1	v3.2d, v4.2d, v19.2d
721	ushr	v20.4s, v2.4s, #12
722	shl	v2.4s, v2.4s, #20
723	ext	v24.16b, v23.16b, v23.16b, #12
724	uzp2	v6.4s, v25.4s, v19.4s
725	zip2	v18.4s, v19.4s, v4.4s
726	bif	v3.16b, v7.16b, v1.16b
727	orr	v20.16b, v2.16b, v20.16b
728	ext	v16.16b, v23.16b, v24.16b, #12
729	ext	v23.16b, v6.16b, v6.16b, #4
730	zip1	v24.4s, v18.4s, v7.4s
731	zip1	v18.4s, v7.4s, v18.4s
732	ext	v25.16b, v3.16b, v3.16b, #12
733	add	v21.4s, v21.4s, v20.4s
734	ext	v2.16b, v18.16b, v24.16b, #8
735	uzp1	v18.4s, v23.4s, v23.4s
736	ext	v24.16b, v23.16b, v23.16b, #12
737	uzp1	v3.4s, v3.4s, v25.4s
738	eor	v22.16b, v21.16b, v22.16b
739	ext	v25.16b, v18.16b, v23.16b, #8
740	dup	v18.4s, v2.s[3]
741	ext	v23.16b, v23.16b, v24.16b, #12
742	add	v5.4s, v21.4s, v5.4s
743	trn1	v21.4s, v3.4s, v3.4s
744	ushr	v24.4s, v22.4s, #8
745	shl	v22.4s, v22.4s, #24
746	ext	v18.16b, v21.16b, v18.16b, #8
747	orr	v21.16b, v22.16b, v24.16b
748	add	v17.4s, v21.4s, v17.4s
749	eor	v20.16b, v17.16b, v20.16b
750	ushr	v22.4s, v20.4s, #7
751	shl	v20.4s, v20.4s, #25
752	ext	v5.16b, v5.16b, v5.16b, #4
753	orr	v20.16b, v20.16b, v22.16b
754	ext	v21.16b, v21.16b, v21.16b, #8
755	add	v5.4s, v20.4s, v5.4s
756	eor	v21.16b, v5.16b, v21.16b
757	ext	v17.16b, v17.16b, v17.16b, #12
758	add	v5.4s, v5.4s, v19.4s
759	rev32	v19.8h, v21.8h
760	add	v17.4s, v17.4s, v19.4s
761	eor	v20.16b, v17.16b, v20.16b
762	ushr	v21.4s, v20.4s, #12
763	shl	v20.4s, v20.4s, #20
764	orr	v20.16b, v20.16b, v21.16b
765	add	v5.4s, v5.4s, v20.4s
766	eor	v19.16b, v5.16b, v19.16b
767	ushr	v21.4s, v19.4s, #8
768	shl	v19.4s, v19.4s, #24
769	orr	v19.16b, v19.16b, v21.16b
770	add	v17.4s, v19.4s, v17.4s
771	eor	v20.16b, v17.16b, v20.16b
772	ext	v5.16b, v5.16b, v5.16b, #12
773	ushr	v21.4s, v20.4s, #7
774	shl	v20.4s, v20.4s, #25
775	add	v5.4s, v5.4s, v7.4s
776	orr	v20.16b, v20.16b, v21.16b
777	ext	v19.16b, v19.16b, v19.16b, #8
778	add	v5.4s, v5.4s, v20.4s
779	eor	v19.16b, v5.16b, v19.16b
780	ext	v17.16b, v17.16b, v17.16b, #4
781	rev32	v22.8h, v19.8h
782	add	v21.4s, v5.4s, v4.4s
783	mov	v4.s[1], v7.s[2]
784	add	v19.4s, v17.4s, v22.4s
785	bit	v16.16b, v4.16b, v0.16b
786	eor	v5.16b, v19.16b, v20.16b
787	uzp2	v4.4s, v25.4s, v16.4s
788	zip1	v7.2d, v2.2d, v16.2d
789	zip2	v17.4s, v16.4s, v2.4s
790	ushr	v20.4s, v5.4s, #12
791	shl	v5.4s, v5.4s, #20
792	ext	v24.16b, v4.16b, v4.16b, #4
793	bif	v7.16b, v3.16b, v1.16b
794	zip1	v25.4s, v17.4s, v3.4s
795	zip1	v17.4s, v3.4s, v17.4s
796	orr	v20.16b, v5.16b, v20.16b
797	ext	v26.16b, v7.16b, v7.16b, #12
798	ext	v5.16b, v17.16b, v25.16b, #8
799	uzp1	v17.4s, v24.4s, v24.4s
800	ext	v25.16b, v24.16b, v24.16b, #12
801	bit	v23.16b, v18.16b, v0.16b
802	add	v21.4s, v21.4s, v20.4s
803	uzp1	v7.4s, v7.4s, v26.4s
804	ext	v26.16b, v17.16b, v24.16b, #8
805	ext	v17.16b, v24.16b, v25.16b, #12
806	eor	v22.16b, v21.16b, v22.16b
807	add	v6.4s, v21.4s, v6.4s
808	zip1	v21.2d, v5.2d, v23.2d
809	zip2	v24.4s, v23.4s, v5.4s
810	bif	v21.16b, v7.16b, v1.16b
811	zip1	v1.4s, v24.4s, v7.4s
812	zip1	v24.4s, v7.4s, v24.4s
813	ext	v1.16b, v24.16b, v1.16b, #8
814	ushr	v24.4s, v22.4s, #8
815	shl	v22.4s, v22.4s, #24
816	orr	v22.16b, v22.16b, v24.16b
817	add	v19.4s, v22.4s, v19.4s
818	ext	v24.16b, v21.16b, v21.16b, #12
819	eor	v20.16b, v19.16b, v20.16b
820	uzp1	v21.4s, v21.4s, v24.4s
821	ushr	v24.4s, v20.4s, #7
822	shl	v20.4s, v20.4s, #25
823	orr	v20.16b, v20.16b, v24.16b
824	ext	v6.16b, v6.16b, v6.16b, #4
825	ext	v22.16b, v22.16b, v22.16b, #8
826	add	v6.4s, v20.4s, v6.4s
827	eor	v22.16b, v6.16b, v22.16b
828	ext	v19.16b, v19.16b, v19.16b, #12
829	add	v6.4s, v6.4s, v16.4s
830	rev32	v16.8h, v22.8h
831	add	v19.4s, v19.4s, v16.4s
832	eor	v20.16b, v19.16b, v20.16b
833	ushr	v22.4s, v20.4s, #12
834	shl	v20.4s, v20.4s, #20
835	orr	v20.16b, v20.16b, v22.16b
836	add	v6.4s, v6.4s, v20.4s
837	eor	v16.16b, v6.16b, v16.16b
838	ext	v6.16b, v6.16b, v6.16b, #12
839	add	v3.4s, v6.4s, v3.4s
840	ushr	v6.4s, v16.4s, #8
841	shl	v16.4s, v16.4s, #24
842	orr	v6.16b, v16.16b, v6.16b
843	add	v16.4s, v6.4s, v19.4s
844	eor	v19.16b, v16.16b, v20.16b
845	ushr	v20.4s, v19.4s, #7
846	shl	v19.4s, v19.4s, #25
847	orr	v19.16b, v19.16b, v20.16b
848	ext	v6.16b, v6.16b, v6.16b, #8
849	add	v3.4s, v3.4s, v19.4s
850	eor	v6.16b, v3.16b, v6.16b
851	ext	v16.16b, v16.16b, v16.16b, #4
852	add	v2.4s, v3.4s, v2.4s
853	rev32	v3.8h, v6.8h
854	add	v6.4s, v16.4s, v3.4s
855	eor	v16.16b, v6.16b, v19.16b
856	ushr	v19.4s, v16.4s, #12
857	shl	v16.4s, v16.4s, #20
858	orr	v16.16b, v16.16b, v19.16b
859	add	v2.4s, v2.4s, v16.4s
860	eor	v3.16b, v2.16b, v3.16b
861	add	v2.4s, v2.4s, v4.4s
862	ushr	v4.4s, v3.4s, #8
863	shl	v3.4s, v3.4s, #24
864	orr	v3.16b, v3.16b, v4.16b
865	add	v4.4s, v3.4s, v6.4s
866	eor	v6.16b, v4.16b, v16.16b
867	ushr	v16.4s, v6.4s, #7
868	shl	v6.4s, v6.4s, #25
869	ext	v2.16b, v2.16b, v2.16b, #4
870	orr	v6.16b, v6.16b, v16.16b
871	ext	v3.16b, v3.16b, v3.16b, #8
872	add	v2.4s, v6.4s, v2.4s
873	eor	v3.16b, v2.16b, v3.16b
874	ext	v4.16b, v4.16b, v4.16b, #12
875	rev32	v3.8h, v3.8h
876	add	v4.4s, v4.4s, v3.4s
877	eor	v6.16b, v4.16b, v6.16b
878	ushr	v16.4s, v6.4s, #12
879	shl	v6.4s, v6.4s, #20
880	add	v2.4s, v2.4s, v23.4s
881	orr	v6.16b, v6.16b, v16.16b
882	add	v2.4s, v2.4s, v6.4s
883	eor	v3.16b, v2.16b, v3.16b
884	ushr	v16.4s, v3.4s, #8
885	shl	v3.4s, v3.4s, #24
886	orr	v3.16b, v3.16b, v16.16b
887	add	v4.4s, v3.4s, v4.4s
888	eor	v6.16b, v4.16b, v6.16b
889	ext	v2.16b, v2.16b, v2.16b, #12
890	ushr	v16.4s, v6.4s, #7
891	shl	v6.4s, v6.4s, #25
892	add	v2.4s, v2.4s, v7.4s
893	orr	v6.16b, v6.16b, v16.16b
894	ext	v3.16b, v3.16b, v3.16b, #8
895	add	v2.4s, v2.4s, v6.4s
896	eor	v3.16b, v2.16b, v3.16b
897	ext	v4.16b, v4.16b, v4.16b, #4
898	rev32	v3.8h, v3.8h
899	add	v2.4s, v2.4s, v5.4s
900	mov	v5.s[1], v7.s[2]
901	add	v4.4s, v4.4s, v3.4s
/* last use of the v0 mask: bsl consumes it in place */
902	bsl	v0.16b, v5.16b, v17.16b
903	eor	v5.16b, v4.16b, v6.16b
904	ushr	v6.4s, v5.4s, #12
905	shl	v5.4s, v5.4s, #20
906	orr	v5.16b, v5.16b, v6.16b
907	add	v2.4s, v2.4s, v5.4s
908	eor	v3.16b, v2.16b, v3.16b
909	ushr	v6.4s, v3.4s, #8
910	shl	v3.4s, v3.4s, #24
911	orr	v3.16b, v3.16b, v6.16b
912	add	v4.4s, v3.4s, v4.4s
913	uzp2	v18.4s, v26.4s, v18.4s
914	eor	v5.16b, v4.16b, v5.16b
915	add	v2.4s, v2.4s, v18.4s
916	ushr	v6.4s, v5.4s, #7
917	shl	v5.4s, v5.4s, #25
918	ext	v2.16b, v2.16b, v2.16b, #4
919	orr	v5.16b, v5.16b, v6.16b
920	ext	v3.16b, v3.16b, v3.16b, #8
921	add	v2.4s, v5.4s, v2.4s
922	eor	v3.16b, v2.16b, v3.16b
923	ext	v4.16b, v4.16b, v4.16b, #12
924	add	v0.4s, v2.4s, v0.4s
925	rev32	v2.8h, v3.8h
926	add	v3.4s, v4.4s, v2.4s
927	eor	v4.16b, v3.16b, v5.16b
928	ushr	v5.4s, v4.4s, #12
929	shl	v4.4s, v4.4s, #20
930	orr	v4.16b, v4.16b, v5.16b
931	add	v0.4s, v0.4s, v4.4s
932	eor	v2.16b, v0.16b, v2.16b
933	ushr	v5.4s, v2.4s, #8
934	shl	v2.4s, v2.4s, #24
935	orr	v2.16b, v2.16b, v5.16b
936	add	v3.4s, v2.4s, v3.4s
937	eor	v4.16b, v3.16b, v4.16b
938	ext	v0.16b, v0.16b, v0.16b, #12
939	ushr	v5.4s, v4.4s, #7
940	shl	v4.4s, v4.4s, #25
941	add	v0.4s, v0.4s, v21.4s
942	orr	v4.16b, v4.16b, v5.16b
943	ext	v2.16b, v2.16b, v2.16b, #8
944	add	v0.4s, v0.4s, v4.4s
945	eor	v2.16b, v0.16b, v2.16b
946	ext	v3.16b, v3.16b, v3.16b, #4
947	add	v0.4s, v0.4s, v1.4s
948	rev32	v1.8h, v2.8h
949	add	v2.4s, v3.4s, v1.4s
950	eor	v3.16b, v2.16b, v4.16b
951	ushr	v4.4s, v3.4s, #12
952	shl	v3.4s, v3.4s, #20
953	orr	v3.16b, v3.16b, v4.16b
954	add	v0.4s, v0.4s, v3.4s
955	eor	v1.16b, v0.16b, v1.16b
956	ushr	v4.4s, v1.4s, #8
957	shl	v1.4s, v1.4s, #24
958	orr	v1.16b, v1.16b, v4.16b
959	add	v2.4s, v1.4s, v2.4s
960	eor	v3.16b, v2.16b, v3.16b
/*
 * Finish: complete the last rotate right by 7 and undo the lane
 * rotations with ext. The first 32 output bytes are the four working
 * vectors XORed pairwise. The last 32 output bytes are two of the working
 * vectors (v2, v1) XORed with the 32 bytes re-read from x0. Note that the
 * first store to [x5] happens before x0 is re-read.
 */
961	ushr	v4.4s, v3.4s, #7
962	shl	v3.4s, v3.4s, #25
963	ext	v0.16b, v0.16b, v0.16b, #4
964	ext	v1.16b, v1.16b, v1.16b, #8
965	ext	v2.16b, v2.16b, v2.16b, #12
966	orr	v3.16b, v3.16b, v4.16b
967	eor	v0.16b, v2.16b, v0.16b
968	eor	v3.16b, v3.16b, v1.16b
969	stp	q0, q3, [x5]
970	ldr	q0, [x0]
971	eor	v0.16b, v0.16b, v2.16b
972	str	q0, [x5, #32]
973	ldr	q0, [x0, #16]
974	eor	v0.16b, v0.16b, v1.16b
975	str	q0, [x5, #48]
976	ret
977.Lfunc_end1:
978	.size	zfs_blake3_compress_xof_sse2, .Lfunc_end1-zfs_blake3_compress_xof_sse2
979	.cfi_endproc
980
981	.section	.rodata.cst16,"aM",@progbits,16
982	.p2align	4
983.LCPI2_0:
984	.word	0
985	.word	1
986	.word	2
987	.word	3
988	.text
989	.globl	zfs_blake3_hash_many_sse2
990	.p2align	2
991	.type	zfs_blake3_hash_many_sse2,@function
992zfs_blake3_hash_many_sse2:
993	.cfi_startproc
994	stp	d15, d14, [sp, #-160]!
995	stp	d13, d12, [sp, #16]
996	stp	d11, d10, [sp, #32]
997	stp	d9, d8, [sp, #48]
998	stp	x29, x30, [sp, #64]
999	stp	x28, x27, [sp, #80]
1000	stp	x26, x25, [sp, #96]
1001	stp	x24, x23, [sp, #112]
1002	stp	x22, x21, [sp, #128]
1003	stp	x20, x19, [sp, #144]
1004	mov	x29, sp
1005	sub	sp, sp, #384
1006	.cfi_def_cfa w29, 160
1007	.cfi_offset w19, -8
1008	.cfi_offset w20, -16
1009	.cfi_offset w21, -24
1010	.cfi_offset w22, -32
1011	.cfi_offset w23, -40
1012	.cfi_offset w24, -48
1013	.cfi_offset w25, -56
1014	.cfi_offset w26, -64
1015	.cfi_offset w27, -72
1016	.cfi_offset w28, -80
1017	.cfi_offset w30, -88
1018	.cfi_offset w29, -96
1019	.cfi_offset b8, -104
1020	.cfi_offset b9, -112
1021	.cfi_offset b10, -120
1022	.cfi_offset b11, -128
1023	.cfi_offset b12, -136
1024	.cfi_offset b13, -144
1025	.cfi_offset b14, -152
1026	.cfi_offset b15, -160
1027	ldr	x26, [x29, #168]
1028	ldrb	w27, [x29, #160]
1029	mov	w19, w6
1030	mov	x20, x4
1031	mov	x22, x2
1032	mov	x28, x1
1033	cmp	x1, #4
1034	mov	x24, x0
1035	str	x3, [sp, #40]
1036	b.lo	.LBB2_8
1037	adrp	x9, .LCPI2_0
1038	ldr	q0, [x9, :lo12:.LCPI2_0]
1039	sbfx	w11, w5, #0, #1
1040	dup	v1.4s, w11
1041	mov	w9, #58983
1042	mov	w10, #44677
1043	and	v0.16b, v1.16b, v0.16b
1044	mov	w11, #62322
1045	mov	w12, #62778
1046	orr	w8, w7, w19
1047	movk	w9, #27145, lsl #16
1048	movk	w10, #47975, lsl #16
1049	movk	w11, #15470, lsl #16
1050	str	q0, [sp, #16]
1051	orr	v0.4s, #128, lsl #24
1052	movk	w12, #42319, lsl #16
1053	str	q0, [sp]
1054.LBB2_2:
1055	ldr	x0, [sp, #40]
1056	mov	x13, x0
1057	ld1r	{ v20.4s }, [x13], #4
1058	add	x14, x0, #8
1059	add	x15, x0, #12
1060	add	x16, x0, #16
1061	add	x17, x0, #20
1062	add	x18, x0, #24
1063	add	x0, x0, #28
1064	ld1r	{ v17.4s }, [x14]
1065	ld1r	{ v6.4s }, [x15]
1066	ld1r	{ v8.4s }, [x16]
1067	ld1r	{ v9.4s }, [x17]
1068	ld1r	{ v31.4s }, [x18]
1069	ld1r	{ v26.4s }, [x13]
1070	ld1r	{ v15.4s }, [x0]
1071	cbz	x22, .LBB2_7
1072	ldr	q1, [sp, #16]
1073	dup	v0.4s, w20
1074	ldp	x13, x14, [x24]
1075	ldp	x15, x16, [x24, #16]
1076	add	v1.4s, v0.4s, v1.4s
1077	movi	v0.4s, #128, lsl #24
1078	str	q1, [sp, #64]
1079	eor	v0.16b, v1.16b, v0.16b
1080	ldr	q1, [sp]
1081	lsr	x18, x20, #32
1082	mov	x17, xzr
1083	cmgt	v0.4s, v1.4s, v0.4s
1084	dup	v1.4s, w18
1085	sub	v0.4s, v1.4s, v0.4s
1086	mov	w18, w8
1087	str	q0, [sp, #48]
1088.LBB2_4:
1089	mov	w2, #16
1090	bfi	x2, x17, #6, #58
1091	ldr	q1, [x13, x2]
1092	ldr	q3, [x14, x2]
1093	ldr	q2, [x15, x2]
1094	ldr	q4, [x16, x2]
1095	mov	w2, #32
1096	bfi	x2, x17, #6, #58
1097	ldr	q5, [x13, x2]
1098	ldr	q18, [x14, x2]
1099	ldr	q19, [x15, x2]
1100	ldr	q23, [x16, x2]
1101	mov	w2, #48
1102	lsl	x3, x17, #6
1103	bfi	x2, x17, #6, #58
1104	add	x17, x17, #1
1105	ldr	q0, [x13, x3]
1106	ldr	q21, [x14, x3]
1107	ldr	q7, [x15, x3]
1108	ldr	q16, [x16, x3]
1109	cmp	x17, x22
1110	ldr	q13, [x13, x2]
1111	ldr	q14, [x14, x2]
1112	ldr	q29, [x15, x2]
1113	ldr	q10, [x16, x2]
1114	csel	w2, w27, wzr, eq
1115	orr	w18, w2, w18
1116	mov	x0, xzr
1117	and	w18, w18, #0xff
1118	add	x3, x3, #256
1119.LBB2_5:
1120	ldr	x2, [x24, x0]
1121	add	x0, x0, #8
1122	cmp	x0, #32
1123	add	x2, x2, x3
1124	prfm	pldl1keep, [x2]
1125	b.ne	.LBB2_5
1126	dup	v22.4s, w18
1127	str	q22, [sp, #192]
1128	zip1	v27.4s, v0.4s, v21.4s
1129	zip2	v21.4s, v0.4s, v21.4s
1130	zip1	v0.4s, v7.4s, v16.4s
1131	zip2	v22.4s, v7.4s, v16.4s
1132	zip1	v7.4s, v1.4s, v3.4s
1133	zip1	v25.4s, v2.4s, v4.4s
1134	zip2	v16.4s, v2.4s, v4.4s
1135	zip1	v11.4s, v19.4s, v23.4s
1136	zip2	v12.4s, v19.4s, v23.4s
1137	zip1	v19.4s, v13.4s, v14.4s
1138	zip2	v23.4s, v13.4s, v14.4s
1139	zip1	v13.4s, v29.4s, v10.4s
1140	zip2	v14.4s, v29.4s, v10.4s
1141	add	v10.4s, v20.4s, v8.4s
1142	add	v2.4s, v26.4s, v9.4s
1143	ext	v20.16b, v22.16b, v21.16b, #8
1144	ext	v26.16b, v25.16b, v7.16b, #8
1145	zip2	v24.4s, v1.4s, v3.4s
1146	add	v1.4s, v6.4s, v15.4s
1147	ext	v6.16b, v0.16b, v27.16b, #8
1148	ext	v20.16b, v21.16b, v20.16b, #8
1149	mov	v21.d[1], v22.d[0]
1150	ext	v22.16b, v7.16b, v26.16b, #8
1151	mov	v7.d[1], v25.d[0]
1152	add	v3.4s, v17.4s, v31.4s
1153	str	q1, [sp, #144]
1154	ext	v1.16b, v27.16b, v6.16b, #8
1155	mov	v6.16b, v7.16b
1156	zip1	v28.4s, v5.4s, v18.4s
1157	stur	q1, [x29, #-80]
1158	mov	v1.16b, v27.16b
1159	mov	v27.16b, v24.16b
1160	add	v3.4s, v3.4s, v6.4s
1161	ldr	q6, [sp, #64]
1162	ext	v29.16b, v16.16b, v24.16b, #8
1163	mov	v1.d[1], v0.d[0]
1164	ext	v0.16b, v11.16b, v28.16b, #8
1165	mov	v27.d[1], v16.d[0]
1166	ext	v16.16b, v14.16b, v23.16b, #8
1167	stur	q7, [x29, #-144]
1168	ext	v7.16b, v24.16b, v29.16b, #8
1169	ext	v29.16b, v28.16b, v0.16b, #8
1170	ext	v0.16b, v23.16b, v16.16b, #8
1171	mov	v23.d[1], v14.d[0]
1172	stp	q0, q23, [sp, #80]
1173	add	v0.4s, v10.4s, v1.4s
1174	eor	v16.16b, v0.16b, v6.16b
1175	ldr	q6, [sp, #48]
1176	add	v2.4s, v2.4s, v21.4s
1177	mov	v28.d[1], v11.d[0]
1178	zip2	v18.4s, v5.4s, v18.4s
1179	eor	v10.16b, v2.16b, v6.16b
1180	movi	v6.4s, #64
1181	eor	v11.16b, v3.16b, v6.16b
1182	ldr	q6, [sp, #144]
1183	dup	v17.4s, w9
1184	ext	v30.16b, v12.16b, v18.16b, #8
1185	rev32	v16.8h, v16.8h
1186	dup	v5.4s, w10
1187	ext	v25.16b, v18.16b, v30.16b, #8
1188	mov	v30.16b, v23.16b
1189	mov	v23.16b, v1.16b
1190	str	q1, [sp, #160]
1191	rev32	v10.8h, v10.8h
1192	add	v1.4s, v16.4s, v17.4s
1193	add	v17.4s, v6.4s, v27.4s
1194	ldr	q6, [sp, #192]
1195	dup	v4.4s, w11
1196	rev32	v11.8h, v11.8h
1197	add	v5.4s, v10.4s, v5.4s
1198	eor	v8.16b, v1.16b, v8.16b
1199	stur	q21, [x29, #-128]
1200	mov	v18.d[1], v12.d[0]
1201	add	v4.4s, v11.4s, v4.4s
1202	eor	v9.16b, v5.16b, v9.16b
1203	ushr	v12.4s, v8.4s, #12
1204	shl	v8.4s, v8.4s, #20
1205	ldur	q21, [x29, #-80]
1206	ext	v26.16b, v13.16b, v19.16b, #8
1207	eor	v31.16b, v4.16b, v31.16b
1208	orr	v8.16b, v8.16b, v12.16b
1209	ushr	v12.4s, v9.4s, #12
1210	shl	v9.4s, v9.4s, #20
1211	ext	v26.16b, v19.16b, v26.16b, #8
1212	mov	v19.d[1], v13.d[0]
1213	orr	v9.16b, v9.16b, v12.16b
1214	ushr	v12.4s, v31.4s, #12
1215	shl	v31.4s, v31.4s, #20
1216	eor	v13.16b, v17.16b, v6.16b
1217	orr	v31.16b, v31.16b, v12.16b
1218	dup	v12.4s, w12
1219	rev32	v13.8h, v13.8h
1220	add	v12.4s, v13.4s, v12.4s
1221	add	v0.4s, v0.4s, v21.4s
1222	eor	v14.16b, v12.16b, v15.16b
1223	add	v0.4s, v0.4s, v8.4s
1224	add	v2.4s, v2.4s, v20.4s
1225	ushr	v15.4s, v14.4s, #12
1226	shl	v14.4s, v14.4s, #20
1227	eor	v16.16b, v0.16b, v16.16b
1228	add	v2.4s, v2.4s, v9.4s
1229	add	v3.4s, v3.4s, v22.4s
1230	orr	v14.16b, v14.16b, v15.16b
1231	ushr	v15.4s, v16.4s, #8
1232	shl	v16.4s, v16.4s, #24
1233	eor	v10.16b, v2.16b, v10.16b
1234	add	v3.4s, v3.4s, v31.4s
1235	add	v17.4s, v17.4s, v7.4s
1236	orr	v16.16b, v16.16b, v15.16b
1237	ushr	v15.4s, v10.4s, #8
1238	shl	v10.4s, v10.4s, #24
1239	eor	v11.16b, v3.16b, v11.16b
1240	add	v17.4s, v17.4s, v14.4s
1241	orr	v10.16b, v10.16b, v15.16b
1242	ushr	v15.4s, v11.4s, #8
1243	shl	v11.4s, v11.4s, #24
1244	eor	v13.16b, v17.16b, v13.16b
1245	add	v1.4s, v16.4s, v1.4s
1246	orr	v11.16b, v11.16b, v15.16b
1247	ushr	v15.4s, v13.4s, #8
1248	shl	v13.4s, v13.4s, #24
1249	eor	v8.16b, v1.16b, v8.16b
1250	add	v5.4s, v10.4s, v5.4s
1251	orr	v13.16b, v13.16b, v15.16b
1252	ushr	v15.4s, v8.4s, #7
1253	shl	v8.4s, v8.4s, #25
1254	eor	v9.16b, v5.16b, v9.16b
1255	add	v4.4s, v11.4s, v4.4s
1256	orr	v8.16b, v8.16b, v15.16b
1257	ushr	v15.4s, v9.4s, #7
1258	shl	v9.4s, v9.4s, #25
1259	eor	v31.16b, v4.16b, v31.16b
1260	add	v12.4s, v13.4s, v12.4s
1261	orr	v9.16b, v9.16b, v15.16b
1262	ushr	v15.4s, v31.4s, #7
1263	shl	v31.4s, v31.4s, #25
1264	eor	v14.16b, v12.16b, v14.16b
1265	add	v0.4s, v0.4s, v28.4s
1266	orr	v31.16b, v31.16b, v15.16b
1267	ushr	v15.4s, v14.4s, #7
1268	shl	v14.4s, v14.4s, #25
1269	add	v0.4s, v0.4s, v9.4s
1270	add	v2.4s, v2.4s, v18.4s
1271	orr	v14.16b, v14.16b, v15.16b
1272	eor	v13.16b, v0.16b, v13.16b
1273	add	v2.4s, v2.4s, v31.4s
1274	add	v3.4s, v3.4s, v19.4s
1275	rev32	v13.8h, v13.8h
1276	eor	v16.16b, v2.16b, v16.16b
1277	add	v3.4s, v3.4s, v14.4s
1278	add	v17.4s, v17.4s, v30.4s
1279	add	v4.4s, v4.4s, v13.4s
1280	rev32	v16.8h, v16.8h
1281	eor	v10.16b, v3.16b, v10.16b
1282	add	v17.4s, v17.4s, v8.4s
1283	eor	v9.16b, v4.16b, v9.16b
1284	add	v12.4s, v12.4s, v16.4s
1285	rev32	v10.8h, v10.8h
1286	eor	v11.16b, v17.16b, v11.16b
1287	mov	v24.16b, v7.16b
1288	stur	q7, [x29, #-112]
1289	ushr	v15.4s, v9.4s, #12
1290	shl	v9.4s, v9.4s, #20
1291	eor	v31.16b, v12.16b, v31.16b
1292	add	v1.4s, v1.4s, v10.4s
1293	rev32	v11.8h, v11.8h
1294	mov	v7.16b, v26.16b
1295	add	v3.4s, v3.4s, v26.4s
1296	ldr	q26, [sp, #80]
1297	orr	v9.16b, v9.16b, v15.16b
1298	ushr	v15.4s, v31.4s, #12
1299	shl	v31.4s, v31.4s, #20
1300	eor	v14.16b, v1.16b, v14.16b
1301	add	v5.4s, v5.4s, v11.4s
1302	add	v0.4s, v0.4s, v29.4s
1303	orr	v31.16b, v31.16b, v15.16b
1304	ushr	v15.4s, v14.4s, #12
1305	shl	v14.4s, v14.4s, #20
1306	eor	v8.16b, v5.16b, v8.16b
1307	add	v0.4s, v0.4s, v9.4s
1308	add	v2.4s, v2.4s, v25.4s
1309	orr	v14.16b, v14.16b, v15.16b
1310	ushr	v15.4s, v8.4s, #12
1311	shl	v8.4s, v8.4s, #20
1312	eor	v13.16b, v0.16b, v13.16b
1313	add	v2.4s, v2.4s, v31.4s
1314	orr	v8.16b, v8.16b, v15.16b
1315	ushr	v15.4s, v13.4s, #8
1316	shl	v13.4s, v13.4s, #24
1317	eor	v16.16b, v2.16b, v16.16b
1318	add	v3.4s, v3.4s, v14.4s
1319	add	v17.4s, v17.4s, v26.4s
1320	orr	v13.16b, v13.16b, v15.16b
1321	ushr	v15.4s, v16.4s, #8
1322	shl	v16.4s, v16.4s, #24
1323	eor	v10.16b, v3.16b, v10.16b
1324	add	v17.4s, v17.4s, v8.4s
1325	orr	v16.16b, v16.16b, v15.16b
1326	ushr	v15.4s, v10.4s, #8
1327	shl	v10.4s, v10.4s, #24
1328	eor	v11.16b, v17.16b, v11.16b
1329	add	v4.4s, v13.4s, v4.4s
1330	orr	v10.16b, v10.16b, v15.16b
1331	ushr	v15.4s, v11.4s, #8
1332	shl	v11.4s, v11.4s, #24
1333	eor	v9.16b, v4.16b, v9.16b
1334	add	v12.4s, v16.4s, v12.4s
1335	str	q22, [sp, #128]
1336	orr	v11.16b, v11.16b, v15.16b
1337	ushr	v15.4s, v9.4s, #7
1338	shl	v9.4s, v9.4s, #25
1339	eor	v31.16b, v12.16b, v31.16b
1340	add	v1.4s, v10.4s, v1.4s
1341	ldur	q22, [x29, #-128]
1342	orr	v9.16b, v9.16b, v15.16b
1343	ushr	v15.4s, v31.4s, #7
1344	shl	v31.4s, v31.4s, #25
1345	eor	v14.16b, v1.16b, v14.16b
1346	add	v5.4s, v11.4s, v5.4s
1347	orr	v31.16b, v31.16b, v15.16b
1348	ushr	v15.4s, v14.4s, #7
1349	shl	v14.4s, v14.4s, #25
1350	eor	v8.16b, v5.16b, v8.16b
1351	mov	v6.16b, v18.16b
1352	orr	v14.16b, v14.16b, v15.16b
1353	ushr	v15.4s, v8.4s, #7
1354	shl	v8.4s, v8.4s, #25
1355	ldur	q18, [x29, #-144]
1356	orr	v8.16b, v8.16b, v15.16b
1357	add	v0.4s, v0.4s, v22.4s
1358	add	v0.4s, v0.4s, v8.4s
1359	add	v2.4s, v2.4s, v20.4s
1360	eor	v16.16b, v0.16b, v16.16b
1361	add	v2.4s, v2.4s, v9.4s
1362	add	v3.4s, v3.4s, v24.4s
1363	rev32	v16.8h, v16.8h
1364	eor	v10.16b, v2.16b, v10.16b
1365	add	v3.4s, v3.4s, v31.4s
1366	add	v17.4s, v17.4s, v18.4s
1367	add	v1.4s, v1.4s, v16.4s
1368	rev32	v10.8h, v10.8h
1369	eor	v11.16b, v3.16b, v11.16b
1370	add	v17.4s, v17.4s, v14.4s
1371	eor	v8.16b, v1.16b, v8.16b
1372	add	v5.4s, v5.4s, v10.4s
1373	rev32	v11.8h, v11.8h
1374	eor	v13.16b, v17.16b, v13.16b
1375	ushr	v15.4s, v8.4s, #12
1376	shl	v8.4s, v8.4s, #20
1377	eor	v9.16b, v5.16b, v9.16b
1378	add	v4.4s, v4.4s, v11.4s
1379	rev32	v13.8h, v13.8h
1380	orr	v8.16b, v8.16b, v15.16b
1381	ushr	v15.4s, v9.4s, #12
1382	shl	v9.4s, v9.4s, #20
1383	eor	v31.16b, v4.16b, v31.16b
1384	add	v12.4s, v12.4s, v13.4s
1385	add	v0.4s, v0.4s, v27.4s
1386	orr	v9.16b, v9.16b, v15.16b
1387	ushr	v15.4s, v31.4s, #12
1388	shl	v31.4s, v31.4s, #20
1389	eor	v14.16b, v12.16b, v14.16b
1390	add	v0.4s, v0.4s, v8.4s
1391	add	v2.4s, v2.4s, v6.4s
1392	orr	v31.16b, v31.16b, v15.16b
1393	ushr	v15.4s, v14.4s, #12
1394	shl	v14.4s, v14.4s, #20
1395	eor	v16.16b, v0.16b, v16.16b
1396	add	v2.4s, v2.4s, v9.4s
1397	add	v3.4s, v3.4s, v23.4s
1398	orr	v14.16b, v14.16b, v15.16b
1399	ushr	v15.4s, v16.4s, #8
1400	shl	v16.4s, v16.4s, #24
1401	eor	v10.16b, v2.16b, v10.16b
1402	add	v3.4s, v3.4s, v31.4s
1403	add	v17.4s, v17.4s, v7.4s
1404	orr	v16.16b, v16.16b, v15.16b
1405	ushr	v15.4s, v10.4s, #8
1406	shl	v10.4s, v10.4s, #24
1407	eor	v11.16b, v3.16b, v11.16b
1408	add	v17.4s, v17.4s, v14.4s
1409	orr	v10.16b, v10.16b, v15.16b
1410	ushr	v15.4s, v11.4s, #8
1411	shl	v11.4s, v11.4s, #24
1412	eor	v13.16b, v17.16b, v13.16b
1413	add	v1.4s, v16.4s, v1.4s
1414	orr	v11.16b, v11.16b, v15.16b
1415	ushr	v15.4s, v13.4s, #8
1416	shl	v13.4s, v13.4s, #24
1417	eor	v8.16b, v1.16b, v8.16b
1418	add	v5.4s, v10.4s, v5.4s
1419	orr	v13.16b, v13.16b, v15.16b
1420	ushr	v15.4s, v8.4s, #7
1421	shl	v8.4s, v8.4s, #25
1422	eor	v9.16b, v5.16b, v9.16b
1423	add	v4.4s, v11.4s, v4.4s
1424	orr	v8.16b, v8.16b, v15.16b
1425	ushr	v15.4s, v9.4s, #7
1426	shl	v9.4s, v9.4s, #25
1427	eor	v31.16b, v4.16b, v31.16b
1428	add	v12.4s, v13.4s, v12.4s
1429	orr	v9.16b, v9.16b, v15.16b
1430	ushr	v15.4s, v31.4s, #7
1431	shl	v31.4s, v31.4s, #25
1432	eor	v14.16b, v12.16b, v14.16b
1433	add	v0.4s, v0.4s, v21.4s
1434	orr	v31.16b, v31.16b, v15.16b
1435	ushr	v15.4s, v14.4s, #7
1436	shl	v14.4s, v14.4s, #25
1437	add	v0.4s, v0.4s, v9.4s
1438	add	v2.4s, v2.4s, v19.4s
1439	orr	v14.16b, v14.16b, v15.16b
1440	eor	v13.16b, v0.16b, v13.16b
1441	add	v2.4s, v2.4s, v31.4s
1442	add	v3.4s, v3.4s, v29.4s
1443	str	q28, [sp, #112]
1444	rev32	v13.8h, v13.8h
1445	eor	v16.16b, v2.16b, v16.16b
1446	add	v3.4s, v3.4s, v14.4s
1447	add	v17.4s, v17.4s, v26.4s
1448	add	v4.4s, v4.4s, v13.4s
1449	rev32	v16.8h, v16.8h
1450	eor	v10.16b, v3.16b, v10.16b
1451	add	v17.4s, v17.4s, v8.4s
1452	ldp	q28, q23, [sp, #112]
1453	eor	v9.16b, v4.16b, v9.16b
1454	add	v12.4s, v12.4s, v16.4s
1455	rev32	v10.8h, v10.8h
1456	eor	v11.16b, v17.16b, v11.16b
1457	ldr	q21, [sp, #96]
1458	ushr	v15.4s, v9.4s, #12
1459	shl	v9.4s, v9.4s, #20
1460	eor	v31.16b, v12.16b, v31.16b
1461	add	v1.4s, v1.4s, v10.4s
1462	rev32	v11.8h, v11.8h
1463	orr	v9.16b, v9.16b, v15.16b
1464	ushr	v15.4s, v31.4s, #12
1465	shl	v31.4s, v31.4s, #20
1466	eor	v14.16b, v1.16b, v14.16b
1467	add	v5.4s, v5.4s, v11.4s
1468	add	v0.4s, v0.4s, v25.4s
1469	orr	v31.16b, v31.16b, v15.16b
1470	ushr	v15.4s, v14.4s, #12
1471	shl	v14.4s, v14.4s, #20
1472	eor	v8.16b, v5.16b, v8.16b
1473	add	v0.4s, v0.4s, v9.4s
1474	add	v2.4s, v2.4s, v23.4s
1475	orr	v14.16b, v14.16b, v15.16b
1476	ushr	v15.4s, v8.4s, #12
1477	shl	v8.4s, v8.4s, #20
1478	eor	v13.16b, v0.16b, v13.16b
1479	add	v2.4s, v2.4s, v31.4s
1480	add	v3.4s, v3.4s, v21.4s
1481	orr	v8.16b, v8.16b, v15.16b
1482	ushr	v15.4s, v13.4s, #8
1483	shl	v13.4s, v13.4s, #24
1484	eor	v16.16b, v2.16b, v16.16b
1485	add	v3.4s, v3.4s, v14.4s
1486	add	v17.4s, v17.4s, v28.4s
1487	orr	v13.16b, v13.16b, v15.16b
1488	ushr	v15.4s, v16.4s, #8
1489	shl	v16.4s, v16.4s, #24
1490	eor	v10.16b, v3.16b, v10.16b
1491	add	v17.4s, v17.4s, v8.4s
1492	orr	v16.16b, v16.16b, v15.16b
1493	ushr	v15.4s, v10.4s, #8
1494	shl	v10.4s, v10.4s, #24
1495	eor	v11.16b, v17.16b, v11.16b
1496	add	v4.4s, v13.4s, v4.4s
1497	orr	v10.16b, v10.16b, v15.16b
1498	ushr	v15.4s, v11.4s, #8
1499	shl	v11.4s, v11.4s, #24
1500	eor	v9.16b, v4.16b, v9.16b
1501	add	v12.4s, v16.4s, v12.4s
1502	orr	v11.16b, v11.16b, v15.16b
1503	ushr	v15.4s, v9.4s, #7
1504	shl	v9.4s, v9.4s, #25
1505	eor	v31.16b, v12.16b, v31.16b
1506	add	v1.4s, v10.4s, v1.4s
1507	orr	v9.16b, v9.16b, v15.16b
1508	ushr	v15.4s, v31.4s, #7
1509	shl	v31.4s, v31.4s, #25
1510	eor	v14.16b, v1.16b, v14.16b
1511	add	v5.4s, v11.4s, v5.4s
1512	orr	v31.16b, v31.16b, v15.16b
1513	ushr	v15.4s, v14.4s, #7
1514	shl	v14.4s, v14.4s, #25
1515	eor	v8.16b, v5.16b, v8.16b
1516	mov	v30.16b, v29.16b
1517	mov	v29.16b, v25.16b
1518	orr	v14.16b, v14.16b, v15.16b
1519	ushr	v15.4s, v8.4s, #7
1520	shl	v8.4s, v8.4s, #25
1521	ldur	q25, [x29, #-112]
1522	orr	v8.16b, v8.16b, v15.16b
1523	add	v0.4s, v0.4s, v20.4s
1524	add	v0.4s, v0.4s, v8.4s
1525	add	v2.4s, v2.4s, v6.4s
1526	eor	v16.16b, v0.16b, v16.16b
1527	add	v2.4s, v2.4s, v9.4s
1528	add	v3.4s, v3.4s, v7.4s
1529	rev32	v16.8h, v16.8h
1530	eor	v10.16b, v2.16b, v10.16b
1531	add	v3.4s, v3.4s, v31.4s
1532	add	v17.4s, v17.4s, v25.4s
1533	add	v1.4s, v1.4s, v16.4s
1534	rev32	v10.8h, v10.8h
1535	eor	v11.16b, v3.16b, v11.16b
1536	add	v17.4s, v17.4s, v14.4s
1537	eor	v8.16b, v1.16b, v8.16b
1538	add	v5.4s, v5.4s, v10.4s
1539	rev32	v11.8h, v11.8h
1540	eor	v13.16b, v17.16b, v13.16b
1541	ushr	v15.4s, v8.4s, #12
1542	shl	v8.4s, v8.4s, #20
1543	eor	v9.16b, v5.16b, v9.16b
1544	add	v4.4s, v4.4s, v11.4s
1545	rev32	v13.8h, v13.8h
1546	orr	v8.16b, v8.16b, v15.16b
1547	ushr	v15.4s, v9.4s, #12
1548	shl	v9.4s, v9.4s, #20
1549	eor	v31.16b, v4.16b, v31.16b
1550	add	v12.4s, v12.4s, v13.4s
1551	add	v0.4s, v0.4s, v18.4s
1552	orr	v9.16b, v9.16b, v15.16b
1553	ushr	v15.4s, v31.4s, #12
1554	shl	v31.4s, v31.4s, #20
1555	eor	v14.16b, v12.16b, v14.16b
1556	add	v0.4s, v0.4s, v8.4s
1557	add	v2.4s, v2.4s, v19.4s
1558	orr	v31.16b, v31.16b, v15.16b
1559	ushr	v15.4s, v14.4s, #12
1560	shl	v14.4s, v14.4s, #20
1561	eor	v16.16b, v0.16b, v16.16b
1562	add	v2.4s, v2.4s, v9.4s
1563	add	v3.4s, v3.4s, v22.4s
1564	orr	v14.16b, v14.16b, v15.16b
1565	ushr	v15.4s, v16.4s, #8
1566	shl	v16.4s, v16.4s, #24
1567	eor	v10.16b, v2.16b, v10.16b
1568	add	v3.4s, v3.4s, v31.4s
1569	add	v17.4s, v17.4s, v21.4s
1570	orr	v16.16b, v16.16b, v15.16b
1571	ushr	v15.4s, v10.4s, #8
1572	shl	v10.4s, v10.4s, #24
1573	eor	v11.16b, v3.16b, v11.16b
1574	add	v17.4s, v17.4s, v14.4s
1575	orr	v10.16b, v10.16b, v15.16b
1576	ushr	v15.4s, v11.4s, #8
1577	shl	v11.4s, v11.4s, #24
1578	eor	v13.16b, v17.16b, v13.16b
1579	add	v1.4s, v16.4s, v1.4s
1580	orr	v11.16b, v11.16b, v15.16b
1581	ushr	v15.4s, v13.4s, #8
1582	shl	v13.4s, v13.4s, #24
1583	eor	v8.16b, v1.16b, v8.16b
1584	add	v5.4s, v10.4s, v5.4s
1585	orr	v13.16b, v13.16b, v15.16b
1586	ushr	v15.4s, v8.4s, #7
1587	shl	v8.4s, v8.4s, #25
1588	eor	v9.16b, v5.16b, v9.16b
1589	add	v4.4s, v11.4s, v4.4s
1590	orr	v8.16b, v8.16b, v15.16b
1591	ushr	v15.4s, v9.4s, #7
1592	shl	v9.4s, v9.4s, #25
1593	eor	v31.16b, v4.16b, v31.16b
1594	add	v12.4s, v13.4s, v12.4s
1595	orr	v9.16b, v9.16b, v15.16b
1596	ushr	v15.4s, v31.4s, #7
1597	shl	v31.4s, v31.4s, #25
1598	eor	v14.16b, v12.16b, v14.16b
1599	add	v0.4s, v0.4s, v27.4s
1600	orr	v31.16b, v31.16b, v15.16b
1601	ushr	v15.4s, v14.4s, #7
1602	shl	v14.4s, v14.4s, #25
1603	add	v0.4s, v0.4s, v9.4s
1604	add	v2.4s, v2.4s, v30.4s
1605	orr	v14.16b, v14.16b, v15.16b
1606	eor	v13.16b, v0.16b, v13.16b
1607	add	v2.4s, v2.4s, v31.4s
1608	add	v3.4s, v3.4s, v29.4s
1609	rev32	v13.8h, v13.8h
1610	eor	v16.16b, v2.16b, v16.16b
1611	add	v3.4s, v3.4s, v14.4s
1612	add	v17.4s, v17.4s, v28.4s
1613	add	v4.4s, v4.4s, v13.4s
1614	rev32	v16.8h, v16.8h
1615	eor	v10.16b, v3.16b, v10.16b
1616	add	v17.4s, v17.4s, v8.4s
1617	eor	v9.16b, v4.16b, v9.16b
1618	add	v12.4s, v12.4s, v16.4s
1619	rev32	v10.8h, v10.8h
1620	eor	v11.16b, v17.16b, v11.16b
1621	ushr	v15.4s, v9.4s, #12
1622	shl	v9.4s, v9.4s, #20
1623	eor	v31.16b, v12.16b, v31.16b
1624	add	v1.4s, v1.4s, v10.4s
1625	rev32	v11.8h, v11.8h
1626	ldr	q24, [sp, #160]
1627	orr	v9.16b, v9.16b, v15.16b
1628	ushr	v15.4s, v31.4s, #12
1629	shl	v31.4s, v31.4s, #20
1630	eor	v14.16b, v1.16b, v14.16b
1631	add	v5.4s, v5.4s, v11.4s
1632	stur	q7, [x29, #-64]
1633	orr	v31.16b, v31.16b, v15.16b
1634	ushr	v15.4s, v14.4s, #12
1635	shl	v14.4s, v14.4s, #20
1636	eor	v8.16b, v5.16b, v8.16b
1637	mov	v7.16b, v26.16b
1638	add	v3.4s, v3.4s, v26.4s
1639	ldur	q26, [x29, #-80]
1640	orr	v14.16b, v14.16b, v15.16b
1641	ushr	v15.4s, v8.4s, #12
1642	shl	v8.4s, v8.4s, #20
1643	add	v0.4s, v0.4s, v23.4s
1644	orr	v8.16b, v8.16b, v15.16b
1645	add	v15.4s, v0.4s, v9.4s
1646	add	v2.4s, v2.4s, v24.4s
1647	eor	v0.16b, v15.16b, v13.16b
1648	add	v2.4s, v2.4s, v31.4s
1649	ushr	v13.4s, v0.4s, #8
1650	shl	v0.4s, v0.4s, #24
1651	eor	v16.16b, v2.16b, v16.16b
1652	add	v3.4s, v3.4s, v14.4s
1653	add	v17.4s, v17.4s, v26.4s
1654	orr	v0.16b, v0.16b, v13.16b
1655	ushr	v13.4s, v16.4s, #8
1656	shl	v16.4s, v16.4s, #24
1657	eor	v10.16b, v3.16b, v10.16b
1658	add	v17.4s, v17.4s, v8.4s
1659	orr	v16.16b, v16.16b, v13.16b
1660	ushr	v13.4s, v10.4s, #8
1661	shl	v10.4s, v10.4s, #24
1662	eor	v11.16b, v17.16b, v11.16b
1663	add	v4.4s, v0.4s, v4.4s
1664	orr	v10.16b, v10.16b, v13.16b
1665	ushr	v13.4s, v11.4s, #8
1666	shl	v11.4s, v11.4s, #24
1667	eor	v9.16b, v4.16b, v9.16b
1668	add	v12.4s, v16.4s, v12.4s
1669	orr	v11.16b, v11.16b, v13.16b
1670	ushr	v13.4s, v9.4s, #7
1671	shl	v9.4s, v9.4s, #25
1672	eor	v31.16b, v12.16b, v31.16b
1673	orr	v9.16b, v9.16b, v13.16b
1674	ushr	v13.4s, v31.4s, #7
1675	shl	v31.4s, v31.4s, #25
1676	add	v1.4s, v10.4s, v1.4s
1677	orr	v31.16b, v31.16b, v13.16b
1678	eor	v13.16b, v1.16b, v14.16b
1679	add	v5.4s, v11.4s, v5.4s
1680	ushr	v14.4s, v13.4s, #7
1681	shl	v13.4s, v13.4s, #25
1682	eor	v8.16b, v5.16b, v8.16b
1683	orr	v13.16b, v13.16b, v14.16b
1684	ushr	v14.4s, v8.4s, #7
1685	shl	v8.4s, v8.4s, #25
1686	stur	q6, [x29, #-96]
1687	orr	v8.16b, v8.16b, v14.16b
1688	add	v14.4s, v15.4s, v6.4s
1689	ldur	q6, [x29, #-64]
1690	mov	v18.16b, v19.16b
1691	add	v14.4s, v14.4s, v8.4s
1692	add	v2.4s, v2.4s, v18.4s
1693	eor	v16.16b, v14.16b, v16.16b
1694	add	v2.4s, v2.4s, v9.4s
1695	add	v3.4s, v3.4s, v21.4s
1696	rev32	v16.8h, v16.8h
1697	eor	v10.16b, v2.16b, v10.16b
1698	add	v3.4s, v3.4s, v31.4s
1699	add	v17.4s, v17.4s, v6.4s
1700	add	v1.4s, v1.4s, v16.4s
1701	rev32	v10.8h, v10.8h
1702	eor	v11.16b, v3.16b, v11.16b
1703	add	v17.4s, v17.4s, v13.4s
1704	eor	v8.16b, v1.16b, v8.16b
1705	add	v5.4s, v5.4s, v10.4s
1706	rev32	v11.8h, v11.8h
1707	eor	v0.16b, v17.16b, v0.16b
1708	ushr	v15.4s, v8.4s, #12
1709	shl	v8.4s, v8.4s, #20
1710	eor	v9.16b, v5.16b, v9.16b
1711	add	v4.4s, v4.4s, v11.4s
1712	rev32	v0.8h, v0.8h
1713	str	q27, [sp, #176]
1714	mov	v27.16b, v30.16b
1715	orr	v8.16b, v8.16b, v15.16b
1716	ushr	v15.4s, v9.4s, #12
1717	shl	v9.4s, v9.4s, #20
1718	eor	v31.16b, v4.16b, v31.16b
1719	add	v12.4s, v12.4s, v0.4s
1720	add	v14.4s, v14.4s, v25.4s
1721	orr	v9.16b, v9.16b, v15.16b
1722	ushr	v15.4s, v31.4s, #12
1723	shl	v31.4s, v31.4s, #20
1724	eor	v13.16b, v12.16b, v13.16b
1725	add	v14.4s, v14.4s, v8.4s
1726	add	v2.4s, v2.4s, v27.4s
1727	orr	v31.16b, v31.16b, v15.16b
1728	ushr	v15.4s, v13.4s, #12
1729	shl	v13.4s, v13.4s, #20
1730	eor	v16.16b, v14.16b, v16.16b
1731	add	v2.4s, v2.4s, v9.4s
1732	add	v3.4s, v3.4s, v20.4s
1733	orr	v13.16b, v13.16b, v15.16b
1734	ushr	v15.4s, v16.4s, #8
1735	shl	v16.4s, v16.4s, #24
1736	eor	v10.16b, v2.16b, v10.16b
1737	add	v3.4s, v3.4s, v31.4s
1738	add	v17.4s, v17.4s, v7.4s
1739	orr	v16.16b, v16.16b, v15.16b
1740	ushr	v15.4s, v10.4s, #8
1741	shl	v10.4s, v10.4s, #24
1742	eor	v11.16b, v3.16b, v11.16b
1743	add	v17.4s, v17.4s, v13.4s
1744	mov	v30.16b, v23.16b
1745	orr	v10.16b, v10.16b, v15.16b
1746	ushr	v15.4s, v11.4s, #8
1747	shl	v11.4s, v11.4s, #24
1748	eor	v0.16b, v17.16b, v0.16b
1749	add	v1.4s, v16.4s, v1.4s
1750	ldur	q23, [x29, #-144]
1751	orr	v11.16b, v11.16b, v15.16b
1752	ushr	v15.4s, v0.4s, #8
1753	shl	v0.4s, v0.4s, #24
1754	eor	v8.16b, v1.16b, v8.16b
1755	add	v5.4s, v10.4s, v5.4s
1756	orr	v0.16b, v0.16b, v15.16b
1757	ushr	v15.4s, v8.4s, #7
1758	shl	v8.4s, v8.4s, #25
1759	eor	v9.16b, v5.16b, v9.16b
1760	add	v4.4s, v11.4s, v4.4s
1761	orr	v8.16b, v8.16b, v15.16b
1762	ushr	v15.4s, v9.4s, #7
1763	shl	v9.4s, v9.4s, #25
1764	eor	v31.16b, v4.16b, v31.16b
1765	add	v12.4s, v0.4s, v12.4s
1766	orr	v9.16b, v9.16b, v15.16b
1767	ushr	v15.4s, v31.4s, #7
1768	shl	v31.4s, v31.4s, #25
1769	eor	v13.16b, v12.16b, v13.16b
1770	add	v14.4s, v14.4s, v23.4s
1771	orr	v31.16b, v31.16b, v15.16b
1772	ushr	v15.4s, v13.4s, #7
1773	shl	v13.4s, v13.4s, #25
1774	add	v14.4s, v14.4s, v9.4s
1775	add	v2.4s, v2.4s, v29.4s
1776	orr	v13.16b, v13.16b, v15.16b
1777	eor	v0.16b, v14.16b, v0.16b
1778	add	v2.4s, v2.4s, v31.4s
1779	add	v3.4s, v3.4s, v30.4s
1780	rev32	v0.8h, v0.8h
1781	eor	v16.16b, v2.16b, v16.16b
1782	add	v3.4s, v3.4s, v13.4s
1783	add	v17.4s, v17.4s, v26.4s
1784	add	v4.4s, v4.4s, v0.4s
1785	rev32	v16.8h, v16.8h
1786	eor	v10.16b, v3.16b, v10.16b
1787	add	v17.4s, v17.4s, v8.4s
1788	ldur	q22, [x29, #-128]
1789	eor	v9.16b, v4.16b, v9.16b
1790	add	v12.4s, v12.4s, v16.4s
1791	rev32	v10.8h, v10.8h
1792	eor	v11.16b, v17.16b, v11.16b
1793	ushr	v15.4s, v9.4s, #12
1794	shl	v9.4s, v9.4s, #20
1795	eor	v31.16b, v12.16b, v31.16b
1796	add	v1.4s, v1.4s, v10.4s
1797	rev32	v11.8h, v11.8h
1798	ldr	q26, [sp, #176]
1799	orr	v9.16b, v9.16b, v15.16b
1800	ushr	v15.4s, v31.4s, #12
1801	shl	v31.4s, v31.4s, #20
1802	eor	v13.16b, v1.16b, v13.16b
1803	add	v5.4s, v5.4s, v11.4s
1804	add	v14.4s, v14.4s, v24.4s
1805	orr	v31.16b, v31.16b, v15.16b
1806	ushr	v15.4s, v13.4s, #12
1807	shl	v13.4s, v13.4s, #20
1808	eor	v8.16b, v5.16b, v8.16b
1809	add	v14.4s, v14.4s, v9.4s
1810	add	v2.4s, v2.4s, v22.4s
1811	orr	v13.16b, v13.16b, v15.16b
1812	ushr	v15.4s, v8.4s, #12
1813	shl	v8.4s, v8.4s, #20
1814	eor	v0.16b, v14.16b, v0.16b
1815	add	v2.4s, v2.4s, v31.4s
1816	add	v3.4s, v3.4s, v28.4s
1817	orr	v8.16b, v8.16b, v15.16b
1818	ushr	v15.4s, v0.4s, #8
1819	shl	v0.4s, v0.4s, #24
1820	eor	v16.16b, v2.16b, v16.16b
1821	add	v3.4s, v3.4s, v13.4s
1822	add	v17.4s, v17.4s, v26.4s
1823	orr	v0.16b, v0.16b, v15.16b
1824	ushr	v15.4s, v16.4s, #8
1825	shl	v16.4s, v16.4s, #24
1826	eor	v10.16b, v3.16b, v10.16b
1827	add	v17.4s, v17.4s, v8.4s
1828	orr	v16.16b, v16.16b, v15.16b
1829	ushr	v15.4s, v10.4s, #8
1830	shl	v10.4s, v10.4s, #24
1831	eor	v11.16b, v17.16b, v11.16b
1832	add	v4.4s, v0.4s, v4.4s
1833	orr	v10.16b, v10.16b, v15.16b
1834	ushr	v15.4s, v11.4s, #8
1835	shl	v11.4s, v11.4s, #24
1836	eor	v9.16b, v4.16b, v9.16b
1837	add	v12.4s, v16.4s, v12.4s
1838	orr	v11.16b, v11.16b, v15.16b
1839	ushr	v15.4s, v9.4s, #7
1840	shl	v9.4s, v9.4s, #25
1841	eor	v31.16b, v12.16b, v31.16b
1842	add	v1.4s, v10.4s, v1.4s
1843	orr	v9.16b, v9.16b, v15.16b
1844	ushr	v15.4s, v31.4s, #7
1845	shl	v31.4s, v31.4s, #25
1846	eor	v13.16b, v1.16b, v13.16b
1847	add	v5.4s, v11.4s, v5.4s
1848	orr	v31.16b, v31.16b, v15.16b
1849	ushr	v15.4s, v13.4s, #7
1850	shl	v13.4s, v13.4s, #25
1851	eor	v8.16b, v5.16b, v8.16b
1852	orr	v13.16b, v13.16b, v15.16b
1853	ushr	v15.4s, v8.4s, #7
1854	shl	v8.4s, v8.4s, #25
1855	orr	v8.16b, v8.16b, v15.16b
1856	add	v14.4s, v14.4s, v18.4s
1857	add	v14.4s, v14.4s, v8.4s
1858	add	v2.4s, v2.4s, v27.4s
1859	eor	v16.16b, v14.16b, v16.16b
1860	add	v2.4s, v2.4s, v9.4s
1861	add	v3.4s, v3.4s, v7.4s
1862	rev32	v16.8h, v16.8h
1863	eor	v10.16b, v2.16b, v10.16b
1864	add	v3.4s, v3.4s, v31.4s
1865	add	v17.4s, v17.4s, v21.4s
1866	add	v1.4s, v1.4s, v16.4s
1867	rev32	v10.8h, v10.8h
1868	eor	v11.16b, v3.16b, v11.16b
1869	add	v17.4s, v17.4s, v13.4s
1870	eor	v8.16b, v1.16b, v8.16b
1871	add	v5.4s, v5.4s, v10.4s
1872	rev32	v11.8h, v11.8h
1873	eor	v0.16b, v17.16b, v0.16b
1874	add	v14.4s, v14.4s, v6.4s
1875	ldur	q6, [x29, #-96]
1876	ushr	v15.4s, v8.4s, #12
1877	shl	v8.4s, v8.4s, #20
1878	eor	v9.16b, v5.16b, v9.16b
1879	add	v4.4s, v4.4s, v11.4s
1880	rev32	v0.8h, v0.8h
1881	stur	q20, [x29, #-160]
1882	mov	v20.16b, v29.16b
1883	orr	v8.16b, v8.16b, v15.16b
1884	ushr	v15.4s, v9.4s, #12
1885	shl	v9.4s, v9.4s, #20
1886	eor	v31.16b, v4.16b, v31.16b
1887	add	v12.4s, v12.4s, v0.4s
1888	mov	v19.16b, v29.16b
1889	orr	v9.16b, v9.16b, v15.16b
1890	ushr	v15.4s, v31.4s, #12
1891	shl	v31.4s, v31.4s, #20
1892	eor	v13.16b, v12.16b, v13.16b
1893	add	v14.4s, v14.4s, v8.4s
1894	add	v2.4s, v2.4s, v20.4s
1895	mov	v19.16b, v28.16b
1896	orr	v31.16b, v31.16b, v15.16b
1897	ushr	v15.4s, v13.4s, #12
1898	shl	v13.4s, v13.4s, #20
1899	eor	v16.16b, v14.16b, v16.16b
1900	add	v2.4s, v2.4s, v9.4s
1901	add	v3.4s, v3.4s, v6.4s
1902	orr	v13.16b, v13.16b, v15.16b
1903	ushr	v15.4s, v16.4s, #8
1904	shl	v16.4s, v16.4s, #24
1905	eor	v10.16b, v2.16b, v10.16b
1906	add	v3.4s, v3.4s, v31.4s
1907	add	v17.4s, v17.4s, v19.4s
1908	orr	v16.16b, v16.16b, v15.16b
1909	ushr	v15.4s, v10.4s, #8
1910	shl	v10.4s, v10.4s, #24
1911	eor	v11.16b, v3.16b, v11.16b
1912	add	v17.4s, v17.4s, v13.4s
1913	orr	v10.16b, v10.16b, v15.16b
1914	ushr	v15.4s, v11.4s, #8
1915	shl	v11.4s, v11.4s, #24
1916	eor	v0.16b, v17.16b, v0.16b
1917	add	v1.4s, v16.4s, v1.4s
1918	orr	v11.16b, v11.16b, v15.16b
1919	ushr	v15.4s, v0.4s, #8
1920	shl	v0.4s, v0.4s, #24
1921	eor	v8.16b, v1.16b, v8.16b
1922	add	v5.4s, v10.4s, v5.4s
1923	orr	v0.16b, v0.16b, v15.16b
1924	ushr	v15.4s, v8.4s, #7
1925	shl	v8.4s, v8.4s, #25
1926	eor	v9.16b, v5.16b, v9.16b
1927	add	v4.4s, v11.4s, v4.4s
1928	orr	v8.16b, v8.16b, v15.16b
1929	ushr	v15.4s, v9.4s, #7
1930	shl	v9.4s, v9.4s, #25
1931	eor	v31.16b, v4.16b, v31.16b
1932	add	v12.4s, v0.4s, v12.4s
1933	orr	v9.16b, v9.16b, v15.16b
1934	ushr	v15.4s, v31.4s, #7
1935	shl	v31.4s, v31.4s, #25
1936	eor	v13.16b, v12.16b, v13.16b
1937	add	v14.4s, v14.4s, v25.4s
1938	orr	v31.16b, v31.16b, v15.16b
1939	ushr	v15.4s, v13.4s, #7
1940	shl	v13.4s, v13.4s, #25
1941	add	v14.4s, v14.4s, v9.4s
1942	add	v2.4s, v2.4s, v30.4s
1943	orr	v13.16b, v13.16b, v15.16b
1944	eor	v0.16b, v14.16b, v0.16b
1945	add	v2.4s, v2.4s, v31.4s
1946	add	v3.4s, v3.4s, v24.4s
1947	rev32	v0.8h, v0.8h
1948	eor	v16.16b, v2.16b, v16.16b
1949	add	v3.4s, v3.4s, v13.4s
1950	add	v17.4s, v17.4s, v26.4s
1951	mov	v29.16b, v27.16b
1952	add	v4.4s, v4.4s, v0.4s
1953	rev32	v16.8h, v16.8h
1954	eor	v10.16b, v3.16b, v10.16b
1955	add	v17.4s, v17.4s, v8.4s
1956	ldur	q27, [x29, #-160]
1957	eor	v9.16b, v4.16b, v9.16b
1958	add	v12.4s, v12.4s, v16.4s
1959	rev32	v10.8h, v10.8h
1960	eor	v11.16b, v17.16b, v11.16b
1961	ldur	q6, [x29, #-80]
1962	ushr	v15.4s, v9.4s, #12
1963	shl	v9.4s, v9.4s, #20
1964	eor	v31.16b, v12.16b, v31.16b
1965	add	v1.4s, v1.4s, v10.4s
1966	rev32	v11.8h, v11.8h
1967	orr	v9.16b, v9.16b, v15.16b
1968	ushr	v15.4s, v31.4s, #12
1969	shl	v31.4s, v31.4s, #20
1970	eor	v13.16b, v1.16b, v13.16b
1971	add	v5.4s, v5.4s, v11.4s
1972	add	v14.4s, v14.4s, v22.4s
1973	orr	v31.16b, v31.16b, v15.16b
1974	ushr	v15.4s, v13.4s, #12
1975	shl	v13.4s, v13.4s, #20
1976	eor	v8.16b, v5.16b, v8.16b
1977	add	v14.4s, v14.4s, v9.4s
1978	add	v2.4s, v2.4s, v27.4s
1979	orr	v13.16b, v13.16b, v15.16b
1980	ushr	v15.4s, v8.4s, #12
1981	shl	v8.4s, v8.4s, #20
1982	eor	v0.16b, v14.16b, v0.16b
1983	add	v2.4s, v2.4s, v31.4s
1984	add	v3.4s, v3.4s, v6.4s
1985	orr	v8.16b, v8.16b, v15.16b
1986	ushr	v15.4s, v0.4s, #8
1987	shl	v0.4s, v0.4s, #24
1988	eor	v16.16b, v2.16b, v16.16b
1989	add	v3.4s, v3.4s, v13.4s
1990	add	v17.4s, v17.4s, v23.4s
1991	orr	v0.16b, v0.16b, v15.16b
1992	ushr	v15.4s, v16.4s, #8
1993	shl	v16.4s, v16.4s, #24
1994	eor	v10.16b, v3.16b, v10.16b
1995	add	v17.4s, v17.4s, v8.4s
1996	orr	v16.16b, v16.16b, v15.16b
1997	ushr	v15.4s, v10.4s, #8
1998	shl	v10.4s, v10.4s, #24
1999	eor	v11.16b, v17.16b, v11.16b
2000	add	v4.4s, v0.4s, v4.4s
2001	orr	v10.16b, v10.16b, v15.16b
2002	ushr	v15.4s, v11.4s, #8
2003	shl	v11.4s, v11.4s, #24
2004	eor	v9.16b, v4.16b, v9.16b
2005	add	v12.4s, v16.4s, v12.4s
2006	orr	v11.16b, v11.16b, v15.16b
2007	ushr	v15.4s, v9.4s, #7
2008	shl	v9.4s, v9.4s, #25
2009	eor	v31.16b, v12.16b, v31.16b
2010	add	v1.4s, v10.4s, v1.4s
2011	orr	v9.16b, v9.16b, v15.16b
2012	ushr	v15.4s, v31.4s, #7
2013	shl	v31.4s, v31.4s, #25
2014	eor	v13.16b, v1.16b, v13.16b
2015	add	v5.4s, v11.4s, v5.4s
2016	orr	v31.16b, v31.16b, v15.16b
2017	ushr	v15.4s, v13.4s, #7
2018	shl	v13.4s, v13.4s, #25
2019	eor	v8.16b, v5.16b, v8.16b
2020	orr	v13.16b, v13.16b, v15.16b
2021	ushr	v15.4s, v8.4s, #7
2022	shl	v8.4s, v8.4s, #25
2023	orr	v8.16b, v8.16b, v15.16b
2024	add	v14.4s, v14.4s, v29.4s
2025	add	v14.4s, v14.4s, v8.4s
2026	add	v2.4s, v2.4s, v20.4s
2027	mov	v28.16b, v7.16b
2028	eor	v16.16b, v14.16b, v16.16b
2029	add	v2.4s, v2.4s, v9.4s
2030	add	v3.4s, v3.4s, v19.4s
2031	rev32	v16.8h, v16.8h
2032	eor	v10.16b, v2.16b, v10.16b
2033	add	v3.4s, v3.4s, v31.4s
2034	add	v17.4s, v17.4s, v28.4s
2035	add	v1.4s, v1.4s, v16.4s
2036	rev32	v10.8h, v10.8h
2037	eor	v11.16b, v3.16b, v11.16b
2038	add	v17.4s, v17.4s, v13.4s
2039	eor	v8.16b, v1.16b, v8.16b
2040	add	v5.4s, v5.4s, v10.4s
2041	rev32	v11.8h, v11.8h
2042	eor	v0.16b, v17.16b, v0.16b
2043	ushr	v15.4s, v8.4s, #12
2044	shl	v8.4s, v8.4s, #20
2045	eor	v9.16b, v5.16b, v9.16b
2046	add	v4.4s, v4.4s, v11.4s
2047	rev32	v0.8h, v0.8h
2048	orr	v8.16b, v8.16b, v15.16b
2049	ushr	v15.4s, v9.4s, #12
2050	shl	v9.4s, v9.4s, #20
2051	eor	v31.16b, v4.16b, v31.16b
2052	add	v12.4s, v12.4s, v0.4s
2053	add	v14.4s, v14.4s, v21.4s
2054	orr	v9.16b, v9.16b, v15.16b
2055	ushr	v15.4s, v31.4s, #12
2056	shl	v31.4s, v31.4s, #20
2057	eor	v13.16b, v12.16b, v13.16b
2058	add	v14.4s, v14.4s, v8.4s
2059	add	v2.4s, v2.4s, v30.4s
2060	orr	v31.16b, v31.16b, v15.16b
2061	ushr	v15.4s, v13.4s, #12
2062	shl	v13.4s, v13.4s, #20
2063	eor	v16.16b, v14.16b, v16.16b
2064	add	v2.4s, v2.4s, v9.4s
2065	orr	v13.16b, v13.16b, v15.16b
2066	ushr	v15.4s, v16.4s, #8
2067	shl	v16.4s, v16.4s, #24
2068	eor	v10.16b, v2.16b, v10.16b
2069	orr	v16.16b, v16.16b, v15.16b
2070	ushr	v15.4s, v10.4s, #8
2071	shl	v10.4s, v10.4s, #24
2072	add	v3.4s, v3.4s, v18.4s
2073	orr	v10.16b, v10.16b, v15.16b
2074	add	v15.4s, v3.4s, v31.4s
2075	eor	v3.16b, v15.16b, v11.16b
2076	ushr	v11.4s, v3.4s, #8
2077	shl	v3.4s, v3.4s, #24
2078	orr	v11.16b, v3.16b, v11.16b
2079	add	v3.4s, v17.4s, v6.4s
2080	add	v17.4s, v3.4s, v13.4s
2081	eor	v0.16b, v17.16b, v0.16b
2082	ushr	v3.4s, v0.4s, #8
2083	shl	v0.4s, v0.4s, #24
2084	add	v1.4s, v16.4s, v1.4s
2085	orr	v0.16b, v0.16b, v3.16b
2086	eor	v3.16b, v1.16b, v8.16b
2087	ushr	v8.4s, v3.4s, #7
2088	shl	v3.4s, v3.4s, #25
2089	add	v5.4s, v10.4s, v5.4s
2090	orr	v8.16b, v3.16b, v8.16b
2091	eor	v3.16b, v5.16b, v9.16b
2092	add	v4.4s, v11.4s, v4.4s
2093	ushr	v9.4s, v3.4s, #7
2094	shl	v3.4s, v3.4s, #25
2095	eor	v31.16b, v4.16b, v31.16b
2096	mov	v7.16b, v23.16b
2097	mov	v23.16b, v28.16b
2098	mov	v28.16b, v6.16b
2099	orr	v3.16b, v3.16b, v9.16b
2100	ushr	v9.4s, v31.4s, #7
2101	shl	v31.4s, v31.4s, #25
2102	ldur	q6, [x29, #-64]
2103	orr	v31.16b, v31.16b, v9.16b
2104	add	v9.4s, v0.4s, v12.4s
2105	eor	v12.16b, v9.16b, v13.16b
2106	ushr	v13.4s, v12.4s, #7
2107	shl	v12.4s, v12.4s, #25
2108	orr	v12.16b, v12.16b, v13.16b
2109	add	v13.4s, v14.4s, v6.4s
2110	add	v13.4s, v13.4s, v3.4s
2111	eor	v0.16b, v13.16b, v0.16b
2112	add	v2.4s, v2.4s, v24.4s
2113	rev32	v14.8h, v0.8h
2114	add	v0.4s, v2.4s, v31.4s
2115	add	v6.4s, v4.4s, v14.4s
2116	eor	v2.16b, v0.16b, v16.16b
2117	eor	v3.16b, v6.16b, v3.16b
2118	rev32	v16.8h, v2.8h
2119	ushr	v4.4s, v3.4s, #12
2120	shl	v3.4s, v3.4s, #20
2121	add	v2.4s, v9.4s, v16.4s
2122	orr	v4.16b, v3.16b, v4.16b
2123	eor	v3.16b, v2.16b, v31.16b
2124	ushr	v31.4s, v3.4s, #12
2125	shl	v3.4s, v3.4s, #20
2126	orr	v3.16b, v3.16b, v31.16b
2127	add	v31.4s, v15.4s, v22.4s
2128	add	v31.4s, v31.4s, v12.4s
2129	add	v17.4s, v17.4s, v7.4s
2130	eor	v9.16b, v31.16b, v10.16b
2131	add	v17.4s, v17.4s, v8.4s
2132	rev32	v9.8h, v9.8h
2133	eor	v11.16b, v17.16b, v11.16b
2134	add	v1.4s, v1.4s, v9.4s
2135	rev32	v11.8h, v11.8h
2136	eor	v10.16b, v1.16b, v12.16b
2137	add	v5.4s, v5.4s, v11.4s
2138	ushr	v12.4s, v10.4s, #12
2139	shl	v10.4s, v10.4s, #20
2140	eor	v8.16b, v5.16b, v8.16b
2141	orr	v10.16b, v10.16b, v12.16b
2142	ushr	v12.4s, v8.4s, #12
2143	shl	v8.4s, v8.4s, #20
2144	orr	v8.16b, v8.16b, v12.16b
2145	add	v12.4s, v13.4s, v27.4s
2146	add	v12.4s, v12.4s, v4.4s
2147	eor	v13.16b, v12.16b, v14.16b
2148	ldur	q14, [x29, #-96]
2149	mov	v25.16b, v29.16b
2150	add	v29.4s, v12.4s, v20.4s
2151	add	v20.4s, v31.4s, v26.4s
2152	add	v0.4s, v0.4s, v14.4s
2153	add	v0.4s, v0.4s, v3.4s
2154	eor	v16.16b, v0.16b, v16.16b
2155	add	v0.4s, v0.4s, v30.4s
2156	ldur	q30, [x29, #-112]
2157	add	v20.4s, v20.4s, v10.4s
2158	eor	v31.16b, v20.16b, v9.16b
2159	add	v20.4s, v20.4s, v28.4s
2160	add	v17.4s, v17.4s, v30.4s
2161	add	v17.4s, v17.4s, v8.4s
2162	eor	v9.16b, v17.16b, v11.16b
2163	ushr	v28.4s, v13.4s, #8
2164	shl	v11.4s, v13.4s, #24
2165	orr	v28.16b, v11.16b, v28.16b
2166	ushr	v11.4s, v16.4s, #8
2167	shl	v16.4s, v16.4s, #24
2168	orr	v16.16b, v16.16b, v11.16b
2169	ushr	v11.4s, v31.4s, #8
2170	shl	v31.4s, v31.4s, #24
2171	add	v6.4s, v28.4s, v6.4s
2172	orr	v31.16b, v31.16b, v11.16b
2173	ushr	v11.4s, v9.4s, #8
2174	shl	v9.4s, v9.4s, #24
2175	add	v2.4s, v16.4s, v2.4s
2176	eor	v4.16b, v6.16b, v4.16b
2177	orr	v9.16b, v9.16b, v11.16b
2178	add	v1.4s, v31.4s, v1.4s
2179	eor	v3.16b, v2.16b, v3.16b
2180	ushr	v11.4s, v4.4s, #7
2181	shl	v4.4s, v4.4s, #25
2182	add	v5.4s, v9.4s, v5.4s
2183	eor	v10.16b, v1.16b, v10.16b
2184	orr	v4.16b, v4.16b, v11.16b
2185	ushr	v11.4s, v3.4s, #7
2186	shl	v3.4s, v3.4s, #25
2187	eor	v8.16b, v5.16b, v8.16b
2188	orr	v3.16b, v3.16b, v11.16b
2189	ushr	v11.4s, v10.4s, #7
2190	shl	v10.4s, v10.4s, #25
2191	orr	v10.16b, v10.16b, v11.16b
2192	ushr	v11.4s, v8.4s, #7
2193	shl	v8.4s, v8.4s, #25
2194	orr	v8.16b, v8.16b, v11.16b
2195	add	v29.4s, v29.4s, v8.4s
2196	eor	v16.16b, v29.16b, v16.16b
2197	add	v0.4s, v0.4s, v4.4s
2198	mov	v12.16b, v26.16b
2199	add	v17.4s, v17.4s, v19.4s
2200	add	v26.4s, v29.4s, v23.4s
2201	eor	v29.16b, v0.16b, v31.16b
2202	add	v20.4s, v20.4s, v3.4s
2203	rev32	v16.8h, v16.8h
2204	stur	q18, [x29, #-176]
2205	mov	v18.16b, v27.16b
2206	add	v0.4s, v0.4s, v24.4s
2207	eor	v27.16b, v20.16b, v9.16b
2208	add	v17.4s, v17.4s, v10.4s
2209	rev32	v24.8h, v29.8h
2210	add	v1.4s, v1.4s, v16.4s
2211	add	v20.4s, v20.4s, v25.4s
2212	eor	v25.16b, v17.16b, v28.16b
2213	rev32	v27.8h, v27.8h
2214	add	v5.4s, v5.4s, v24.4s
2215	eor	v28.16b, v1.16b, v8.16b
2216	rev32	v25.8h, v25.8h
2217	add	v6.4s, v6.4s, v27.4s
2218	eor	v4.16b, v5.16b, v4.16b
2219	ushr	v31.4s, v28.4s, #12
2220	shl	v28.4s, v28.4s, #20
2221	add	v2.4s, v2.4s, v25.4s
2222	eor	v3.16b, v6.16b, v3.16b
2223	orr	v28.16b, v28.16b, v31.16b
2224	ushr	v31.4s, v4.4s, #12
2225	shl	v4.4s, v4.4s, #20
2226	eor	v29.16b, v2.16b, v10.16b
2227	orr	v4.16b, v4.16b, v31.16b
2228	ushr	v31.4s, v3.4s, #12
2229	shl	v3.4s, v3.4s, #20
2230	add	v26.4s, v26.4s, v28.4s
2231	orr	v3.16b, v3.16b, v31.16b
2232	ushr	v31.4s, v29.4s, #12
2233	shl	v29.4s, v29.4s, #20
2234	eor	v16.16b, v26.16b, v16.16b
2235	add	v0.4s, v0.4s, v4.4s
2236	add	v17.4s, v17.4s, v12.4s
2237	orr	v29.16b, v29.16b, v31.16b
2238	eor	v24.16b, v0.16b, v24.16b
2239	add	v0.4s, v0.4s, v22.4s
2240	add	v20.4s, v20.4s, v3.4s
2241	ushr	v22.4s, v16.4s, #8
2242	shl	v16.4s, v16.4s, #24
2243	add	v23.4s, v26.4s, v21.4s
2244	eor	v21.16b, v20.16b, v27.16b
2245	add	v17.4s, v17.4s, v29.4s
2246	orr	v16.16b, v16.16b, v22.16b
2247	ushr	v22.4s, v24.4s, #8
2248	shl	v24.4s, v24.4s, #24
2249	eor	v25.16b, v17.16b, v25.16b
2250	orr	v22.16b, v24.16b, v22.16b
2251	ushr	v24.4s, v21.4s, #8
2252	shl	v21.4s, v21.4s, #24
2253	orr	v21.16b, v21.16b, v24.16b
2254	ushr	v24.4s, v25.4s, #8
2255	shl	v25.4s, v25.4s, #24
2256	add	v1.4s, v16.4s, v1.4s
2257	orr	v24.16b, v25.16b, v24.16b
2258	add	v5.4s, v22.4s, v5.4s
2259	eor	v25.16b, v1.16b, v28.16b
2260	add	v6.4s, v21.4s, v6.4s
2261	eor	v4.16b, v5.16b, v4.16b
2262	ushr	v27.4s, v25.4s, #7
2263	shl	v25.4s, v25.4s, #25
2264	add	v2.4s, v24.4s, v2.4s
2265	eor	v3.16b, v6.16b, v3.16b
2266	orr	v25.16b, v25.16b, v27.16b
2267	ushr	v27.4s, v4.4s, #7
2268	shl	v4.4s, v4.4s, #25
2269	ldur	q19, [x29, #-176]
2270	eor	v26.16b, v2.16b, v29.16b
2271	orr	v4.16b, v4.16b, v27.16b
2272	ushr	v27.4s, v3.4s, #7
2273	shl	v3.4s, v3.4s, #25
2274	orr	v3.16b, v3.16b, v27.16b
2275	ushr	v27.4s, v26.4s, #7
2276	shl	v26.4s, v26.4s, #25
2277	add	v20.4s, v20.4s, v18.4s
2278	add	v17.4s, v17.4s, v30.4s
2279	orr	v26.16b, v26.16b, v27.16b
2280	add	v0.4s, v0.4s, v3.4s
2281	eor	v16.16b, v0.16b, v16.16b
2282	add	v0.4s, v0.4s, v19.4s
2283	add	v19.4s, v20.4s, v26.4s
2284	add	v17.4s, v17.4s, v25.4s
2285	eor	v20.16b, v19.16b, v22.16b
2286	add	v7.4s, v19.4s, v7.4s
2287	eor	v19.16b, v17.16b, v21.16b
2288	ldur	q21, [x29, #-64]
2289	add	v23.4s, v23.4s, v4.4s
2290	eor	v24.16b, v23.16b, v24.16b
2291	rev32	v16.8h, v16.8h
2292	add	v17.4s, v17.4s, v21.4s
2293	rev32	v21.8h, v24.8h
2294	add	v6.4s, v6.4s, v21.4s
2295	rev32	v20.8h, v20.8h
2296	add	v2.4s, v2.4s, v16.4s
2297	eor	v4.16b, v6.16b, v4.16b
2298	rev32	v19.8h, v19.8h
2299	add	v1.4s, v1.4s, v20.4s
2300	eor	v3.16b, v2.16b, v3.16b
2301	ushr	v24.4s, v4.4s, #12
2302	shl	v4.4s, v4.4s, #20
2303	add	v5.4s, v5.4s, v19.4s
2304	eor	v22.16b, v1.16b, v26.16b
2305	orr	v4.16b, v4.16b, v24.16b
2306	ushr	v24.4s, v3.4s, #12
2307	shl	v3.4s, v3.4s, #20
2308	add	v18.4s, v23.4s, v14.4s
2309	eor	v23.16b, v5.16b, v25.16b
2310	orr	v3.16b, v3.16b, v24.16b
2311	ushr	v24.4s, v22.4s, #12
2312	shl	v22.4s, v22.4s, #20
2313	orr	v22.16b, v22.16b, v24.16b
2314	ushr	v24.4s, v23.4s, #12
2315	shl	v23.4s, v23.4s, #20
2316	orr	v23.16b, v23.16b, v24.16b
2317	add	v18.4s, v18.4s, v4.4s
2318	add	v0.4s, v0.4s, v3.4s
2319	add	v24.4s, v17.4s, v23.4s
2320	eor	v17.16b, v18.16b, v21.16b
2321	add	v7.4s, v7.4s, v22.4s
2322	eor	v16.16b, v0.16b, v16.16b
2323	ushr	v21.4s, v17.4s, #8
2324	shl	v17.4s, v17.4s, #24
2325	eor	v20.16b, v7.16b, v20.16b
2326	orr	v21.16b, v17.16b, v21.16b
2327	ushr	v17.4s, v16.4s, #8
2328	shl	v16.4s, v16.4s, #24
2329	eor	v19.16b, v24.16b, v19.16b
2330	orr	v16.16b, v16.16b, v17.16b
2331	ushr	v17.4s, v20.4s, #8
2332	shl	v20.4s, v20.4s, #24
2333	orr	v25.16b, v20.16b, v17.16b
2334	ushr	v17.4s, v19.4s, #8
2335	shl	v19.4s, v19.4s, #24
2336	orr	v19.16b, v19.16b, v17.16b
2337	add	v1.4s, v25.4s, v1.4s
2338	eor	v22.16b, v1.16b, v22.16b
2339	eor	v20.16b, v1.16b, v18.16b
2340	add	v1.4s, v19.4s, v5.4s
2341	eor	v26.16b, v1.16b, v0.16b
2342	add	v0.4s, v21.4s, v6.4s
2343	eor	v5.16b, v1.16b, v23.16b
2344	eor	v1.16b, v0.16b, v4.16b
2345	eor	v17.16b, v0.16b, v7.16b
2346	add	v0.4s, v16.4s, v2.4s
2347	eor	v2.16b, v0.16b, v3.16b
2348	eor	v6.16b, v0.16b, v24.16b
2349	ushr	v0.4s, v1.4s, #7
2350	shl	v1.4s, v1.4s, #25
2351	orr	v0.16b, v1.16b, v0.16b
2352	ushr	v1.4s, v2.4s, #7
2353	shl	v2.4s, v2.4s, #25
2354	orr	v1.16b, v2.16b, v1.16b
2355	ushr	v2.4s, v22.4s, #7
2356	shl	v3.4s, v22.4s, #25
2357	orr	v2.16b, v3.16b, v2.16b
2358	ushr	v3.4s, v5.4s, #7
2359	shl	v4.4s, v5.4s, #25
2360	orr	v3.16b, v4.16b, v3.16b
2361	eor	v8.16b, v16.16b, v3.16b
2362	eor	v9.16b, v25.16b, v0.16b
2363	eor	v31.16b, v1.16b, v19.16b
2364	cmp	x17, x22
2365	eor	v15.16b, v2.16b, v21.16b
2366	mov	w18, w19
2367	b.ne	.LBB2_4
2368.LBB2_7:
2369	zip1	v0.4s, v20.4s, v26.4s
2370	zip2	v1.4s, v20.4s, v26.4s
2371	zip1	v2.4s, v17.4s, v6.4s
2372	zip2	v3.4s, v17.4s, v6.4s
2373	zip1	v4.4s, v8.4s, v9.4s
2374	zip2	v5.4s, v8.4s, v9.4s
2375	zip1	v6.4s, v31.4s, v15.4s
2376	zip2	v7.4s, v31.4s, v15.4s
2377	add	x13, x20, #4
2378	tst	w5, #0x1
2379	sub	x28, x28, #4
2380	zip1	v16.2d, v0.2d, v2.2d
2381	zip2	v0.2d, v0.2d, v2.2d
2382	zip1	v2.2d, v1.2d, v3.2d
2383	zip2	v1.2d, v1.2d, v3.2d
2384	zip1	v3.2d, v4.2d, v6.2d
2385	zip2	v4.2d, v4.2d, v6.2d
2386	zip1	v6.2d, v5.2d, v7.2d
2387	zip2	v5.2d, v5.2d, v7.2d
2388	add	x24, x24, #32
2389	csel	x20, x13, x20, ne
2390	cmp	x28, #3
2391	stp	q16, q3, [x26]
2392	stp	q0, q4, [x26, #32]
2393	stp	q2, q6, [x26, #64]
2394	stp	q1, q5, [x26, #96]
2395	add	x26, x26, #128
2396	b.hi	.LBB2_2
2397.LBB2_8:
2398	cbz	x28, .LBB2_16
2399	orr	w8, w7, w19
2400	and	x21, x5, #0x1
2401	stur	w8, [x29, #-64]
2402.LBB2_10:
2403	ldr	x8, [sp, #40]
2404	ldr	x25, [x24]
2405	ldur	w4, [x29, #-64]
2406	ldp	q1, q0, [x8]
2407	mov	x8, x22
2408	stp	q1, q0, [x29, #-48]
2409.LBB2_11:
2410	subs	x23, x8, #1
2411	b.eq	.LBB2_13
2412	cbnz	x8, .LBB2_14
2413	b	.LBB2_15
2414.LBB2_13:
2415	orr	w4, w4, w27
2416.LBB2_14:
2417	sub	x0, x29, #48
2418	mov	w2, #64
2419	mov	x1, x25
2420	mov	x3, x20
2421	bl	zfs_blake3_compress_in_place_sse2
2422	add	x25, x25, #64
2423	mov	x8, x23
2424	mov	w4, w19
2425	b	.LBB2_11
2426.LBB2_15:
2427	ldp	q0, q1, [x29, #-48]
2428	add	x20, x20, x21
2429	add	x24, x24, #8
2430	subs	x28, x28, #1
2431	stp	q0, q1, [x26], #32
2432	b.ne	.LBB2_10
2433.LBB2_16:
2434	add	sp, sp, #384
2435	ldp	x20, x19, [sp, #144]
2436	ldp	x22, x21, [sp, #128]
2437	ldp	x24, x23, [sp, #112]
2438	ldp	x26, x25, [sp, #96]
2439	ldp	x28, x27, [sp, #80]
2440	ldp	x29, x30, [sp, #64]
2441	ldp	d9, d8, [sp, #48]
2442	ldp	d11, d10, [sp, #32]
2443	ldp	d13, d12, [sp, #16]
2444	ldp	d15, d14, [sp], #160
2445	ret
2446.Lfunc_end2:
2447	.size	zfs_blake3_hash_many_sse2, .Lfunc_end2-zfs_blake3_hash_many_sse2
2448	.cfi_endproc
2449	.section	".note.GNU-stack","",@progbits
2450#endif
2451