/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */
24eda14cbcSMatt Macy 
25eda14cbcSMatt Macy #include <sys/types.h>
26eda14cbcSMatt Macy #include <sys/simd.h>
27eda14cbcSMatt Macy 
#ifdef __linux__
/* On Linux, make every asm block volatile so GCC cannot reorder/elide it. */
#define	__asm __asm__ __volatile__
#endif

/*
 * REG_CNT(r...) expands to the number of arguments passed (1..8),
 * via the classic reversed-sequence argument-counting trick.
 */
#define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
34eda14cbcSMatt Macy 
/*
 * VRn(r...) expands to the asm-template operand string "%[wX]" for the
 * n-th register in the list r.  Each VRn_ helper skips n leading
 * arguments and stringizes the next one.
 */
#define	VR0_(REG, ...) "%[w"#REG"]"
#define	VR1_(_1, REG, ...) "%[w"#REG"]"
#define	VR2_(_1, _2, REG, ...) "%[w"#REG"]"
#define	VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
#define	VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
#define	VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
#define	VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
#define	VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"

/*
 * Here we need registers not used otherwise.
 * They will be used in unused ASM for the case
 * with more registers than required... but GCC
 * will still need to make sure the constraints
 * are correct, and duplicate constraints are illegal
 * ... and we use the "register" number as a name
 * (36, 35, ... pad short lists so VRn_ always finds an argument).
 */
#define	VR0(r...) VR0_(r)
#define	VR1(r...) VR1_(r)
#define	VR2(r...) VR2_(r, 36)
#define	VR3(r...) VR3_(r, 36, 35)
#define	VR4(r...) VR4_(r, 36, 35, 34, 33)
#define	VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
#define	VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
#define	VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Operand string for one explicitly numbered register. */
#define	VR(X) "%[w"#X"]"
63eda14cbcSMatt Macy 
/*
 * RVRn(r...) expands to a read-only ("w", SIMD register) input-operand
 * specification for the n-th register in r, e.g. [w1] "w" (w1).
 * Padding values mirror the VRn macros above.
 */
#define	RVR0_(REG, ...) [w##REG] "w" (w##REG)
#define	RVR1_(_1, REG, ...) [w##REG] "w" (w##REG)
#define	RVR2_(_1, _2, REG, ...) [w##REG] "w" (w##REG)
#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "w" (w##REG)
#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "w" (w##REG)
#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "w" (w##REG)
#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "w" (w##REG)
#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "w" (w##REG)

#define	RVR0(r...) RVR0_(r)
#define	RVR1(r...) RVR1_(r)
#define	RVR2(r...) RVR2_(r, 36)
#define	RVR3(r...) RVR3_(r, 36, 35)
#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Read-only input operand for one explicitly numbered register. */
#define	RVR(X) [w##X] "w" (w##X)
83eda14cbcSMatt Macy 
/*
 * WVRn(r...) expands to a write-only ("=w") output-operand specification
 * for the n-th register in r, e.g. [w1] "=w" (w1).
 * Padding values mirror the VRn macros above.
 */
#define	WVR0_(REG, ...) [w##REG] "=w" (w##REG)
#define	WVR1_(_1, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR2_(_1, _2, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=w" (w##REG)
#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=w" (w##REG)

#define	WVR0(r...) WVR0_(r)
#define	WVR1(r...) WVR1_(r)
#define	WVR2(r...) WVR2_(r, 36)
#define	WVR3(r...) WVR3_(r, 36, 35)
#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Write-only output operand for one explicitly numbered register. */
#define	WVR(X) [w##X] "=w" (w##X)
103eda14cbcSMatt Macy 
/*
 * UVRn(r...) expands to a read-write, early-clobber ("+&w") operand
 * specification for the n-th register in r, e.g. [w1] "+&w" (w1).
 * Padding values mirror the VRn macros above.
 */
#define	UVR0_(REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)
#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)

#define	UVR0(r...) UVR0_(r)
#define	UVR1(r...) UVR1_(r)
#define	UVR2(r...) UVR2_(r, 36)
#define	UVR3(r...) UVR3_(r, 36, 35)
#define	UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define	UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Read-write early-clobber operand for one explicitly numbered register. */
#define	UVR(X) [w##X] "+&w" (w##X)
123eda14cbcSMatt Macy 
/* R_01 selects the first two, R_23 the third and fourth, of a register list. */
#define	R_01(REG1, REG2, ...) REG1, REG2
#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define	R_23(REG...) _R_23(REG, 1, 2, 3)

/* Fired when a macro is invoked with an unsupported register count. */
#define	ZFS_ASM_BUG()	ASSERT(0)

/* Byte offset into a buffer, expressed as an unsigned char pointer. */
#define	OFFSET(ptr, val)	(((unsigned char *)(ptr))+val)
131eda14cbcSMatt Macy 
/* GF(2^8) carry-less-multiply lookup tables; defined in a sibling file. */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

/* Size in bytes of one 128-bit vector element. */
#define	ELEM_SIZE 16

/* One 16-byte, 16-byte-aligned vector element. */
typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
139eda14cbcSMatt Macy 
/*
 * XOR_ACC(src, r...): load 2, 4, or 8 consecutive 16-byte vectors from
 * src and XOR them into the corresponding registers in r.
 * Scratch vectors v18-v21 are clobbered (v20-v21 in the 2-register case).
 */
#define	XOR_ACC(src, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"ld1 { v21.4s },%[SRC0]\n"				\
		"ld1 { v20.4s },%[SRC1]\n"				\
		"ld1 { v19.4s },%[SRC2]\n"				\
		"ld1 { v18.4s },%[SRC3]\n"				\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"		\
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"		\
		"eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n"		\
		"eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n"		\
		"ld1 { v21.4s },%[SRC4]\n"				\
		"ld1 { v20.4s },%[SRC5]\n"				\
		"ld1 { v19.4s },%[SRC6]\n"				\
		"ld1 { v18.4s },%[SRC7]\n"				\
		"eor " VR4(r) ".16b," VR4(r) ".16b,v21.16b\n"		\
		"eor " VR5(r) ".16b," VR5(r) ".16b,v20.16b\n"		\
		"eor " VR6(r) ".16b," VR6(r) ".16b,v19.16b\n"		\
		"eor " VR7(r) ".16b," VR7(r) ".16b,v18.16b\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r),		\
			UVR4(r), UVR5(r), UVR6(r), UVR7(r)		\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16))),			\
		[SRC2] "Q" (*(OFFSET(src, 32))),			\
		[SRC3] "Q" (*(OFFSET(src, 48))),			\
		[SRC4] "Q" (*(OFFSET(src, 64))),			\
		[SRC5] "Q" (*(OFFSET(src, 80))),			\
		[SRC6] "Q" (*(OFFSET(src, 96))),			\
		[SRC7] "Q" (*(OFFSET(src, 112)))			\
		:	"v18", "v19", "v20", "v21");			\
		break;							\
	case 4:								\
		__asm(							\
		"ld1 { v21.4s },%[SRC0]\n"				\
		"ld1 { v20.4s },%[SRC1]\n"				\
		"ld1 { v19.4s },%[SRC2]\n"				\
		"ld1 { v18.4s },%[SRC3]\n"				\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"		\
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"		\
		"eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n"		\
		"eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16))),			\
		[SRC2] "Q" (*(OFFSET(src, 32))),			\
		[SRC3] "Q" (*(OFFSET(src, 48)))				\
		:	"v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm(							\
		"ld1 { v21.4s },%[SRC0]\n"				\
		"ld1 { v20.4s },%[SRC1]\n"				\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n"		\
		"eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n"		\
		:	UVR0(r), UVR1(r)				\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16)))				\
		:	"v20", "v21");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
205eda14cbcSMatt Macy 
/*
 * XOR(r...): XOR the first half of the register list into the second
 * half (e.g. for 8 registers: r4 ^= r0, r5 ^= r1, r6 ^= r2, r7 ^= r3).
 */
#define	XOR(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR0(r) ".16b\n"	\
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR1(r) ".16b\n"	\
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR2(r) ".16b\n"	\
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR3(r) ".16b\n"	\
		:	UVR4(r), UVR5(r), UVR6(r), UVR7(r)		\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm(							\
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR0(r) ".16b\n"	\
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR1(r) ".16b\n"	\
		:	UVR2(r), UVR3(r)				\
		:	RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
229eda14cbcSMatt Macy 
/* ZERO(r...): clear 2, 4, or 8 registers by XORing each with itself. */
#define	ZERO(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"	\
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"	\
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n"	\
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n"	\
		"eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n"	\
		"eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n"	\
		"eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n"	\
		"eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r));		\
		break;							\
	case 4:								\
		__asm(							\
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"	\
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"	\
		"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n"	\
		"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n"	\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r));		\
		break;							\
	case 2:								\
		__asm(							\
		"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n"	\
		"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n"	\
		:	WVR0(r), WVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
264eda14cbcSMatt Macy 
/*
 * COPY(r...): copy the first half of the register list into the second
 * half (e.g. for 8 registers: r4 = r0, r5 = r1, r6 = r2, r7 = r3).
 */
#define	COPY(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"mov " VR4(r) ".16b," VR0(r) ".16b\n"			\
		"mov " VR5(r) ".16b," VR1(r) ".16b\n"			\
		"mov " VR6(r) ".16b," VR2(r) ".16b\n"			\
		"mov " VR7(r) ".16b," VR3(r) ".16b\n"			\
		:	WVR4(r), WVR5(r), WVR6(r), WVR7(r)		\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm(							\
		"mov " VR2(r) ".16b," VR0(r) ".16b\n"			\
		"mov " VR3(r) ".16b," VR1(r) ".16b\n"			\
		:	WVR2(r), WVR3(r)				\
		:	RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
288eda14cbcSMatt Macy 
/*
 * LOAD(src, r...): load 2, 4, or 8 consecutive 16-byte vectors from src
 * into the named registers.
 */
#define	LOAD(src, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"ld1 { " VR0(r) ".4s },%[SRC0]\n"			\
		"ld1 { " VR1(r) ".4s },%[SRC1]\n"			\
		"ld1 { " VR2(r) ".4s },%[SRC2]\n"			\
		"ld1 { " VR3(r) ".4s },%[SRC3]\n"			\
		"ld1 { " VR4(r) ".4s },%[SRC4]\n"			\
		"ld1 { " VR5(r) ".4s },%[SRC5]\n"			\
		"ld1 { " VR6(r) ".4s },%[SRC6]\n"			\
		"ld1 { " VR7(r) ".4s },%[SRC7]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)		\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16))),			\
		[SRC2] "Q" (*(OFFSET(src, 32))),			\
		[SRC3] "Q" (*(OFFSET(src, 48))),			\
		[SRC4] "Q" (*(OFFSET(src, 64))),			\
		[SRC5] "Q" (*(OFFSET(src, 80))),			\
		[SRC6] "Q" (*(OFFSET(src, 96))),			\
		[SRC7] "Q" (*(OFFSET(src, 112))));			\
		break;							\
	case 4:								\
		__asm(							\
		"ld1 { " VR0(r) ".4s },%[SRC0]\n"			\
		"ld1 { " VR1(r) ".4s },%[SRC1]\n"			\
		"ld1 { " VR2(r) ".4s },%[SRC2]\n"			\
		"ld1 { " VR3(r) ".4s },%[SRC3]\n"			\
		:	WVR0(r), WVR1(r), WVR2(r), WVR3(r)		\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16))),			\
		[SRC2] "Q" (*(OFFSET(src, 32))),			\
		[SRC3] "Q" (*(OFFSET(src, 48))));			\
		break;							\
	case 2:								\
		__asm(							\
		"ld1 { " VR0(r) ".4s },%[SRC0]\n"			\
		"ld1 { " VR1(r) ".4s },%[SRC1]\n"			\
		:	WVR0(r), WVR1(r)				\
		:	[SRC0] "Q" (*(OFFSET(src, 0))),			\
		[SRC1] "Q" (*(OFFSET(src, 16))));			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
337eda14cbcSMatt Macy 
/*
 * STORE(dst, r...): store 2, 4, or 8 named registers to consecutive
 * 16-byte slots at dst.
 */
#define	STORE(dst, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm(							\
		"st1 { " VR0(r) ".4s },%[DST0]\n"			\
		"st1 { " VR1(r) ".4s },%[DST1]\n"			\
		"st1 { " VR2(r) ".4s },%[DST2]\n"			\
		"st1 { " VR3(r) ".4s },%[DST3]\n"			\
		"st1 { " VR4(r) ".4s },%[DST4]\n"			\
		"st1 { " VR5(r) ".4s },%[DST5]\n"			\
		"st1 { " VR6(r) ".4s },%[DST6]\n"			\
		"st1 { " VR7(r) ".4s },%[DST7]\n"			\
		:	[DST0] "=Q" (*(OFFSET(dst, 0))),		\
		[DST1] "=Q" (*(OFFSET(dst, 16))),			\
		[DST2] "=Q" (*(OFFSET(dst, 32))),			\
		[DST3] "=Q" (*(OFFSET(dst, 48))),			\
		[DST4] "=Q" (*(OFFSET(dst, 64))),			\
		[DST5] "=Q" (*(OFFSET(dst, 80))),			\
		[DST6] "=Q" (*(OFFSET(dst, 96))),			\
		[DST7] "=Q" (*(OFFSET(dst, 112)))			\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r),		\
			RVR4(r), RVR5(r), RVR6(r), RVR7(r));		\
		break;							\
	case 4:								\
		__asm(							\
		"st1 { " VR0(r) ".4s },%[DST0]\n"			\
		"st1 { " VR1(r) ".4s },%[DST1]\n"			\
		"st1 { " VR2(r) ".4s },%[DST2]\n"			\
		"st1 { " VR3(r) ".4s },%[DST3]\n"			\
		:	[DST0] "=Q" (*(OFFSET(dst, 0))),		\
		[DST1] "=Q" (*(OFFSET(dst, 16))),			\
		[DST2] "=Q" (*(OFFSET(dst, 32))),			\
		[DST3] "=Q" (*(OFFSET(dst, 48)))			\
		:	RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 2:								\
		__asm(							\
		"st1 { " VR0(r) ".4s },%[DST0]\n"			\
		"st1 { " VR1(r) ".4s },%[DST1]\n"			\
		:	[DST0] "=Q" (*(OFFSET(dst, 0))),		\
		[DST1] "=Q" (*(OFFSET(dst, 16)))			\
		:	RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
386eda14cbcSMatt Macy 
/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a numbered variable is
 */
#define	_00	"v17"
#define	_1d	"v16"
#define	_temp0	"v19"
#define	_temp1	"v18"

/* Prepare MUL2's constants: v17 = all-zero, v16 = 0x1d in every byte. */
#define	MUL2_SETUP()							\
{									\
	__asm(								\
	"eor " VR(17) ".16b," VR(17) ".16b," VR(17) ".16b\n"		\
	"movi " VR(16) ".16b,#0x1d\n"					\
	:	WVR(16), WVR(17));					\
}
405eda14cbcSMatt Macy 
/*
 * MUL2(r...): multiply each byte of 2 or 4 registers by 2 in GF(2^8):
 * shift left by one and conditionally XOR 0x1d (from v16) into bytes
 * whose high bit was set (detected with cmgt against zero in v17).
 * Requires MUL2_SETUP() to have initialized v16/v17.
 * Scratch vectors v18-v21 are clobbered (v18-v19 in the 2-register case).
 */
#define	MUL2(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm(							\
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n"		\
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n"		\
		"cmgt v21.16b," VR(17) ".16b," VR2(r) ".16b\n"		\
		"cmgt v20.16b," VR(17) ".16b," VR3(r) ".16b\n"		\
		"and v19.16b,v19.16b," VR(16) ".16b\n"			\
		"and v18.16b,v18.16b," VR(16) ".16b\n"			\
		"and v21.16b,v21.16b," VR(16) ".16b\n"			\
		"and v20.16b,v20.16b," VR(16) ".16b\n"			\
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n"		\
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n"		\
		"shl " VR2(r) ".16b," VR2(r) ".16b,#1\n"		\
		"shl " VR3(r) ".16b," VR3(r) ".16b,#1\n"		\
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n"		\
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n"		\
		"eor " VR2(r) ".16b,v21.16b," VR2(r) ".16b\n"		\
		"eor " VR3(r) ".16b,v20.16b," VR3(r) ".16b\n"		\
		:	UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		:	RVR(17), RVR(16)				\
		:	"v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm(							\
		"cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n"		\
		"cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n"		\
		"and v19.16b,v19.16b," VR(16) ".16b\n"			\
		"and v18.16b,v18.16b," VR(16) ".16b\n"			\
		"shl " VR0(r) ".16b," VR0(r) ".16b,#1\n"		\
		"shl " VR1(r) ".16b," VR1(r) ".16b,#1\n"		\
		"eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n"		\
		"eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n"		\
		:	UVR0(r), UVR1(r)				\
		:	RVR(17), RVR(16)				\
		:	"v18", "v19");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
449eda14cbcSMatt Macy 
/* MUL4(r...): multiply by 4 in GF(2^8), i.e. apply MUL2 twice. */
#define	MUL4(r...)							\
{									\
	MUL2(r);							\
	MUL2(r);							\
}
455eda14cbcSMatt Macy 
/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a register is
 * (here we're using actual registers for the
 * clobbered ones)
 */
#define	_0f		"v15"
#define	_a_save		"v14"
#define	_b_save		"v13"
#define	_lt_mod_a	"v12"
#define	_lt_clmul_a	"v11"
#define	_lt_mod_b	"v10"
#define	_lt_clmul_b	"v15"

/*
 * _MULx2(c, r...): multiply two registers by the GF(2^8) constant c via
 * four 16-entry table lookups (tbl) from gf_clmul_mod_lt: the high and
 * low nibbles of each byte are looked up separately and the partial
 * products XORed together.  Scratch vectors v10-v15 are clobbered.
 */
#define	_MULx2(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 2:								\
		__asm(							\
		/* lts for upper part */				\
		"movi v15.16b,#0x0f\n"					\
		"ld1 { v10.4s },%[lt0]\n"				\
		"ld1 { v11.4s },%[lt1]\n"				\
		/* upper part */					\
		"and v14.16b," VR0(r) ".16b,v15.16b\n"			\
		"and v13.16b," VR1(r) ".16b,v15.16b\n"			\
		"ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n"		\
		"ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n"		\
									\
		"tbl v12.16b,{v10.16b}," VR0(r) ".16b\n"		\
		"tbl v10.16b,{v10.16b}," VR1(r) ".16b\n"		\
		"tbl v15.16b,{v11.16b}," VR0(r) ".16b\n"		\
		"tbl v11.16b,{v11.16b}," VR1(r) ".16b\n"		\
									\
		"eor " VR0(r) ".16b,v15.16b,v12.16b\n"			\
		"eor " VR1(r) ".16b,v11.16b,v10.16b\n"			\
		/* lts for lower part */				\
		"ld1 { v10.4s },%[lt2]\n"				\
		"ld1 { v15.4s },%[lt3]\n"				\
		/* lower part */					\
		"tbl v12.16b,{v10.16b},v14.16b\n"			\
		"tbl v10.16b,{v10.16b},v13.16b\n"			\
		"tbl v11.16b,{v15.16b},v14.16b\n"			\
		"tbl v15.16b,{v15.16b},v13.16b\n"			\
									\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v12.16b\n"		\
		"eor " VR1(r) ".16b," VR1(r) ".16b,v10.16b\n"		\
		"eor " VR0(r) ".16b," VR0(r) ".16b,v11.16b\n"		\
		"eor " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n"		\
		:	UVR0(r), UVR1(r)				\
		:	[lt0] "Q" ((gf_clmul_mod_lt[4*(c)+0][0])),	\
		[lt1] "Q" ((gf_clmul_mod_lt[4*(c)+1][0])),		\
		[lt2] "Q" ((gf_clmul_mod_lt[4*(c)+2][0])),		\
		[lt3] "Q" ((gf_clmul_mod_lt[4*(c)+3][0]))		\
		:	"v10", "v11", "v12", "v13", "v14", "v15");	\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
518eda14cbcSMatt Macy 
/*
 * MUL(c, r...): multiply 2 or 4 registers by constant c in GF(2^8),
 * two registers at a time via _MULx2.
 */
#define	MUL(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		_MULx2(c, R_23(r));					\
		_MULx2(c, R_01(r));					\
		break;							\
	case 2:								\
		_MULx2(c, R_01(r));					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}
533eda14cbcSMatt Macy 
/* Bracket raidz math with kernel FPU/SIMD enable/disable. */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()
536eda14cbcSMatt Macy 
537eda14cbcSMatt Macy /* Overkill... */
538eda14cbcSMatt Macy #if defined(_KERNEL)
539eda14cbcSMatt Macy #define	GEN_X_DEFINE_0_3()	\
540eda14cbcSMatt Macy register unsigned char w0 asm("v0") __attribute__((vector_size(16)));	\
541eda14cbcSMatt Macy register unsigned char w1 asm("v1") __attribute__((vector_size(16)));	\
542eda14cbcSMatt Macy register unsigned char w2 asm("v2") __attribute__((vector_size(16)));	\
543eda14cbcSMatt Macy register unsigned char w3 asm("v3") __attribute__((vector_size(16)));
544eda14cbcSMatt Macy #define	GEN_X_DEFINE_4_5()	\
545eda14cbcSMatt Macy register unsigned char w4 asm("v4") __attribute__((vector_size(16)));	\
546eda14cbcSMatt Macy register unsigned char w5 asm("v5") __attribute__((vector_size(16)));
547eda14cbcSMatt Macy #define	GEN_X_DEFINE_6_7()	\
548eda14cbcSMatt Macy register unsigned char w6 asm("v6") __attribute__((vector_size(16)));	\
549eda14cbcSMatt Macy register unsigned char w7 asm("v7") __attribute__((vector_size(16)));
550eda14cbcSMatt Macy #define	GEN_X_DEFINE_8_9()	\
551eda14cbcSMatt Macy register unsigned char w8 asm("v8") __attribute__((vector_size(16)));	\
552eda14cbcSMatt Macy register unsigned char w9 asm("v9") __attribute__((vector_size(16)));
553eda14cbcSMatt Macy #define	GEN_X_DEFINE_10_11()	\
554eda14cbcSMatt Macy register unsigned char w10 asm("v10") __attribute__((vector_size(16)));	\
555eda14cbcSMatt Macy register unsigned char w11 asm("v11") __attribute__((vector_size(16)));
556eda14cbcSMatt Macy #define	GEN_X_DEFINE_12_15()	\
557eda14cbcSMatt Macy register unsigned char w12 asm("v12") __attribute__((vector_size(16)));	\
558eda14cbcSMatt Macy register unsigned char w13 asm("v13") __attribute__((vector_size(16)));	\
559eda14cbcSMatt Macy register unsigned char w14 asm("v14") __attribute__((vector_size(16)));	\
560eda14cbcSMatt Macy register unsigned char w15 asm("v15") __attribute__((vector_size(16)));
561eda14cbcSMatt Macy #define	GEN_X_DEFINE_16()	\
562eda14cbcSMatt Macy register unsigned char w16 asm("v16") __attribute__((vector_size(16)));
563eda14cbcSMatt Macy #define	GEN_X_DEFINE_17()	\
564eda14cbcSMatt Macy register unsigned char w17 asm("v17") __attribute__((vector_size(16)));
565eda14cbcSMatt Macy #define	GEN_X_DEFINE_18_21()	\
566eda14cbcSMatt Macy register unsigned char w18 asm("v18") __attribute__((vector_size(16)));	\
567eda14cbcSMatt Macy register unsigned char w19 asm("v19") __attribute__((vector_size(16)));	\
568eda14cbcSMatt Macy register unsigned char w20 asm("v20") __attribute__((vector_size(16)));	\
569eda14cbcSMatt Macy register unsigned char w21 asm("v21") __attribute__((vector_size(16)));
570eda14cbcSMatt Macy #define	GEN_X_DEFINE_22_23()	\
571eda14cbcSMatt Macy register unsigned char w22 asm("v22") __attribute__((vector_size(16)));	\
572eda14cbcSMatt Macy register unsigned char w23 asm("v23") __attribute__((vector_size(16)));
573eda14cbcSMatt Macy #define	GEN_X_DEFINE_24_27()	\
574eda14cbcSMatt Macy register unsigned char w24 asm("v24") __attribute__((vector_size(16)));	\
575eda14cbcSMatt Macy register unsigned char w25 asm("v25") __attribute__((vector_size(16)));	\
576eda14cbcSMatt Macy register unsigned char w26 asm("v26") __attribute__((vector_size(16)));	\
577eda14cbcSMatt Macy register unsigned char w27 asm("v27") __attribute__((vector_size(16)));
578eda14cbcSMatt Macy #define	GEN_X_DEFINE_28_30()	\
579eda14cbcSMatt Macy register unsigned char w28 asm("v28") __attribute__((vector_size(16)));	\
580eda14cbcSMatt Macy register unsigned char w29 asm("v29") __attribute__((vector_size(16)));	\
581eda14cbcSMatt Macy register unsigned char w30 asm("v30") __attribute__((vector_size(16)));
582eda14cbcSMatt Macy #define	GEN_X_DEFINE_31()	\
583eda14cbcSMatt Macy register unsigned char w31 asm("v31") __attribute__((vector_size(16)));
584eda14cbcSMatt Macy #define	GEN_X_DEFINE_32()	\
585eda14cbcSMatt Macy register unsigned char w32 asm("v31") __attribute__((vector_size(16)));
586eda14cbcSMatt Macy #define	GEN_X_DEFINE_33_36()	\
587eda14cbcSMatt Macy register unsigned char w33 asm("v31") __attribute__((vector_size(16)));	\
588eda14cbcSMatt Macy register unsigned char w34 asm("v31") __attribute__((vector_size(16)));	\
589eda14cbcSMatt Macy register unsigned char w35 asm("v31") __attribute__((vector_size(16)));	\
590eda14cbcSMatt Macy register unsigned char w36 asm("v31") __attribute__((vector_size(16)));
591eda14cbcSMatt Macy #define	GEN_X_DEFINE_37_38()	\
592eda14cbcSMatt Macy register unsigned char w37 asm("v31") __attribute__((vector_size(16)));	\
593eda14cbcSMatt Macy register unsigned char w38 asm("v31") __attribute__((vector_size(16)));
594eda14cbcSMatt Macy #define	GEN_X_DEFINE_ALL()	\
595eda14cbcSMatt Macy 	GEN_X_DEFINE_0_3()	\
596eda14cbcSMatt Macy 	GEN_X_DEFINE_4_5()	\
597eda14cbcSMatt Macy 	GEN_X_DEFINE_6_7()	\
598eda14cbcSMatt Macy 	GEN_X_DEFINE_8_9()	\
599eda14cbcSMatt Macy 	GEN_X_DEFINE_10_11()	\
600eda14cbcSMatt Macy 	GEN_X_DEFINE_12_15()	\
601eda14cbcSMatt Macy 	GEN_X_DEFINE_16()	\
602eda14cbcSMatt Macy 	GEN_X_DEFINE_17()	\
603eda14cbcSMatt Macy 	GEN_X_DEFINE_18_21()	\
604eda14cbcSMatt Macy 	GEN_X_DEFINE_22_23()	\
605eda14cbcSMatt Macy 	GEN_X_DEFINE_24_27()	\
606eda14cbcSMatt Macy 	GEN_X_DEFINE_28_30()	\
607eda14cbcSMatt Macy 	GEN_X_DEFINE_31()	\
608eda14cbcSMatt Macy 	GEN_X_DEFINE_32()	\
609eda14cbcSMatt Macy 	GEN_X_DEFINE_33_36() 	\
610eda14cbcSMatt Macy 	GEN_X_DEFINE_37_38()
611eda14cbcSMatt Macy #else
612eda14cbcSMatt Macy #define	GEN_X_DEFINE_0_3()	\
613eda14cbcSMatt Macy 	unsigned char w0 __attribute__((vector_size(16)));	\
614eda14cbcSMatt Macy 	unsigned char w1 __attribute__((vector_size(16)));	\
615eda14cbcSMatt Macy 	unsigned char w2 __attribute__((vector_size(16)));	\
616eda14cbcSMatt Macy 	unsigned char w3 __attribute__((vector_size(16)));
617eda14cbcSMatt Macy #define	GEN_X_DEFINE_4_5()	\
618eda14cbcSMatt Macy 	unsigned char w4 __attribute__((vector_size(16)));	\
619eda14cbcSMatt Macy 	unsigned char w5 __attribute__((vector_size(16)));
620eda14cbcSMatt Macy #define	GEN_X_DEFINE_6_7()	\
621eda14cbcSMatt Macy 	unsigned char w6 __attribute__((vector_size(16)));	\
622eda14cbcSMatt Macy 	unsigned char w7 __attribute__((vector_size(16)));
623eda14cbcSMatt Macy #define	GEN_X_DEFINE_8_9()	\
624eda14cbcSMatt Macy 	unsigned char w8 __attribute__((vector_size(16)));	\
625eda14cbcSMatt Macy 	unsigned char w9 __attribute__((vector_size(16)));
626eda14cbcSMatt Macy #define	GEN_X_DEFINE_10_11()	\
627eda14cbcSMatt Macy 	unsigned char w10 __attribute__((vector_size(16)));	\
628eda14cbcSMatt Macy 	unsigned char w11 __attribute__((vector_size(16)));
629eda14cbcSMatt Macy #define	GEN_X_DEFINE_12_15()	\
630eda14cbcSMatt Macy 	unsigned char w12 __attribute__((vector_size(16)));	\
631eda14cbcSMatt Macy 	unsigned char w13 __attribute__((vector_size(16)));	\
632eda14cbcSMatt Macy 	unsigned char w14 __attribute__((vector_size(16)));	\
633eda14cbcSMatt Macy 	unsigned char w15 __attribute__((vector_size(16)));
634eda14cbcSMatt Macy #define	GEN_X_DEFINE_16()	\
635eda14cbcSMatt Macy 	unsigned char w16 __attribute__((vector_size(16)));
636eda14cbcSMatt Macy #define	GEN_X_DEFINE_17()	\
637eda14cbcSMatt Macy 	unsigned char w17 __attribute__((vector_size(16)));
638eda14cbcSMatt Macy #define	GEN_X_DEFINE_18_21()	\
639eda14cbcSMatt Macy 	unsigned char w18 __attribute__((vector_size(16)));	\
640eda14cbcSMatt Macy 	unsigned char w19 __attribute__((vector_size(16)));	\
641eda14cbcSMatt Macy 	unsigned char w20 __attribute__((vector_size(16)));	\
642eda14cbcSMatt Macy 	unsigned char w21 __attribute__((vector_size(16)));
643eda14cbcSMatt Macy #define	GEN_X_DEFINE_22_23()	\
644eda14cbcSMatt Macy 	unsigned char w22 __attribute__((vector_size(16)));	\
645eda14cbcSMatt Macy 	unsigned char w23 __attribute__((vector_size(16)));
646eda14cbcSMatt Macy #define	GEN_X_DEFINE_24_27()	\
647eda14cbcSMatt Macy 	unsigned char w24 __attribute__((vector_size(16)));	\
648eda14cbcSMatt Macy 	unsigned char w25 __attribute__((vector_size(16)));	\
649eda14cbcSMatt Macy 	unsigned char w26 __attribute__((vector_size(16)));	\
650eda14cbcSMatt Macy 	unsigned char w27 __attribute__((vector_size(16)));
651eda14cbcSMatt Macy #define	GEN_X_DEFINE_28_30()	\
652eda14cbcSMatt Macy 	unsigned char w28 __attribute__((vector_size(16)));	\
653eda14cbcSMatt Macy 	unsigned char w29 __attribute__((vector_size(16)));	\
654eda14cbcSMatt Macy 	unsigned char w30 __attribute__((vector_size(16)));
655eda14cbcSMatt Macy #define	GEN_X_DEFINE_31()	\
656eda14cbcSMatt Macy 	unsigned char w31 __attribute__((vector_size(16)));
657eda14cbcSMatt Macy #define	GEN_X_DEFINE_32()	\
658eda14cbcSMatt Macy 	unsigned char w32 __attribute__((vector_size(16)));
659eda14cbcSMatt Macy #define	GEN_X_DEFINE_33_36()	\
660eda14cbcSMatt Macy 	unsigned char w33 __attribute__((vector_size(16)));	\
661eda14cbcSMatt Macy 	unsigned char w34 __attribute__((vector_size(16)));	\
662eda14cbcSMatt Macy 	unsigned char w35 __attribute__((vector_size(16)));	\
663eda14cbcSMatt Macy 	unsigned char w36 __attribute__((vector_size(16)));
664eda14cbcSMatt Macy #define	GEN_X_DEFINE_37_38()	\
665eda14cbcSMatt Macy 	unsigned char w37 __attribute__((vector_size(16)));	\
666eda14cbcSMatt Macy 	unsigned char w38 __attribute__((vector_size(16)));
667eda14cbcSMatt Macy #define	GEN_X_DEFINE_ALL()	\
668eda14cbcSMatt Macy 	GEN_X_DEFINE_0_3()	\
669eda14cbcSMatt Macy 	GEN_X_DEFINE_4_5()	\
670eda14cbcSMatt Macy 	GEN_X_DEFINE_6_7()	\
671eda14cbcSMatt Macy 	GEN_X_DEFINE_8_9()	\
672eda14cbcSMatt Macy 	GEN_X_DEFINE_10_11()	\
673eda14cbcSMatt Macy 	GEN_X_DEFINE_12_15()	\
674eda14cbcSMatt Macy 	GEN_X_DEFINE_16()	\
675eda14cbcSMatt Macy 	GEN_X_DEFINE_17()	\
676eda14cbcSMatt Macy 	GEN_X_DEFINE_18_21()	\
677eda14cbcSMatt Macy 	GEN_X_DEFINE_22_23()	\
678eda14cbcSMatt Macy 	GEN_X_DEFINE_24_27()	\
679eda14cbcSMatt Macy 	GEN_X_DEFINE_28_30()	\
680eda14cbcSMatt Macy 	GEN_X_DEFINE_31()	\
681eda14cbcSMatt Macy 	GEN_X_DEFINE_32()	\
682eda14cbcSMatt Macy 	GEN_X_DEFINE_33_36()	\
683eda14cbcSMatt Macy 	GEN_X_DEFINE_37_38()
684eda14cbcSMatt Macy #endif
685