1#version 450
2
3#extension GL_KHR_shader_subgroup_arithmetic: enable
4#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
5#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
6#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
7#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
8
// Compute workgroup size: 8 invocations along x. The y and z sizes are not
// specified in this declaration.
9layout (local_size_x = 8) in;
10
// Storage block at binding 0, declared as an array of four instances
// (data[0] .. data[3]). Each instance holds one 4-component vector per
// extended arithmetic type; the 8-, 16- and 64-bit integer and 16-bit float
// types come from the GL_EXT_shader_explicit_arithmetic_types_* extensions
// enabled at the top of the file.
// main() takes its subgroup-operation operands from the fixed instances
// data[0] .. data[3] and stores the results into data[invocation].
11layout(binding = 0) buffer Buffers
12{
13    i8vec4 i8;      // 4 x 8-bit signed integer
14    u8vec4 u8;      // 4 x 8-bit unsigned integer
15    i16vec4 i16;    // 4 x 16-bit signed integer
16    u16vec4 u16;    // 4 x 16-bit unsigned integer
17    i64vec4 i64;    // 4 x 64-bit signed integer
18    u64vec4 u64;    // 4 x 64-bit unsigned integer
19    f16vec4 f16;    // 4 x 16-bit float
20} data[4];
21
22void main()
23{
24    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
25
26    data[invocation].i8.x   = subgroupAdd(data[0].i8.x);
27    data[invocation].i8.xy  = subgroupAdd(data[1].i8.xy);
28    data[invocation].i8.xyz = subgroupAdd(data[2].i8.xyz);
29    data[invocation].i8     = subgroupAdd(data[3].i8);
30
31    data[invocation].i8.x   = subgroupMul(data[0].i8.x);
32    data[invocation].i8.xy  = subgroupMul(data[1].i8.xy);
33    data[invocation].i8.xyz = subgroupMul(data[2].i8.xyz);
34    data[invocation].i8     = subgroupMul(data[3].i8);
35
36    data[invocation].i8.x   = subgroupMin(data[0].i8.x);
37    data[invocation].i8.xy  = subgroupMin(data[1].i8.xy);
38    data[invocation].i8.xyz = subgroupMin(data[2].i8.xyz);
39    data[invocation].i8     = subgroupMin(data[3].i8);
40
41    data[invocation].i8.x   = subgroupMax(data[0].i8.x);
42    data[invocation].i8.xy  = subgroupMax(data[1].i8.xy);
43    data[invocation].i8.xyz = subgroupMax(data[2].i8.xyz);
44    data[invocation].i8     = subgroupMax(data[3].i8);
45
46    data[invocation].i8.x   = subgroupAnd(data[0].i8.x);
47    data[invocation].i8.xy  = subgroupAnd(data[1].i8.xy);
48    data[invocation].i8.xyz = subgroupAnd(data[2].i8.xyz);
49    data[invocation].i8     = subgroupAnd(data[3].i8);
50
51    data[invocation].i8.x   = subgroupOr(data[0].i8.x);
52    data[invocation].i8.xy  = subgroupOr(data[1].i8.xy);
53    data[invocation].i8.xyz = subgroupOr(data[2].i8.xyz);
54    data[invocation].i8     = subgroupOr(data[3].i8);
55
56    data[invocation].i8.x   = subgroupXor(data[0].i8.x);
57    data[invocation].i8.xy  = subgroupXor(data[1].i8.xy);
58    data[invocation].i8.xyz = subgroupXor(data[2].i8.xyz);
59    data[invocation].i8     = subgroupXor(data[3].i8);
60
61    data[invocation].i8.x   = subgroupInclusiveAdd(data[0].i8.x);
62    data[invocation].i8.xy  = subgroupInclusiveAdd(data[1].i8.xy);
63    data[invocation].i8.xyz = subgroupInclusiveAdd(data[2].i8.xyz);
64    data[invocation].i8     = subgroupInclusiveAdd(data[3].i8);
65
66    data[invocation].i8.x   = subgroupInclusiveMul(data[0].i8.x);
67    data[invocation].i8.xy  = subgroupInclusiveMul(data[1].i8.xy);
68    data[invocation].i8.xyz = subgroupInclusiveMul(data[2].i8.xyz);
69    data[invocation].i8     = subgroupInclusiveMul(data[3].i8);
70
71    data[invocation].i8.x   = subgroupInclusiveMin(data[0].i8.x);
72    data[invocation].i8.xy  = subgroupInclusiveMin(data[1].i8.xy);
73    data[invocation].i8.xyz = subgroupInclusiveMin(data[2].i8.xyz);
74    data[invocation].i8     = subgroupInclusiveMin(data[3].i8);
75
76    data[invocation].i8.x   = subgroupInclusiveMax(data[0].i8.x);
77    data[invocation].i8.xy  = subgroupInclusiveMax(data[1].i8.xy);
78    data[invocation].i8.xyz = subgroupInclusiveMax(data[2].i8.xyz);
79    data[invocation].i8     = subgroupInclusiveMax(data[3].i8);
80
81    data[invocation].i8.x   = subgroupInclusiveAnd(data[0].i8.x);
82    data[invocation].i8.xy  = subgroupInclusiveAnd(data[1].i8.xy);
83    data[invocation].i8.xyz = subgroupInclusiveAnd(data[2].i8.xyz);
84    data[invocation].i8     = subgroupInclusiveAnd(data[3].i8);
85
86    data[invocation].i8.x   = subgroupInclusiveOr(data[0].i8.x);
87    data[invocation].i8.xy  = subgroupInclusiveOr(data[1].i8.xy);
88    data[invocation].i8.xyz = subgroupInclusiveOr(data[2].i8.xyz);
89    data[invocation].i8     = subgroupInclusiveOr(data[3].i8);
90
91    data[invocation].i8.x   = subgroupInclusiveXor(data[0].i8.x);
92    data[invocation].i8.xy  = subgroupInclusiveXor(data[1].i8.xy);
93    data[invocation].i8.xyz = subgroupInclusiveXor(data[2].i8.xyz);
94    data[invocation].i8     = subgroupInclusiveXor(data[3].i8);
95
96    data[invocation].i8.x   = subgroupExclusiveAdd(data[0].i8.x);
97    data[invocation].i8.xy  = subgroupExclusiveAdd(data[1].i8.xy);
98    data[invocation].i8.xyz = subgroupExclusiveAdd(data[2].i8.xyz);
99    data[invocation].i8     = subgroupExclusiveAdd(data[3].i8);
100
101    data[invocation].i8.x   = subgroupExclusiveMul(data[0].i8.x);
102    data[invocation].i8.xy  = subgroupExclusiveMul(data[1].i8.xy);
103    data[invocation].i8.xyz = subgroupExclusiveMul(data[2].i8.xyz);
104    data[invocation].i8     = subgroupExclusiveMul(data[3].i8);
105
106    data[invocation].i8.x   = subgroupExclusiveMin(data[0].i8.x);
107    data[invocation].i8.xy  = subgroupExclusiveMin(data[1].i8.xy);
108    data[invocation].i8.xyz = subgroupExclusiveMin(data[2].i8.xyz);
109    data[invocation].i8     = subgroupExclusiveMin(data[3].i8);
110
111    data[invocation].i8.x   = subgroupExclusiveMax(data[0].i8.x);
112    data[invocation].i8.xy  = subgroupExclusiveMax(data[1].i8.xy);
113    data[invocation].i8.xyz = subgroupExclusiveMax(data[2].i8.xyz);
114    data[invocation].i8     = subgroupExclusiveMax(data[3].i8);
115
116    data[invocation].i8.x   = subgroupExclusiveAnd(data[0].i8.x);
117    data[invocation].i8.xy  = subgroupExclusiveAnd(data[1].i8.xy);
118    data[invocation].i8.xyz = subgroupExclusiveAnd(data[2].i8.xyz);
119    data[invocation].i8     = subgroupExclusiveAnd(data[3].i8);
120
121    data[invocation].i8.x   = subgroupExclusiveOr(data[0].i8.x);
122    data[invocation].i8.xy  = subgroupExclusiveOr(data[1].i8.xy);
123    data[invocation].i8.xyz = subgroupExclusiveOr(data[2].i8.xyz);
124    data[invocation].i8     = subgroupExclusiveOr(data[3].i8);
125
126    data[invocation].i8.x   = subgroupExclusiveXor(data[0].i8.x);
127    data[invocation].i8.xy  = subgroupExclusiveXor(data[1].i8.xy);
128    data[invocation].i8.xyz = subgroupExclusiveXor(data[2].i8.xyz);
129    data[invocation].i8     = subgroupExclusiveXor(data[3].i8);
130
131    data[invocation].u8.x   = subgroupAdd(data[0].u8.x);
132    data[invocation].u8.xy  = subgroupAdd(data[1].u8.xy);
133    data[invocation].u8.xyz = subgroupAdd(data[2].u8.xyz);
134    data[invocation].u8     = subgroupAdd(data[3].u8);
135
136    data[invocation].u8.x   = subgroupMul(data[0].u8.x);
137    data[invocation].u8.xy  = subgroupMul(data[1].u8.xy);
138    data[invocation].u8.xyz = subgroupMul(data[2].u8.xyz);
139    data[invocation].u8     = subgroupMul(data[3].u8);
140
141    data[invocation].u8.x   = subgroupMin(data[0].u8.x);
142    data[invocation].u8.xy  = subgroupMin(data[1].u8.xy);
143    data[invocation].u8.xyz = subgroupMin(data[2].u8.xyz);
144    data[invocation].u8     = subgroupMin(data[3].u8);
145
146    data[invocation].u8.x   = subgroupMax(data[0].u8.x);
147    data[invocation].u8.xy  = subgroupMax(data[1].u8.xy);
148    data[invocation].u8.xyz = subgroupMax(data[2].u8.xyz);
149    data[invocation].u8     = subgroupMax(data[3].u8);
150
151    data[invocation].u8.x   = subgroupAnd(data[0].u8.x);
152    data[invocation].u8.xy  = subgroupAnd(data[1].u8.xy);
153    data[invocation].u8.xyz = subgroupAnd(data[2].u8.xyz);
154    data[invocation].u8     = subgroupAnd(data[3].u8);
155
156    data[invocation].u8.x   = subgroupOr(data[0].u8.x);
157    data[invocation].u8.xy  = subgroupOr(data[1].u8.xy);
158    data[invocation].u8.xyz = subgroupOr(data[2].u8.xyz);
159    data[invocation].u8     = subgroupOr(data[3].u8);
160
161    data[invocation].u8.x   = subgroupXor(data[0].u8.x);
162    data[invocation].u8.xy  = subgroupXor(data[1].u8.xy);
163    data[invocation].u8.xyz = subgroupXor(data[2].u8.xyz);
164    data[invocation].u8     = subgroupXor(data[3].u8);
165
166    data[invocation].u8.x   = subgroupInclusiveAdd(data[0].u8.x);
167    data[invocation].u8.xy  = subgroupInclusiveAdd(data[1].u8.xy);
168    data[invocation].u8.xyz = subgroupInclusiveAdd(data[2].u8.xyz);
169    data[invocation].u8     = subgroupInclusiveAdd(data[3].u8);
170
171    data[invocation].u8.x   = subgroupInclusiveMul(data[0].u8.x);
172    data[invocation].u8.xy  = subgroupInclusiveMul(data[1].u8.xy);
173    data[invocation].u8.xyz = subgroupInclusiveMul(data[2].u8.xyz);
174    data[invocation].u8     = subgroupInclusiveMul(data[3].u8);
175
176    data[invocation].u8.x   = subgroupInclusiveMin(data[0].u8.x);
177    data[invocation].u8.xy  = subgroupInclusiveMin(data[1].u8.xy);
178    data[invocation].u8.xyz = subgroupInclusiveMin(data[2].u8.xyz);
179    data[invocation].u8     = subgroupInclusiveMin(data[3].u8);
180
181    data[invocation].u8.x   = subgroupInclusiveMax(data[0].u8.x);
182    data[invocation].u8.xy  = subgroupInclusiveMax(data[1].u8.xy);
183    data[invocation].u8.xyz = subgroupInclusiveMax(data[2].u8.xyz);
184    data[invocation].u8     = subgroupInclusiveMax(data[3].u8);
185
186    data[invocation].u8.x   = subgroupInclusiveAnd(data[0].u8.x);
187    data[invocation].u8.xy  = subgroupInclusiveAnd(data[1].u8.xy);
188    data[invocation].u8.xyz = subgroupInclusiveAnd(data[2].u8.xyz);
189    data[invocation].u8     = subgroupInclusiveAnd(data[3].u8);
190
191    data[invocation].u8.x   = subgroupInclusiveOr(data[0].u8.x);
192    data[invocation].u8.xy  = subgroupInclusiveOr(data[1].u8.xy);
193    data[invocation].u8.xyz = subgroupInclusiveOr(data[2].u8.xyz);
194    data[invocation].u8     = subgroupInclusiveOr(data[3].u8);
195
196    data[invocation].u8.x   = subgroupInclusiveXor(data[0].u8.x);
197    data[invocation].u8.xy  = subgroupInclusiveXor(data[1].u8.xy);
198    data[invocation].u8.xyz = subgroupInclusiveXor(data[2].u8.xyz);
199    data[invocation].u8     = subgroupInclusiveXor(data[3].u8);
200
201    data[invocation].u8.x   = subgroupExclusiveAdd(data[0].u8.x);
202    data[invocation].u8.xy  = subgroupExclusiveAdd(data[1].u8.xy);
203    data[invocation].u8.xyz = subgroupExclusiveAdd(data[2].u8.xyz);
204    data[invocation].u8     = subgroupExclusiveAdd(data[3].u8);
205
206    data[invocation].u8.x   = subgroupExclusiveMul(data[0].u8.x);
207    data[invocation].u8.xy  = subgroupExclusiveMul(data[1].u8.xy);
208    data[invocation].u8.xyz = subgroupExclusiveMul(data[2].u8.xyz);
209    data[invocation].u8     = subgroupExclusiveMul(data[3].u8);
210
211    data[invocation].u8.x   = subgroupExclusiveMin(data[0].u8.x);
212    data[invocation].u8.xy  = subgroupExclusiveMin(data[1].u8.xy);
213    data[invocation].u8.xyz = subgroupExclusiveMin(data[2].u8.xyz);
214    data[invocation].u8     = subgroupExclusiveMin(data[3].u8);
215
216    data[invocation].u8.x   = subgroupExclusiveMax(data[0].u8.x);
217    data[invocation].u8.xy  = subgroupExclusiveMax(data[1].u8.xy);
218    data[invocation].u8.xyz = subgroupExclusiveMax(data[2].u8.xyz);
219    data[invocation].u8     = subgroupExclusiveMax(data[3].u8);
220
221    data[invocation].u8.x   = subgroupExclusiveAnd(data[0].u8.x);
222    data[invocation].u8.xy  = subgroupExclusiveAnd(data[1].u8.xy);
223    data[invocation].u8.xyz = subgroupExclusiveAnd(data[2].u8.xyz);
224    data[invocation].u8     = subgroupExclusiveAnd(data[3].u8);
225
226    data[invocation].u8.x   = subgroupExclusiveOr(data[0].u8.x);
227    data[invocation].u8.xy  = subgroupExclusiveOr(data[1].u8.xy);
228    data[invocation].u8.xyz = subgroupExclusiveOr(data[2].u8.xyz);
229    data[invocation].u8     = subgroupExclusiveOr(data[3].u8);
230
231    data[invocation].u8.x   = subgroupExclusiveXor(data[0].u8.x);
232    data[invocation].u8.xy  = subgroupExclusiveXor(data[1].u8.xy);
233    data[invocation].u8.xyz = subgroupExclusiveXor(data[2].u8.xyz);
234    data[invocation].u8     = subgroupExclusiveXor(data[3].u8);
235
236    data[invocation].i16.x   = subgroupAdd(data[0].i16.x);
237    data[invocation].i16.xy  = subgroupAdd(data[1].i16.xy);
238    data[invocation].i16.xyz = subgroupAdd(data[2].i16.xyz);
239    data[invocation].i16     = subgroupAdd(data[3].i16);
240
241    data[invocation].i16.x   = subgroupMul(data[0].i16.x);
242    data[invocation].i16.xy  = subgroupMul(data[1].i16.xy);
243    data[invocation].i16.xyz = subgroupMul(data[2].i16.xyz);
244    data[invocation].i16     = subgroupMul(data[3].i16);
245
246    data[invocation].i16.x   = subgroupMin(data[0].i16.x);
247    data[invocation].i16.xy  = subgroupMin(data[1].i16.xy);
248    data[invocation].i16.xyz = subgroupMin(data[2].i16.xyz);
249    data[invocation].i16     = subgroupMin(data[3].i16);
250
251    data[invocation].i16.x   = subgroupMax(data[0].i16.x);
252    data[invocation].i16.xy  = subgroupMax(data[1].i16.xy);
253    data[invocation].i16.xyz = subgroupMax(data[2].i16.xyz);
254    data[invocation].i16     = subgroupMax(data[3].i16);
255
256    data[invocation].i16.x   = subgroupAnd(data[0].i16.x);
257    data[invocation].i16.xy  = subgroupAnd(data[1].i16.xy);
258    data[invocation].i16.xyz = subgroupAnd(data[2].i16.xyz);
259    data[invocation].i16     = subgroupAnd(data[3].i16);
260
261    data[invocation].i16.x   = subgroupOr(data[0].i16.x);
262    data[invocation].i16.xy  = subgroupOr(data[1].i16.xy);
263    data[invocation].i16.xyz = subgroupOr(data[2].i16.xyz);
264    data[invocation].i16     = subgroupOr(data[3].i16);
265
266    data[invocation].i16.x   = subgroupXor(data[0].i16.x);
267    data[invocation].i16.xy  = subgroupXor(data[1].i16.xy);
268    data[invocation].i16.xyz = subgroupXor(data[2].i16.xyz);
269    data[invocation].i16     = subgroupXor(data[3].i16);
270
271    data[invocation].i16.x   = subgroupInclusiveAdd(data[0].i16.x);
272    data[invocation].i16.xy  = subgroupInclusiveAdd(data[1].i16.xy);
273    data[invocation].i16.xyz = subgroupInclusiveAdd(data[2].i16.xyz);
274    data[invocation].i16     = subgroupInclusiveAdd(data[3].i16);
275
276    data[invocation].i16.x   = subgroupInclusiveMul(data[0].i16.x);
277    data[invocation].i16.xy  = subgroupInclusiveMul(data[1].i16.xy);
278    data[invocation].i16.xyz = subgroupInclusiveMul(data[2].i16.xyz);
279    data[invocation].i16     = subgroupInclusiveMul(data[3].i16);
280
281    data[invocation].i16.x   = subgroupInclusiveMin(data[0].i16.x);
282    data[invocation].i16.xy  = subgroupInclusiveMin(data[1].i16.xy);
283    data[invocation].i16.xyz = subgroupInclusiveMin(data[2].i16.xyz);
284    data[invocation].i16     = subgroupInclusiveMin(data[3].i16);
285
286    data[invocation].i16.x   = subgroupInclusiveMax(data[0].i16.x);
287    data[invocation].i16.xy  = subgroupInclusiveMax(data[1].i16.xy);
288    data[invocation].i16.xyz = subgroupInclusiveMax(data[2].i16.xyz);
289    data[invocation].i16     = subgroupInclusiveMax(data[3].i16);
290
291    data[invocation].i16.x   = subgroupInclusiveAnd(data[0].i16.x);
292    data[invocation].i16.xy  = subgroupInclusiveAnd(data[1].i16.xy);
293    data[invocation].i16.xyz = subgroupInclusiveAnd(data[2].i16.xyz);
294    data[invocation].i16     = subgroupInclusiveAnd(data[3].i16);
295
296    data[invocation].i16.x   = subgroupInclusiveOr(data[0].i16.x);
297    data[invocation].i16.xy  = subgroupInclusiveOr(data[1].i16.xy);
298    data[invocation].i16.xyz = subgroupInclusiveOr(data[2].i16.xyz);
299    data[invocation].i16     = subgroupInclusiveOr(data[3].i16);
300
301    data[invocation].i16.x   = subgroupInclusiveXor(data[0].i16.x);
302    data[invocation].i16.xy  = subgroupInclusiveXor(data[1].i16.xy);
303    data[invocation].i16.xyz = subgroupInclusiveXor(data[2].i16.xyz);
304    data[invocation].i16     = subgroupInclusiveXor(data[3].i16);
305
306    data[invocation].i16.x   = subgroupExclusiveAdd(data[0].i16.x);
307    data[invocation].i16.xy  = subgroupExclusiveAdd(data[1].i16.xy);
308    data[invocation].i16.xyz = subgroupExclusiveAdd(data[2].i16.xyz);
309    data[invocation].i16     = subgroupExclusiveAdd(data[3].i16);
310
311    data[invocation].i16.x   = subgroupExclusiveMul(data[0].i16.x);
312    data[invocation].i16.xy  = subgroupExclusiveMul(data[1].i16.xy);
313    data[invocation].i16.xyz = subgroupExclusiveMul(data[2].i16.xyz);
314    data[invocation].i16     = subgroupExclusiveMul(data[3].i16);
315
316    data[invocation].i16.x   = subgroupExclusiveMin(data[0].i16.x);
317    data[invocation].i16.xy  = subgroupExclusiveMin(data[1].i16.xy);
318    data[invocation].i16.xyz = subgroupExclusiveMin(data[2].i16.xyz);
319    data[invocation].i16     = subgroupExclusiveMin(data[3].i16);
320
321    data[invocation].i16.x   = subgroupExclusiveMax(data[0].i16.x);
322    data[invocation].i16.xy  = subgroupExclusiveMax(data[1].i16.xy);
323    data[invocation].i16.xyz = subgroupExclusiveMax(data[2].i16.xyz);
324    data[invocation].i16     = subgroupExclusiveMax(data[3].i16);
325
326    data[invocation].i16.x   = subgroupExclusiveAnd(data[0].i16.x);
327    data[invocation].i16.xy  = subgroupExclusiveAnd(data[1].i16.xy);
328    data[invocation].i16.xyz = subgroupExclusiveAnd(data[2].i16.xyz);
329    data[invocation].i16     = subgroupExclusiveAnd(data[3].i16);
330
331    data[invocation].i16.x   = subgroupExclusiveOr(data[0].i16.x);
332    data[invocation].i16.xy  = subgroupExclusiveOr(data[1].i16.xy);
333    data[invocation].i16.xyz = subgroupExclusiveOr(data[2].i16.xyz);
334    data[invocation].i16     = subgroupExclusiveOr(data[3].i16);
335
336    data[invocation].i16.x   = subgroupExclusiveXor(data[0].i16.x);
337    data[invocation].i16.xy  = subgroupExclusiveXor(data[1].i16.xy);
338    data[invocation].i16.xyz = subgroupExclusiveXor(data[2].i16.xyz);
339    data[invocation].i16     = subgroupExclusiveXor(data[3].i16);
340
341    data[invocation].u16.x   = subgroupAdd(data[0].u16.x);
342    data[invocation].u16.xy  = subgroupAdd(data[1].u16.xy);
343    data[invocation].u16.xyz = subgroupAdd(data[2].u16.xyz);
344    data[invocation].u16     = subgroupAdd(data[3].u16);
345
346    data[invocation].u16.x   = subgroupMul(data[0].u16.x);
347    data[invocation].u16.xy  = subgroupMul(data[1].u16.xy);
348    data[invocation].u16.xyz = subgroupMul(data[2].u16.xyz);
349    data[invocation].u16     = subgroupMul(data[3].u16);
350
351    data[invocation].u16.x   = subgroupMin(data[0].u16.x);
352    data[invocation].u16.xy  = subgroupMin(data[1].u16.xy);
353    data[invocation].u16.xyz = subgroupMin(data[2].u16.xyz);
354    data[invocation].u16     = subgroupMin(data[3].u16);
355
356    data[invocation].u16.x   = subgroupMax(data[0].u16.x);
357    data[invocation].u16.xy  = subgroupMax(data[1].u16.xy);
358    data[invocation].u16.xyz = subgroupMax(data[2].u16.xyz);
359    data[invocation].u16     = subgroupMax(data[3].u16);
360
361    data[invocation].u16.x   = subgroupAnd(data[0].u16.x);
362    data[invocation].u16.xy  = subgroupAnd(data[1].u16.xy);
363    data[invocation].u16.xyz = subgroupAnd(data[2].u16.xyz);
364    data[invocation].u16     = subgroupAnd(data[3].u16);
365
366    data[invocation].u16.x   = subgroupOr(data[0].u16.x);
367    data[invocation].u16.xy  = subgroupOr(data[1].u16.xy);
368    data[invocation].u16.xyz = subgroupOr(data[2].u16.xyz);
369    data[invocation].u16     = subgroupOr(data[3].u16);
370
371    data[invocation].u16.x   = subgroupXor(data[0].u16.x);
372    data[invocation].u16.xy  = subgroupXor(data[1].u16.xy);
373    data[invocation].u16.xyz = subgroupXor(data[2].u16.xyz);
374    data[invocation].u16     = subgroupXor(data[3].u16);
375
376    data[invocation].u16.x   = subgroupInclusiveAdd(data[0].u16.x);
377    data[invocation].u16.xy  = subgroupInclusiveAdd(data[1].u16.xy);
378    data[invocation].u16.xyz = subgroupInclusiveAdd(data[2].u16.xyz);
379    data[invocation].u16     = subgroupInclusiveAdd(data[3].u16);
380
381    data[invocation].u16.x   = subgroupInclusiveMul(data[0].u16.x);
382    data[invocation].u16.xy  = subgroupInclusiveMul(data[1].u16.xy);
383    data[invocation].u16.xyz = subgroupInclusiveMul(data[2].u16.xyz);
384    data[invocation].u16     = subgroupInclusiveMul(data[3].u16);
385
386    data[invocation].u16.x   = subgroupInclusiveMin(data[0].u16.x);
387    data[invocation].u16.xy  = subgroupInclusiveMin(data[1].u16.xy);
388    data[invocation].u16.xyz = subgroupInclusiveMin(data[2].u16.xyz);
389    data[invocation].u16     = subgroupInclusiveMin(data[3].u16);
390
391    data[invocation].u16.x   = subgroupInclusiveMax(data[0].u16.x);
392    data[invocation].u16.xy  = subgroupInclusiveMax(data[1].u16.xy);
393    data[invocation].u16.xyz = subgroupInclusiveMax(data[2].u16.xyz);
394    data[invocation].u16     = subgroupInclusiveMax(data[3].u16);
395
396    data[invocation].u16.x   = subgroupInclusiveAnd(data[0].u16.x);
397    data[invocation].u16.xy  = subgroupInclusiveAnd(data[1].u16.xy);
398    data[invocation].u16.xyz = subgroupInclusiveAnd(data[2].u16.xyz);
399    data[invocation].u16     = subgroupInclusiveAnd(data[3].u16);
400
401    data[invocation].u16.x   = subgroupInclusiveOr(data[0].u16.x);
402    data[invocation].u16.xy  = subgroupInclusiveOr(data[1].u16.xy);
403    data[invocation].u16.xyz = subgroupInclusiveOr(data[2].u16.xyz);
404    data[invocation].u16     = subgroupInclusiveOr(data[3].u16);
405
406    data[invocation].u16.x   = subgroupInclusiveXor(data[0].u16.x);
407    data[invocation].u16.xy  = subgroupInclusiveXor(data[1].u16.xy);
408    data[invocation].u16.xyz = subgroupInclusiveXor(data[2].u16.xyz);
409    data[invocation].u16     = subgroupInclusiveXor(data[3].u16);
410
411    data[invocation].u16.x   = subgroupExclusiveAdd(data[0].u16.x);
412    data[invocation].u16.xy  = subgroupExclusiveAdd(data[1].u16.xy);
413    data[invocation].u16.xyz = subgroupExclusiveAdd(data[2].u16.xyz);
414    data[invocation].u16     = subgroupExclusiveAdd(data[3].u16);
415
416    data[invocation].u16.x   = subgroupExclusiveMul(data[0].u16.x);
417    data[invocation].u16.xy  = subgroupExclusiveMul(data[1].u16.xy);
418    data[invocation].u16.xyz = subgroupExclusiveMul(data[2].u16.xyz);
419    data[invocation].u16     = subgroupExclusiveMul(data[3].u16);
420
421    data[invocation].u16.x   = subgroupExclusiveMin(data[0].u16.x);
422    data[invocation].u16.xy  = subgroupExclusiveMin(data[1].u16.xy);
423    data[invocation].u16.xyz = subgroupExclusiveMin(data[2].u16.xyz);
424    data[invocation].u16     = subgroupExclusiveMin(data[3].u16);
425
426    data[invocation].u16.x   = subgroupExclusiveMax(data[0].u16.x);
427    data[invocation].u16.xy  = subgroupExclusiveMax(data[1].u16.xy);
428    data[invocation].u16.xyz = subgroupExclusiveMax(data[2].u16.xyz);
429    data[invocation].u16     = subgroupExclusiveMax(data[3].u16);
430
431    data[invocation].u16.x   = subgroupExclusiveAnd(data[0].u16.x);
432    data[invocation].u16.xy  = subgroupExclusiveAnd(data[1].u16.xy);
433    data[invocation].u16.xyz = subgroupExclusiveAnd(data[2].u16.xyz);
434    data[invocation].u16     = subgroupExclusiveAnd(data[3].u16);
435
436    data[invocation].u16.x   = subgroupExclusiveOr(data[0].u16.x);
437    data[invocation].u16.xy  = subgroupExclusiveOr(data[1].u16.xy);
438    data[invocation].u16.xyz = subgroupExclusiveOr(data[2].u16.xyz);
439    data[invocation].u16     = subgroupExclusiveOr(data[3].u16);
440
441    data[invocation].u16.x   = subgroupExclusiveXor(data[0].u16.x);
442    data[invocation].u16.xy  = subgroupExclusiveXor(data[1].u16.xy);
443    data[invocation].u16.xyz = subgroupExclusiveXor(data[2].u16.xyz);
444    data[invocation].u16     = subgroupExclusiveXor(data[3].u16);
445
446    data[invocation].i64.x   = subgroupAdd(data[0].i64.x);
447    data[invocation].i64.xy  = subgroupAdd(data[1].i64.xy);
448    data[invocation].i64.xyz = subgroupAdd(data[2].i64.xyz);
449    data[invocation].i64     = subgroupAdd(data[3].i64);
450
451    data[invocation].i64.x   = subgroupMul(data[0].i64.x);
452    data[invocation].i64.xy  = subgroupMul(data[1].i64.xy);
453    data[invocation].i64.xyz = subgroupMul(data[2].i64.xyz);
454    data[invocation].i64     = subgroupMul(data[3].i64);
455
456    data[invocation].i64.x   = subgroupMin(data[0].i64.x);
457    data[invocation].i64.xy  = subgroupMin(data[1].i64.xy);
458    data[invocation].i64.xyz = subgroupMin(data[2].i64.xyz);
459    data[invocation].i64     = subgroupMin(data[3].i64);
460
461    data[invocation].i64.x   = subgroupMax(data[0].i64.x);
462    data[invocation].i64.xy  = subgroupMax(data[1].i64.xy);
463    data[invocation].i64.xyz = subgroupMax(data[2].i64.xyz);
464    data[invocation].i64     = subgroupMax(data[3].i64);
465
466    data[invocation].i64.x   = subgroupAnd(data[0].i64.x);
467    data[invocation].i64.xy  = subgroupAnd(data[1].i64.xy);
468    data[invocation].i64.xyz = subgroupAnd(data[2].i64.xyz);
469    data[invocation].i64     = subgroupAnd(data[3].i64);
470
471    data[invocation].i64.x   = subgroupOr(data[0].i64.x);
472    data[invocation].i64.xy  = subgroupOr(data[1].i64.xy);
473    data[invocation].i64.xyz = subgroupOr(data[2].i64.xyz);
474    data[invocation].i64     = subgroupOr(data[3].i64);
475
476    data[invocation].i64.x   = subgroupXor(data[0].i64.x);
477    data[invocation].i64.xy  = subgroupXor(data[1].i64.xy);
478    data[invocation].i64.xyz = subgroupXor(data[2].i64.xyz);
479    data[invocation].i64     = subgroupXor(data[3].i64);
480
481    data[invocation].i64.x   = subgroupInclusiveAdd(data[0].i64.x);
482    data[invocation].i64.xy  = subgroupInclusiveAdd(data[1].i64.xy);
483    data[invocation].i64.xyz = subgroupInclusiveAdd(data[2].i64.xyz);
484    data[invocation].i64     = subgroupInclusiveAdd(data[3].i64);
485
486    data[invocation].i64.x   = subgroupInclusiveMul(data[0].i64.x);
487    data[invocation].i64.xy  = subgroupInclusiveMul(data[1].i64.xy);
488    data[invocation].i64.xyz = subgroupInclusiveMul(data[2].i64.xyz);
489    data[invocation].i64     = subgroupInclusiveMul(data[3].i64);
490
491    data[invocation].i64.x   = subgroupInclusiveMin(data[0].i64.x);
492    data[invocation].i64.xy  = subgroupInclusiveMin(data[1].i64.xy);
493    data[invocation].i64.xyz = subgroupInclusiveMin(data[2].i64.xyz);
494    data[invocation].i64     = subgroupInclusiveMin(data[3].i64);
495
496    data[invocation].i64.x   = subgroupInclusiveMax(data[0].i64.x);
497    data[invocation].i64.xy  = subgroupInclusiveMax(data[1].i64.xy);
498    data[invocation].i64.xyz = subgroupInclusiveMax(data[2].i64.xyz);
499    data[invocation].i64     = subgroupInclusiveMax(data[3].i64);
500
501    data[invocation].i64.x   = subgroupInclusiveAnd(data[0].i64.x);
502    data[invocation].i64.xy  = subgroupInclusiveAnd(data[1].i64.xy);
503    data[invocation].i64.xyz = subgroupInclusiveAnd(data[2].i64.xyz);
504    data[invocation].i64     = subgroupInclusiveAnd(data[3].i64);
505
506    data[invocation].i64.x   = subgroupInclusiveOr(data[0].i64.x);
507    data[invocation].i64.xy  = subgroupInclusiveOr(data[1].i64.xy);
508    data[invocation].i64.xyz = subgroupInclusiveOr(data[2].i64.xyz);
509    data[invocation].i64     = subgroupInclusiveOr(data[3].i64);
510
511    data[invocation].i64.x   = subgroupInclusiveXor(data[0].i64.x);
512    data[invocation].i64.xy  = subgroupInclusiveXor(data[1].i64.xy);
513    data[invocation].i64.xyz = subgroupInclusiveXor(data[2].i64.xyz);
514    data[invocation].i64     = subgroupInclusiveXor(data[3].i64);
515
516    data[invocation].i64.x   = subgroupExclusiveAdd(data[0].i64.x);
517    data[invocation].i64.xy  = subgroupExclusiveAdd(data[1].i64.xy);
518    data[invocation].i64.xyz = subgroupExclusiveAdd(data[2].i64.xyz);
519    data[invocation].i64     = subgroupExclusiveAdd(data[3].i64);
520
521    data[invocation].i64.x   = subgroupExclusiveMul(data[0].i64.x);
522    data[invocation].i64.xy  = subgroupExclusiveMul(data[1].i64.xy);
523    data[invocation].i64.xyz = subgroupExclusiveMul(data[2].i64.xyz);
524    data[invocation].i64     = subgroupExclusiveMul(data[3].i64);
525
526    data[invocation].i64.x   = subgroupExclusiveMin(data[0].i64.x);
527    data[invocation].i64.xy  = subgroupExclusiveMin(data[1].i64.xy);
528    data[invocation].i64.xyz = subgroupExclusiveMin(data[2].i64.xyz);
529    data[invocation].i64     = subgroupExclusiveMin(data[3].i64);
530
531    data[invocation].i64.x   = subgroupExclusiveMax(data[0].i64.x);
532    data[invocation].i64.xy  = subgroupExclusiveMax(data[1].i64.xy);
533    data[invocation].i64.xyz = subgroupExclusiveMax(data[2].i64.xyz);
534    data[invocation].i64     = subgroupExclusiveMax(data[3].i64);
535
536    data[invocation].i64.x   = subgroupExclusiveAnd(data[0].i64.x);
537    data[invocation].i64.xy  = subgroupExclusiveAnd(data[1].i64.xy);
538    data[invocation].i64.xyz = subgroupExclusiveAnd(data[2].i64.xyz);
539    data[invocation].i64     = subgroupExclusiveAnd(data[3].i64);
540
541    data[invocation].i64.x   = subgroupExclusiveOr(data[0].i64.x);
542    data[invocation].i64.xy  = subgroupExclusiveOr(data[1].i64.xy);
543    data[invocation].i64.xyz = subgroupExclusiveOr(data[2].i64.xyz);
544    data[invocation].i64     = subgroupExclusiveOr(data[3].i64);
545
546    data[invocation].i64.x   = subgroupExclusiveXor(data[0].i64.x);
547    data[invocation].i64.xy  = subgroupExclusiveXor(data[1].i64.xy);
548    data[invocation].i64.xyz = subgroupExclusiveXor(data[2].i64.xyz);
549    data[invocation].i64     = subgroupExclusiveXor(data[3].i64);
550
551    data[invocation].u64.x   = subgroupAdd(data[0].u64.x);
552    data[invocation].u64.xy  = subgroupAdd(data[1].u64.xy);
553    data[invocation].u64.xyz = subgroupAdd(data[2].u64.xyz);
554    data[invocation].u64     = subgroupAdd(data[3].u64);
555
556    data[invocation].u64.x   = subgroupMul(data[0].u64.x);
557    data[invocation].u64.xy  = subgroupMul(data[1].u64.xy);
558    data[invocation].u64.xyz = subgroupMul(data[2].u64.xyz);
559    data[invocation].u64     = subgroupMul(data[3].u64);
560
561    data[invocation].u64.x   = subgroupMin(data[0].u64.x);
562    data[invocation].u64.xy  = subgroupMin(data[1].u64.xy);
563    data[invocation].u64.xyz = subgroupMin(data[2].u64.xyz);
564    data[invocation].u64     = subgroupMin(data[3].u64);
565
566    data[invocation].u64.x   = subgroupMax(data[0].u64.x);
567    data[invocation].u64.xy  = subgroupMax(data[1].u64.xy);
568    data[invocation].u64.xyz = subgroupMax(data[2].u64.xyz);
569    data[invocation].u64     = subgroupMax(data[3].u64);
570
571    data[invocation].u64.x   = subgroupAnd(data[0].u64.x);
572    data[invocation].u64.xy  = subgroupAnd(data[1].u64.xy);
573    data[invocation].u64.xyz = subgroupAnd(data[2].u64.xyz);
574    data[invocation].u64     = subgroupAnd(data[3].u64);
575
576    data[invocation].u64.x   = subgroupOr(data[0].u64.x);
577    data[invocation].u64.xy  = subgroupOr(data[1].u64.xy);
578    data[invocation].u64.xyz = subgroupOr(data[2].u64.xyz);
579    data[invocation].u64     = subgroupOr(data[3].u64);
580
581    data[invocation].u64.x   = subgroupXor(data[0].u64.x);
582    data[invocation].u64.xy  = subgroupXor(data[1].u64.xy);
583    data[invocation].u64.xyz = subgroupXor(data[2].u64.xyz);
584    data[invocation].u64     = subgroupXor(data[3].u64);
585
586    data[invocation].u64.x   = subgroupInclusiveAdd(data[0].u64.x);
587    data[invocation].u64.xy  = subgroupInclusiveAdd(data[1].u64.xy);
588    data[invocation].u64.xyz = subgroupInclusiveAdd(data[2].u64.xyz);
589    data[invocation].u64     = subgroupInclusiveAdd(data[3].u64);
590
591    data[invocation].u64.x   = subgroupInclusiveMul(data[0].u64.x);
592    data[invocation].u64.xy  = subgroupInclusiveMul(data[1].u64.xy);
593    data[invocation].u64.xyz = subgroupInclusiveMul(data[2].u64.xyz);
594    data[invocation].u64     = subgroupInclusiveMul(data[3].u64);
595
596    data[invocation].u64.x   = subgroupInclusiveMin(data[0].u64.x);
597    data[invocation].u64.xy  = subgroupInclusiveMin(data[1].u64.xy);
598    data[invocation].u64.xyz = subgroupInclusiveMin(data[2].u64.xyz);
599    data[invocation].u64     = subgroupInclusiveMin(data[3].u64);
600
601    data[invocation].u64.x   = subgroupInclusiveMax(data[0].u64.x);
602    data[invocation].u64.xy  = subgroupInclusiveMax(data[1].u64.xy);
603    data[invocation].u64.xyz = subgroupInclusiveMax(data[2].u64.xyz);
604    data[invocation].u64     = subgroupInclusiveMax(data[3].u64);
605
606    data[invocation].u64.x   = subgroupInclusiveAnd(data[0].u64.x);
607    data[invocation].u64.xy  = subgroupInclusiveAnd(data[1].u64.xy);
608    data[invocation].u64.xyz = subgroupInclusiveAnd(data[2].u64.xyz);
609    data[invocation].u64     = subgroupInclusiveAnd(data[3].u64);
610
611    data[invocation].u64.x   = subgroupInclusiveOr(data[0].u64.x);
612    data[invocation].u64.xy  = subgroupInclusiveOr(data[1].u64.xy);
613    data[invocation].u64.xyz = subgroupInclusiveOr(data[2].u64.xyz);
614    data[invocation].u64     = subgroupInclusiveOr(data[3].u64);
615
616    data[invocation].u64.x   = subgroupInclusiveXor(data[0].u64.x);
617    data[invocation].u64.xy  = subgroupInclusiveXor(data[1].u64.xy);
618    data[invocation].u64.xyz = subgroupInclusiveXor(data[2].u64.xyz);
619    data[invocation].u64     = subgroupInclusiveXor(data[3].u64);
620
621    data[invocation].u64.x   = subgroupExclusiveAdd(data[0].u64.x);
622    data[invocation].u64.xy  = subgroupExclusiveAdd(data[1].u64.xy);
623    data[invocation].u64.xyz = subgroupExclusiveAdd(data[2].u64.xyz);
624    data[invocation].u64     = subgroupExclusiveAdd(data[3].u64);
625
626    data[invocation].u64.x   = subgroupExclusiveMul(data[0].u64.x);
627    data[invocation].u64.xy  = subgroupExclusiveMul(data[1].u64.xy);
628    data[invocation].u64.xyz = subgroupExclusiveMul(data[2].u64.xyz);
629    data[invocation].u64     = subgroupExclusiveMul(data[3].u64);
630
631    data[invocation].u64.x   = subgroupExclusiveMin(data[0].u64.x);
632    data[invocation].u64.xy  = subgroupExclusiveMin(data[1].u64.xy);
633    data[invocation].u64.xyz = subgroupExclusiveMin(data[2].u64.xyz);
634    data[invocation].u64     = subgroupExclusiveMin(data[3].u64);
635
636    data[invocation].u64.x   = subgroupExclusiveMax(data[0].u64.x);
637    data[invocation].u64.xy  = subgroupExclusiveMax(data[1].u64.xy);
638    data[invocation].u64.xyz = subgroupExclusiveMax(data[2].u64.xyz);
639    data[invocation].u64     = subgroupExclusiveMax(data[3].u64);
640
641    data[invocation].u64.x   = subgroupExclusiveAnd(data[0].u64.x);
642    data[invocation].u64.xy  = subgroupExclusiveAnd(data[1].u64.xy);
643    data[invocation].u64.xyz = subgroupExclusiveAnd(data[2].u64.xyz);
644    data[invocation].u64     = subgroupExclusiveAnd(data[3].u64);
645
646    data[invocation].u64.x   = subgroupExclusiveOr(data[0].u64.x);
647    data[invocation].u64.xy  = subgroupExclusiveOr(data[1].u64.xy);
648    data[invocation].u64.xyz = subgroupExclusiveOr(data[2].u64.xyz);
649    data[invocation].u64     = subgroupExclusiveOr(data[3].u64);
650
651    data[invocation].u64.x   = subgroupExclusiveXor(data[0].u64.x);
652    data[invocation].u64.xy  = subgroupExclusiveXor(data[1].u64.xy);
653    data[invocation].u64.xyz = subgroupExclusiveXor(data[2].u64.xyz);
654    data[invocation].u64     = subgroupExclusiveXor(data[3].u64);
655
656    data[invocation].f16.x   = subgroupAdd(data[0].f16.x);
657    data[invocation].f16.xy  = subgroupAdd(data[1].f16.xy);
658    data[invocation].f16.xyz = subgroupAdd(data[2].f16.xyz);
659    data[invocation].f16     = subgroupAdd(data[3].f16);
660
661    data[invocation].f16.x   = subgroupMul(data[0].f16.x);
662    data[invocation].f16.xy  = subgroupMul(data[1].f16.xy);
663    data[invocation].f16.xyz = subgroupMul(data[2].f16.xyz);
664    data[invocation].f16     = subgroupMul(data[3].f16);
665
666    data[invocation].f16.x   = subgroupMin(data[0].f16.x);
667    data[invocation].f16.xy  = subgroupMin(data[1].f16.xy);
668    data[invocation].f16.xyz = subgroupMin(data[2].f16.xyz);
669    data[invocation].f16     = subgroupMin(data[3].f16);
670
671    data[invocation].f16.x   = subgroupMax(data[0].f16.x);
672    data[invocation].f16.xy  = subgroupMax(data[1].f16.xy);
673    data[invocation].f16.xyz = subgroupMax(data[2].f16.xyz);
674    data[invocation].f16     = subgroupMax(data[3].f16);
675
676    data[invocation].f16.x   = subgroupInclusiveAdd(data[0].f16.x);
677    data[invocation].f16.xy  = subgroupInclusiveAdd(data[1].f16.xy);
678    data[invocation].f16.xyz = subgroupInclusiveAdd(data[2].f16.xyz);
679    data[invocation].f16     = subgroupInclusiveAdd(data[3].f16);
680
681    data[invocation].f16.x   = subgroupInclusiveMul(data[0].f16.x);
682    data[invocation].f16.xy  = subgroupInclusiveMul(data[1].f16.xy);
683    data[invocation].f16.xyz = subgroupInclusiveMul(data[2].f16.xyz);
684    data[invocation].f16     = subgroupInclusiveMul(data[3].f16);
685
686    data[invocation].f16.x   = subgroupInclusiveMin(data[0].f16.x);
687    data[invocation].f16.xy  = subgroupInclusiveMin(data[1].f16.xy);
688    data[invocation].f16.xyz = subgroupInclusiveMin(data[2].f16.xyz);
689    data[invocation].f16     = subgroupInclusiveMin(data[3].f16);
690
691    data[invocation].f16.x   = subgroupInclusiveMax(data[0].f16.x);
692    data[invocation].f16.xy  = subgroupInclusiveMax(data[1].f16.xy);
693    data[invocation].f16.xyz = subgroupInclusiveMax(data[2].f16.xyz);
694    data[invocation].f16     = subgroupInclusiveMax(data[3].f16);
695
696    data[invocation].f16.x   = subgroupExclusiveAdd(data[0].f16.x);
697    data[invocation].f16.xy  = subgroupExclusiveAdd(data[1].f16.xy);
698    data[invocation].f16.xyz = subgroupExclusiveAdd(data[2].f16.xyz);
699    data[invocation].f16     = subgroupExclusiveAdd(data[3].f16);
700
701    data[invocation].f16.x   = subgroupExclusiveMul(data[0].f16.x);
702    data[invocation].f16.xy  = subgroupExclusiveMul(data[1].f16.xy);
703    data[invocation].f16.xyz = subgroupExclusiveMul(data[2].f16.xyz);
704    data[invocation].f16     = subgroupExclusiveMul(data[3].f16);
705
706    data[invocation].f16.x   = subgroupExclusiveMin(data[0].f16.x);
707    data[invocation].f16.xy  = subgroupExclusiveMin(data[1].f16.xy);
708    data[invocation].f16.xyz = subgroupExclusiveMin(data[2].f16.xyz);
709    data[invocation].f16     = subgroupExclusiveMin(data[3].f16);
710
711    data[invocation].f16.x   = subgroupExclusiveMax(data[0].f16.x);
712    data[invocation].f16.xy  = subgroupExclusiveMax(data[1].f16.xy);
713    data[invocation].f16.xyz = subgroupExclusiveMax(data[2].f16.xyz);
714    data[invocation].f16     = subgroupExclusiveMax(data[3].f16);
715}
716