#version 450

// Compute shader that applies every subgroup quad operation
// (subgroupQuadBroadcast and subgroupQuadSwapHorizontal/Vertical/Diagonal)
// to 8-bit, 16-bit and 64-bit integer and 16-bit float operands, in scalar
// and 2-, 3- and 4-component vector form.

#extension GL_KHR_shader_subgroup_quad: enable
#extension GL_EXT_shader_subgroup_extended_types_int8: enable
#extension GL_EXT_shader_subgroup_extended_types_int16: enable
#extension GL_EXT_shader_subgroup_extended_types_int64: enable
#extension GL_EXT_shader_subgroup_extended_types_float16: enable

layout (local_size_x = 8) in;

// One 4-component member per extended type, so that a scalar (.x), the
// .xy and .xyz swizzles, and the full vector can all be used as operands.
layout(binding = 0) buffer Buffers
{
    i8vec4 i8;
    u8vec4 u8;
    i16vec4 i16;
    u16vec4 u16;
    i64vec4 i64;
    u64vec4 u64;
    f16vec4 f16;
} data[4];

// Entry point. For each member of Buffers, runs the four quad operations on
// data[0] (scalar), data[1] (vec2), data[2] (vec3) and data[3] (vec4) and
// stores each result into the matching components of data[invocation].
//
// Statement order is significant: data[invocation] may be the same element
// as one of data[0..3], so later reads can observe earlier writes.
void main()
{
    // The "% 4" keeps the index within the bounds of data[4].
    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;

    // ---- int8 ----
    // Broadcast: the second argument (1) selects the source invocation
    // within the quad.
    data[invocation].i8.x   = subgroupQuadBroadcast(data[0].i8.x, 1);
    data[invocation].i8.xy  = subgroupQuadBroadcast(data[1].i8.xy, 1);
    data[invocation].i8.xyz = subgroupQuadBroadcast(data[2].i8.xyz, 1);
    data[invocation].i8     = subgroupQuadBroadcast(data[3].i8, 1);

    data[invocation].i8.x   = subgroupQuadSwapHorizontal(data[0].i8.x);
    data[invocation].i8.xy  = subgroupQuadSwapHorizontal(data[1].i8.xy);
    data[invocation].i8.xyz = subgroupQuadSwapHorizontal(data[2].i8.xyz);
    data[invocation].i8     = subgroupQuadSwapHorizontal(data[3].i8);

    data[invocation].i8.x   = subgroupQuadSwapVertical(data[0].i8.x);
    data[invocation].i8.xy  = subgroupQuadSwapVertical(data[1].i8.xy);
    data[invocation].i8.xyz = subgroupQuadSwapVertical(data[2].i8.xyz);
    data[invocation].i8     = subgroupQuadSwapVertical(data[3].i8);

    data[invocation].i8.x   = subgroupQuadSwapDiagonal(data[0].i8.x);
    data[invocation].i8.xy  = subgroupQuadSwapDiagonal(data[1].i8.xy);
    data[invocation].i8.xyz = subgroupQuadSwapDiagonal(data[2].i8.xyz);
    data[invocation].i8     = subgroupQuadSwapDiagonal(data[3].i8);

    // ---- uint8 ----
    data[invocation].u8.x   = subgroupQuadBroadcast(data[0].u8.x, 1);
    data[invocation].u8.xy  = subgroupQuadBroadcast(data[1].u8.xy, 1);
    data[invocation].u8.xyz = subgroupQuadBroadcast(data[2].u8.xyz, 1);
    data[invocation].u8     = subgroupQuadBroadcast(data[3].u8, 1);

    data[invocation].u8.x   = subgroupQuadSwapHorizontal(data[0].u8.x);
    data[invocation].u8.xy  = subgroupQuadSwapHorizontal(data[1].u8.xy);
    data[invocation].u8.xyz = subgroupQuadSwapHorizontal(data[2].u8.xyz);
    data[invocation].u8     = subgroupQuadSwapHorizontal(data[3].u8);

    data[invocation].u8.x   = subgroupQuadSwapVertical(data[0].u8.x);
    data[invocation].u8.xy  = subgroupQuadSwapVertical(data[1].u8.xy);
    data[invocation].u8.xyz = subgroupQuadSwapVertical(data[2].u8.xyz);
    data[invocation].u8     = subgroupQuadSwapVertical(data[3].u8);

    data[invocation].u8.x   = subgroupQuadSwapDiagonal(data[0].u8.x);
    data[invocation].u8.xy  = subgroupQuadSwapDiagonal(data[1].u8.xy);
    data[invocation].u8.xyz = subgroupQuadSwapDiagonal(data[2].u8.xyz);
    data[invocation].u8     = subgroupQuadSwapDiagonal(data[3].u8);

    // ---- int16 ----
    data[invocation].i16.x   = subgroupQuadBroadcast(data[0].i16.x, 1);
    data[invocation].i16.xy  = subgroupQuadBroadcast(data[1].i16.xy, 1);
    data[invocation].i16.xyz = subgroupQuadBroadcast(data[2].i16.xyz, 1);
    data[invocation].i16     = subgroupQuadBroadcast(data[3].i16, 1);

    data[invocation].i16.x   = subgroupQuadSwapHorizontal(data[0].i16.x);
    data[invocation].i16.xy  = subgroupQuadSwapHorizontal(data[1].i16.xy);
    data[invocation].i16.xyz = subgroupQuadSwapHorizontal(data[2].i16.xyz);
    data[invocation].i16     = subgroupQuadSwapHorizontal(data[3].i16);

    data[invocation].i16.x   = subgroupQuadSwapVertical(data[0].i16.x);
    data[invocation].i16.xy  = subgroupQuadSwapVertical(data[1].i16.xy);
    data[invocation].i16.xyz = subgroupQuadSwapVertical(data[2].i16.xyz);
    data[invocation].i16     = subgroupQuadSwapVertical(data[3].i16);

    data[invocation].i16.x   = subgroupQuadSwapDiagonal(data[0].i16.x);
    data[invocation].i16.xy  = subgroupQuadSwapDiagonal(data[1].i16.xy);
    data[invocation].i16.xyz = subgroupQuadSwapDiagonal(data[2].i16.xyz);
    data[invocation].i16     = subgroupQuadSwapDiagonal(data[3].i16);

    // ---- uint16 ----
    data[invocation].u16.x   = subgroupQuadBroadcast(data[0].u16.x, 1);
    data[invocation].u16.xy  = subgroupQuadBroadcast(data[1].u16.xy, 1);
    data[invocation].u16.xyz = subgroupQuadBroadcast(data[2].u16.xyz, 1);
    data[invocation].u16     = subgroupQuadBroadcast(data[3].u16, 1);

    data[invocation].u16.x   = subgroupQuadSwapHorizontal(data[0].u16.x);
    data[invocation].u16.xy  = subgroupQuadSwapHorizontal(data[1].u16.xy);
    data[invocation].u16.xyz = subgroupQuadSwapHorizontal(data[2].u16.xyz);
    data[invocation].u16     = subgroupQuadSwapHorizontal(data[3].u16);

    data[invocation].u16.x   = subgroupQuadSwapVertical(data[0].u16.x);
    data[invocation].u16.xy  = subgroupQuadSwapVertical(data[1].u16.xy);
    data[invocation].u16.xyz = subgroupQuadSwapVertical(data[2].u16.xyz);
    data[invocation].u16     = subgroupQuadSwapVertical(data[3].u16);

    data[invocation].u16.x   = subgroupQuadSwapDiagonal(data[0].u16.x);
    data[invocation].u16.xy  = subgroupQuadSwapDiagonal(data[1].u16.xy);
    data[invocation].u16.xyz = subgroupQuadSwapDiagonal(data[2].u16.xyz);
    data[invocation].u16     = subgroupQuadSwapDiagonal(data[3].u16);

    // ---- int64 ----
    data[invocation].i64.x   = subgroupQuadBroadcast(data[0].i64.x, 1);
    data[invocation].i64.xy  = subgroupQuadBroadcast(data[1].i64.xy, 1);
    data[invocation].i64.xyz = subgroupQuadBroadcast(data[2].i64.xyz, 1);
    data[invocation].i64     = subgroupQuadBroadcast(data[3].i64, 1);

    data[invocation].i64.x   = subgroupQuadSwapHorizontal(data[0].i64.x);
    data[invocation].i64.xy  = subgroupQuadSwapHorizontal(data[1].i64.xy);
    data[invocation].i64.xyz = subgroupQuadSwapHorizontal(data[2].i64.xyz);
    data[invocation].i64     = subgroupQuadSwapHorizontal(data[3].i64);

    data[invocation].i64.x   = subgroupQuadSwapVertical(data[0].i64.x);
    data[invocation].i64.xy  = subgroupQuadSwapVertical(data[1].i64.xy);
    data[invocation].i64.xyz = subgroupQuadSwapVertical(data[2].i64.xyz);
    data[invocation].i64     = subgroupQuadSwapVertical(data[3].i64);

    data[invocation].i64.x   = subgroupQuadSwapDiagonal(data[0].i64.x);
    data[invocation].i64.xy  = subgroupQuadSwapDiagonal(data[1].i64.xy);
    data[invocation].i64.xyz = subgroupQuadSwapDiagonal(data[2].i64.xyz);
    data[invocation].i64     = subgroupQuadSwapDiagonal(data[3].i64);

    // ---- uint64 ----
    data[invocation].u64.x   = subgroupQuadBroadcast(data[0].u64.x, 1);
    data[invocation].u64.xy  = subgroupQuadBroadcast(data[1].u64.xy, 1);
    data[invocation].u64.xyz = subgroupQuadBroadcast(data[2].u64.xyz, 1);
    data[invocation].u64     = subgroupQuadBroadcast(data[3].u64, 1);

    data[invocation].u64.x   = subgroupQuadSwapHorizontal(data[0].u64.x);
    data[invocation].u64.xy  = subgroupQuadSwapHorizontal(data[1].u64.xy);
    data[invocation].u64.xyz = subgroupQuadSwapHorizontal(data[2].u64.xyz);
    data[invocation].u64     = subgroupQuadSwapHorizontal(data[3].u64);

    data[invocation].u64.x   = subgroupQuadSwapVertical(data[0].u64.x);
    data[invocation].u64.xy  = subgroupQuadSwapVertical(data[1].u64.xy);
    data[invocation].u64.xyz = subgroupQuadSwapVertical(data[2].u64.xyz);
    data[invocation].u64     = subgroupQuadSwapVertical(data[3].u64);

    data[invocation].u64.x   = subgroupQuadSwapDiagonal(data[0].u64.x);
    data[invocation].u64.xy  = subgroupQuadSwapDiagonal(data[1].u64.xy);
    data[invocation].u64.xyz = subgroupQuadSwapDiagonal(data[2].u64.xyz);
    data[invocation].u64     = subgroupQuadSwapDiagonal(data[3].u64);

    // ---- float16 ----
    data[invocation].f16.x   = subgroupQuadBroadcast(data[0].f16.x, 1);
    data[invocation].f16.xy  = subgroupQuadBroadcast(data[1].f16.xy, 1);
    data[invocation].f16.xyz = subgroupQuadBroadcast(data[2].f16.xyz, 1);
    data[invocation].f16     = subgroupQuadBroadcast(data[3].f16, 1);

    data[invocation].f16.x   = subgroupQuadSwapHorizontal(data[0].f16.x);
    data[invocation].f16.xy  = subgroupQuadSwapHorizontal(data[1].f16.xy);
    data[invocation].f16.xyz = subgroupQuadSwapHorizontal(data[2].f16.xyz);
    data[invocation].f16     = subgroupQuadSwapHorizontal(data[3].f16);

    data[invocation].f16.x   = subgroupQuadSwapVertical(data[0].f16.x);
    data[invocation].f16.xy  = subgroupQuadSwapVertical(data[1].f16.xy);
    data[invocation].f16.xyz = subgroupQuadSwapVertical(data[2].f16.xyz);
    data[invocation].f16     = subgroupQuadSwapVertical(data[3].f16);

    data[invocation].f16.x   = subgroupQuadSwapDiagonal(data[0].f16.x);
    data[invocation].f16.xy  = subgroupQuadSwapDiagonal(data[1].f16.xy);
    data[invocation].f16.xyz = subgroupQuadSwapDiagonal(data[2].f16.xyz);
    data[invocation].f16     = subgroupQuadSwapDiagonal(data[3].f16);
}