1#version 450 core
2#extension GL_KHR_memory_scope_semantics : enable
3#extension GL_NV_cooperative_matrix : enable
4#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
5#extension GL_EXT_buffer_reference : enable
6
// Compute workgroup size: 64 x 1 x 1 invocations.
7layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
8
// Z is computed from the plain constant X and the specialization constant Y
// (constant_id 0, default 2), so with the defaults Z = 8 * 2 = 16.
9const int X = 8;
10layout(constant_id = 0) const int Y = 2;
11const int Z = X*Y;
12
// Global cooperative matrices (one standalone, one array of three) whose third
// type parameter is Z rather than a literal.
13fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
14fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];
15
// Arrays whose sizes come from .length() on the matrices above, called once on
// the matrix directly and once on an element of the matrix array.
16int arr[mC.length()];
17int arr2[mC2[1].length()];
18
// Float specialization constant (constant_id 1, default 3.0).
// Note: main() declares a local variable that is also named F.
19layout(constant_id = 1) const float F = 3.0;
20
// Constant cooperative matrices, each built from a single scalar constructor
// argument: a float literal (0.0) for mD and an integer literal (1) for mD2.
21const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
22const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);
23
// Plain struct with three int members.
24struct S { int a; int b; int c; };
25
// Constant instance of S initialized through the struct constructor.
26const S s = S(12, 23, 34);
27
// Coherent buffer block declared with the buffer_reference layout qualifier.
// It holds a fixed-size float array y (1024*1024 elements) followed by an
// unsized float array x. Block is also used as a member type inside Block16.
28layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
29    float y[1024*1024];
30    float x[];
31} block;
32
// Coherent buffer block with the same array layout as Block but using
// float16_t elements, plus a member b of the buffer_reference type Block.
// It is declared with the same set/binding numbers (0, 0) as Block.
33layout(set = 0, binding = 0) coherent buffer Block16 {
34    float16_t y[1024*1024];
35    float16_t x[];
36
37    Block b;
38} block16;
39
// Takes a fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> by value and returns its
// unary negation (-m); used by main() to pass a cooperative matrix through a
// function parameter and return value.
40fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }
// Takes a fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> by value and returns its
// unary negation (-m); the counterpart of f16 with 32 as the first type parameter.
41fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }
42
// Integer specialization constant (constant_id 2, default 1). It is used below
// both as cooperative-matrix type parameters and as the two array dimensions.
43layout(constant_id = 2) const int SC = 1;
44fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
45
// Workgroup-shared uvec4 array with 16*16*2/16 = 32 elements; main() loads and
// stores a cooperative matrix through it.
46// sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
47shared uvec4 shmatrix[16*16*2/16];
48
// Shader entry point. Runs a sequence of cooperative-matrix operations:
// construction, arithmetic, conversion, element access, load/store against
// buffer and shared memory, multiply-add, length(), arrays, function calls and
// compound assignment.
// NOTE(review): the statements look chosen to exercise individual language
// features rather than to compute a meaningful result — presumably a compiler
// test case; confirm before reordering or "cleaning up" anything here.
49void main()
50{
    // Local matrix whose last type parameter is a constant expression
    // ((2>1?8:4) evaluates to 8), initialized from the scalar 0.0.
51    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);
52
    // Matrix arithmetic: add, subtract, unary negate, scalar*matrix, matrix*scalar.
53    m = m + m;
54    m = m - m;
55    m = -m;
56    m = 2.0*m;
57    m = m*2.0;
58
    // Constructs a <16, ..., 16, 8> matrix from the <32, ..., 16, 8> matrix m.
59    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);
60
    // Element read and element write through the [] operator.
61    float x = m[1];
62    m[0] = x;
63
    // Load/store through the unsized arrays: float (block.x), float16_t
    // (block16.x), and float reached through the buffer-reference member
    // (block16.b.x).
    // NOTE(review): the trailing arguments (16, 128, false) are presumably
    // element offset, stride and column-major flag per GL_NV_cooperative_matrix
    // — confirm against the extension spec.
64    coopMatLoadNV(m, block.x, 16, 128, false);
65    coopMatStoreNV(m, block.x, 16, 128, false);
66    coopMatLoadNV(m2, block16.x, 16, 128, false);
67    coopMatStoreNV(m2, block16.x, 16, 128, false);
68    coopMatLoadNV(m, block16.b.x, 16, 128, false);
69    coopMatStoreNV(m, block16.b.x, 16, 128, false);
70
    // Multiply-add: A and B use 16 as the first type parameter, C and D use 32.
    // A, B and C are declared without initializers before being passed in.
71    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
72    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
73    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
74    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
75    D = coopMatMulAddNV(A, B, C);
76
    // length() called on a local matrix, stored in an int.
77    int l = D.length();
78
    // Declared but not used anywhere else in main().
79    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;
80
    // Local matrix with Z in both of the last two type parameters. The name F
    // shadows the global float specialization constant F inside main().
81    fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);
82
    // Array of five matrices; writes element 0 of the matrix at array index 3.
83    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
84    a[3][0] = 1.0;
85
    // Element read from the constant global matrix mD.
86    float md1 = mD[1];
87
    // Indexes the result of a compound assignment (m += m) with the literal
    // 1234 and accumulates that element into md1.
88    md1 += (m += m)[1234];
89
    // Whole-matrix assignment between two elements of the global matrix array.
90    mC2[1] = mC2[2];
91
    // Same load/store calls as above, but through the fixed-size arrays y.
92    coopMatLoadNV(m, block.y, 16, 128, false);
93    coopMatStoreNV(m, block.y, 16, 128, false);
94    coopMatLoadNV(m2, block16.y, 16, 128, false);
95    coopMatStoreNV(m2, block16.y, 16, 128, false);
96
    // Matrices passed to and returned from the user functions f16 / f32.
97    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
98    fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;
99
100    p1 = f16(p1);
101    p2 = f32(p2);
102
    // Reassignment from scalar constructors.
103    p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
104    p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);
105
    // Compound matrix-by-matrix division.
106    p1 /= p1;
107
    // Compound scalar multiplication: explicit float16_t scalar for p1,
    // plain float literal for p2.
108    p1 *= float16_t(2.0);
109    p2 *= 4.0;
110
    // Load/store through the shared uvec4 array shmatrix, with (1, 2, false)
    // as the trailing arguments.
111    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
112    coopMatLoadNV(ms, shmatrix, 1, 2, false);
113    coopMatStoreNV(ms, shmatrix, 1, 2, false);
114
115}
116