// Resource interface of the compute shader.
// FORMAT, PRECISION, XLOCAL, YLOCAL and ZLOCAL are not defined in this file;
// presumably they are injected as #defines when the shader is built - confirm
// against the host-side build code.

// Default every buffer block declared below to std430 packing.
layout(std430) buffer;

// Destination image; this shader only writes to it.
layout(FORMAT, binding=0) writeonly uniform PRECISION image3D uOutput;
// Source activations, fetched with integer texel coordinates (x, y, z).
layout(location=1) uniform mediump sampler3D uInput;
// Weights. main() reads them at (4*inputZ + j, outputZ, 0) for j = 0..3.
layout(location=2) uniform mediump sampler3D uKernel;

// One vec4 of bias per output z slice (indexed by the output z coordinate).
layout(binding=3) readonly buffer bias{
    vec4 data[];
} uBias;

// Multiplier applied to gl_GlobalInvocationID.x to get the first output x an
// invocation handles. main() always produces 4 texels along x.
// NOTE(review): presumably the host sets this to 4 - confirm.
layout(location=8) uniform int uUnroll;

// Extents of the output image and of the input texture; uInputSize.z is the
// trip count of the accumulation loop in main().
layout(location=10) uniform ivec3 uOutputSize;
layout(location=11) uniform ivec3 uInputSize;

// Integer division rounded up. Not used by the code visible in this file.
#define UP_DIV(x, y) (((x)+(y)-1)/(y))

// Work-group size comes from build-time macros.
layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;
18
// Entry point: for one (y, z) output location, computes four output texels
// that are consecutive along x.
//
// Each output texel is
//     bias[z] + sum over fz of K(fz, z) * input(x, y, fz)
// where K(fz, z) is a mat4 whose columns are the four uKernel texels at
// (4*fz + 0..3, z, 0). The result optionally goes through ReLU and/or ReLU6
// (compile-time RELU / RELU6 macros) before being stored to uOutput.
//
// The first x handled by an invocation is gl_GlobalInvocationID.x * uUnroll.
void main()
{
    // Scale the invocation id first so the bounds test is done on the real
    // output coordinate. Testing the unscaled id would let invocations whose
    // outputs all lie past the right edge run the whole accumulation loop.
    ivec3 pos = ivec3(gl_GlobalInvocationID)*ivec3(uUnroll, 1, 1);
    if (all(lessThan(pos, uOutputSize)))
    {
        int inputDepth = uInputSize.z;

        // All four accumulators start from the bias of this output z slice.
        vec4 acc0 = uBias.data[pos.z];
        vec4 acc1 = acc0;
        vec4 acc2 = acc0;
        vec4 acc3 = acc0;

        for (int fz=0; fz<inputDepth; ++fz)
        {
            // Four adjacent kernel texels form the columns of the 4x4 weight
            // matrix for this (input z, output z) pair.
            int kernelX = 4*fz;
            mat4 weights = mat4(
                texelFetch(uKernel, ivec3(kernelX+0, pos.z, 0), 0),
                texelFetch(uKernel, ivec3(kernelX+1, pos.z, 0), 0),
                texelFetch(uKernel, ivec3(kernelX+2, pos.z, 0), 0),
                texelFetch(uKernel, ivec3(kernelX+3, pos.z, 0), 0));

            // The same weights are reused for the four neighbouring x texels.
            acc0 += weights*texelFetch(uInput, ivec3(pos.x+0, pos.y, fz), 0);
            acc1 += weights*texelFetch(uInput, ivec3(pos.x+1, pos.y, fz), 0);
            acc2 += weights*texelFetch(uInput, ivec3(pos.x+2, pos.y, fz), 0);
            acc3 += weights*texelFetch(uInput, ivec3(pos.x+3, pos.y, fz), 0);
        }
        #ifdef RELU
        acc0 = max(acc0, vec4(0));
        acc1 = max(acc1, vec4(0));
        acc2 = max(acc2, vec4(0));
        acc3 = max(acc3, vec4(0));
        #endif
        #ifdef RELU6
        acc0 = clamp(acc0, vec4(0), vec4(6));
        acc1 = clamp(acc1, vec4(0), vec4(6));
        acc2 = clamp(acc2, vec4(0), vec4(6));
        acc3 = clamp(acc3, vec4(0), vec4(6));
        #endif
        // Only pos.x itself is known to be in range. For the tail of a row,
        // pos.x+1..3 may exceed the output width; as in the original code,
        // those stores are left to the GL rule that image stores to
        // non-existent texels have no effect.
        imageStore(uOutput, ivec3(pos.x+0, pos.y, pos.z), acc0);
        imageStore(uOutput, ivec3(pos.x+1, pos.y, pos.z), acc1);
        imageStore(uOutput, ivec3(pos.x+2, pos.y, pos.z), acc2);
        imageStore(uOutput, ivec3(pos.x+3, pos.y, pos.z), acc3);
    }

}
68