// Compute shader: for every invocation, accumulates four output texels that
// are adjacent along x and writes them to uOutput.
//
// Resources:
//   uOutput      - write-only 3D image receiving the results.
//   uInput       - 3D texture fetched at (x, y, fz) for fz in [0, uInputSize.z).
//   uKernel      - 3D texture fetched at (4*fz + {0..3}, pos.z, 0); the four
//                  texels become the columns of a mat4.
//   uBias.data   - initial value of every accumulator, indexed by pos.z.
//   uUnroll      - x stride between consecutive invocations.
//   uOutputSize  - upper bound (exclusive) for the invocation id.
//   uInputSize   - only .z is used, as the accumulation loop trip count.
//
// Compile-time macros expected from the host: FORMAT, PRECISION, XLOCAL,
// YLOCAL, ZLOCAL, and optionally RELU and/or RELU6.
//
// NOTE(review): this looks like a 1x1 convolution over 4-channel-packed
// slices - confirm against the host code that fills uKernel.
layout(std430) buffer;
layout(FORMAT, binding=0) writeonly uniform PRECISION image3D uOutput;
layout(location=1) uniform mediump sampler3D uInput;
layout(location=2) uniform mediump sampler3D uKernel;

layout(binding=3) readonly buffer bias{
    vec4 data[];
} uBias;

layout(location=8) uniform int uUnroll;

layout(location=10) uniform ivec3 uOutputSize;
layout(location=11) uniform ivec3 uInputSize;

#define UP_DIV(x, y) (((x)+(y)-1)/(y))

layout (local_size_x = XLOCAL, local_size_y = YLOCAL, local_size_z = ZLOCAL) in;

void main()
{
    ivec3 gid = ivec3(gl_GlobalInvocationID);

    // Guard clause: do nothing unless every component of the invocation id
    // is strictly below uOutputSize.
    if (any(greaterThanEqual(gid, uOutputSize)))
    {
        return;
    }

    // Only x is strided by uUnroll; y and z map one-to-one.
    // NOTE(review): exactly four texels are produced below regardless of
    // uUnroll - presumably the host always sets uUnroll to 4; confirm.
    ivec3 pos = ivec3(gid.x * uUnroll, gid.y, gid.z);
    int depth = uInputSize.z;

    // All four accumulators start from the same bias entry.
    vec4 bias0 = uBias.data[pos.z];
    vec4 acc0 = bias0;
    vec4 acc1 = bias0;
    vec4 acc2 = bias0;
    vec4 acc3 = bias0;

    for (int fz = 0; fz < depth; ++fz)
    {
        int kx = 4 * fz;

        // Four consecutive kernel texels along x form the matrix columns.
        mat4 weights = mat4(
            texelFetch(uKernel, ivec3(kx + 0, pos.z, 0), 0),
            texelFetch(uKernel, ivec3(kx + 1, pos.z, 0), 0),
            texelFetch(uKernel, ivec3(kx + 2, pos.z, 0), 0),
            texelFetch(uKernel, ivec3(kx + 3, pos.z, 0), 0));

        // The same matrix is applied to four input texels adjacent in x.
        acc0 += weights * texelFetch(uInput, ivec3(pos.x + 0, pos.y, fz), 0);
        acc1 += weights * texelFetch(uInput, ivec3(pos.x + 1, pos.y, fz), 0);
        acc2 += weights * texelFetch(uInput, ivec3(pos.x + 2, pos.y, fz), 0);
        acc3 += weights * texelFetch(uInput, ivec3(pos.x + 3, pos.y, fz), 0);
    }

#ifdef RELU
    // Clamp negatives to zero.
    acc0 = max(acc0, vec4(0.0));
    acc1 = max(acc1, vec4(0.0));
    acc2 = max(acc2, vec4(0.0));
    acc3 = max(acc3, vec4(0.0));
#endif
#ifdef RELU6
    // Clamp to the closed range [0, 6].
    acc0 = clamp(acc0, vec4(0.0), vec4(6.0));
    acc1 = clamp(acc1, vec4(0.0), vec4(6.0));
    acc2 = clamp(acc2, vec4(0.0), vec4(6.0));
    acc3 = clamp(acc3, vec4(0.0), vec4(6.0));
#endif

    imageStore(uOutput, ivec3(pos.x + 0, pos.y, pos.z), acc0);
    imageStore(uOutput, ivec3(pos.x + 1, pos.y, pos.z), acc1);
    imageStore(uOutput, ivec3(pos.x + 2, pos.y, pos.z), acc2);
    imageStore(uOutput, ivec3(pos.x + 3, pos.y, pos.z), acc3);
}