// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Shape of the input (dims/w/h/c/cstep) and output (out*) as specialization
// constants. Each one is read through psc(), together with the push-constant
// member of the same name declared below.
#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

// Input and output blobs: 3D images in the image-shader build, flat buffers
// of 4-component elements otherwise.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
#endif
// Per-output-column data, indexed by the output x coordinate: four tap
// weights (alpha) and the source x index of the second tap (xofs).
layout (binding = 2) readonly buffer alpha_blob { sfpvec4 alpha_blob_data[]; };
layout (binding = 3) readonly buffer xofs_blob { int xofs_blob_data[]; };
// Per-output-row data, indexed by the output y coordinate: four tap weights
// (beta) and the source y index of the second tap (yofs).
layout (binding = 4) readonly buffer beta_blob { sfpvec4 beta_blob_data[]; };
layout (binding = 5) readonly buffer yofs_blob { int yofs_blob_data[]; };

layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;

    int outdims;
    int outw;
    int outh;
    int outc;
    int outcstep;
} p;

// Fetch the four horizontally adjacent input elements at x0-1 .. x0+2 on the
// given row and slice, and return their sum weighted by the components of wx
// (tap k is scaled by wx[k]).
//
// No bounds checking is done on the tap coordinates here.
// NOTE(review): presumably the offset tables are pre-clamped by the host so
// that all taps are in range - confirm against the code that fills them.
afpvec4 blend_row(int x0, int row, int slice, afpvec4 wx)
{
#if NCNN_image_shader
    afpvec4 t0 = image3d_ld4(bottom_blob, ivec3(x0 - 1, row, slice));
    afpvec4 t1 = image3d_ld4(bottom_blob, ivec3(x0 + 0, row, slice));
    afpvec4 t2 = image3d_ld4(bottom_blob, ivec3(x0 + 1, row, slice));
    afpvec4 t3 = image3d_ld4(bottom_blob, ivec3(x0 + 2, row, slice));
#else
    // Linear element index of the tap at x0.
    int base = slice * psc(cstep) + row * psc(w) + x0;

    afpvec4 t0 = buffer_ld4(bottom_blob_data, base - 1);
    afpvec4 t1 = buffer_ld4(bottom_blob_data, base + 0);
    afpvec4 t2 = buffer_ld4(bottom_blob_data, base + 1);
    afpvec4 t3 = buffer_ld4(bottom_blob_data, base + 2);
#endif

    // The taps form the matrix columns, so the product is
    // t0 * wx.x + t1 * wx.y + t2 * wx.z + t3 * wx.w.
    return afpmat4(t0, t1, t2, t3) * wx;
}

// One invocation produces one 4-component output element at
// (gl_GlobalInvocationID.x, .y, .z).
//
// dims == 2 : 4-tap weighted sum along x only; the row index is passed
//             through unchanged.
// otherwise : separable 4x4 weighted sum - four rows are reduced along x
//             with the alpha weights, then combined along y with the beta
//             weights.
// NOTE(review): the 4x4 footprint suggests bicubic interpolation, but the
// weights are computed outside this file - confirm.
void main()
{
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    // Discard invocations that fall outside the output extent.
    bool inside = gid.x < psc(outw) && gid.y < psc(outh) && gid.z < psc(outc);
    if (!inside)
        return;

    // Both paths use the same horizontal source index and weights.
    const int src_x = xofs_blob_data[gid.x];
    const afpvec4 wx = buffer_ld4(alpha_blob_data, gid.x);

    afpvec4 result;

    if (psc(dims) == 2)
    {
        result = blend_row(src_x, gid.y, gid.z, wx);
    }
    else
    {
        const int src_y = yofs_blob_data[gid.y];

        // Horizontal pass over the four source rows src_y-1 .. src_y+2.
        afpvec4 r0 = blend_row(src_x, src_y - 1, gid.z, wx);
        afpvec4 r1 = blend_row(src_x, src_y + 0, gid.z, wx);
        afpvec4 r2 = blend_row(src_x, src_y + 1, gid.z, wx);
        afpvec4 r3 = blend_row(src_x, src_y + 2, gid.z, wx);

        // Vertical pass.
        const afpvec4 wy = buffer_ld4(beta_blob_data, gid.y);

        result = afpmat4(r0, r1, r2, r3) * wy;
    }

#if NCNN_image_shader
    image3d_st4(top_blob, gid, result);
#else
    const int out_index = gid.z * psc(outcstep) + gid.y * psc(outw) + gid.x;

    buffer_st4(top_blob_data, out_index, result);
#endif
}