// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Axis permutation performed by this shader. Each invocation reads one
// 8-wide element of the input and writes its eight scalars to the output,
// at positions determined by order_type (tables in main()).
layout (constant_id = 0) const int order_type = 0;
// Declared at constant_id 1 but not referenced anywhere in this shader.
layout (constant_id = 1) const int bugihfa = 0;

// Input / output shape constants start right after the two constants above.
#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
#endif

// Same shape fields as the constants above, supplied as push constants.
// NOTE(review): psc() is defined outside this file; presumably it picks
// between the constant and the push-constant value -- confirm.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;

    int outdims;
    int outw;
    int outh;
    int outc;
    int outcstep;
} p;

#if NCNN_image_shader
// Writes the eight scalars of `lanes` to top_blob_3d, one texel each:
// scalar k (k = 0..7, first column then second column) goes to
// origin + k * delta.
void store_lanes8(ivec3 origin, ivec3 delta, afpvec8 lanes)
{
    ivec3 pos = origin;

    image3d_st1(top_blob_3d, pos, lanes[0].r);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[0].g);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[0].b);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[0].a);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[1].r);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[1].g);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[1].b);
    pos += delta;
    image3d_st1(top_blob_3d, pos, lanes[1].a);
}
#endif

void main()
{
    ivec3 g = ivec3(gl_GlobalInvocationID);

    // Invocations outside the input extent have nothing to do.
    if (g.x >= psc(w) || g.y >= psc(h) || g.z >= psc(c))
        return;

#if NCNN_image_shader
    if (psc(dims) == 2)
    {
        // order_type -> output axis order
        //   0 : w h
        //   1 : h w

        afpvec8 lanes = image3d_ld8(bottom_blob_3d, ivec3(g.x, g.y, 0));

        // The eight scalars occupy consecutive positions starting here
        // along whichever output axis receives the input rows.
        int first = g.y * 8;

        if (order_type == 0)
        {
            store_lanes8(ivec3(g.x, first, 0), ivec3(0, 1, 0), lanes);
        }
        else if (order_type == 1)
        {
            store_lanes8(ivec3(first, g.x, 0), ivec3(1, 0, 0), lanes);
        }
    }
    else // if (psc(dims) == 3)
    {
        // order_type -> output axis order
        //   0 : w h c
        //   1 : h w c
        //   2 : w c h
        //   3 : c w h
        //   4 : h c w
        //   5 : c h w

        afpvec8 lanes = image3d_ld8(bottom_blob_3d, ivec3(g.x, g.y, g.z));

        // The eight scalars occupy consecutive positions starting here
        // along whichever output axis receives the input channels.
        int first = g.z * 8;

        if (order_type == 0)
        {
            store_lanes8(ivec3(g.x, g.y, first), ivec3(0, 0, 1), lanes);
        }
        else if (order_type == 1)
        {
            store_lanes8(ivec3(g.y, g.x, first), ivec3(0, 0, 1), lanes);
        }
        else if (order_type == 2)
        {
            store_lanes8(ivec3(g.x, first, g.y), ivec3(0, 1, 0), lanes);
        }
        else if (order_type == 3)
        {
            store_lanes8(ivec3(first, g.x, g.y), ivec3(1, 0, 0), lanes);
        }
        else if (order_type == 4)
        {
            store_lanes8(ivec3(g.y, first, g.x), ivec3(0, 1, 0), lanes);
        }
        else if (order_type == 5)
        {
            store_lanes8(ivec3(first, g.y, g.x), ivec3(1, 0, 0), lanes);
        }
    }
#else
    // Flat output index of scalar 0, and the distance between the output
    // indices of consecutive scalars.
    int base;
    int stride;

    if (psc(dims) == 2)
    {
        // order_type -> output axis order
        //   0 : w h
        //   1 : h w

        if (order_type == 0)
        {
            base = (g.y * 8) * psc(outw) + g.x;
            stride = psc(outw);
        }
        else if (order_type == 1)
        {
            base = g.x * psc(outw) + g.y * 8;
            stride = 1;
        }
    }
    else // if (psc(dims) == 3)
    {
        // order_type -> output axis order
        //   0 : w h c
        //   1 : h w c
        //   2 : w c h
        //   3 : c w h
        //   4 : h c w
        //   5 : c h w

        if (order_type == 0)
        {
            base = (g.z * 8) * psc(outcstep) + g.y * psc(outw) + g.x;
            stride = psc(outcstep);
        }
        else if (order_type == 1)
        {
            base = (g.z * 8) * psc(outcstep) + g.x * psc(outw) + g.y;
            stride = psc(outcstep);
        }
        else if (order_type == 2)
        {
            base = g.y * psc(outcstep) + (g.z * 8) * psc(outw) + g.x;
            stride = psc(outw);
        }
        else if (order_type == 3)
        {
            base = g.y * psc(outcstep) + g.x * psc(outw) + g.z * 8;
            stride = 1;
        }
        else if (order_type == 4)
        {
            base = g.x * psc(outcstep) + (g.z * 8) * psc(outw) + g.y;
            stride = psc(outw);
        }
        else if (order_type == 5)
        {
            base = g.x * psc(outcstep) + g.y * psc(outw) + g.z * 8;
            stride = 1;
        }
    }

    // Output indices for scalars 0..3 and 4..7 respectively.
    ivec4 dst_lo = ivec4(base) + ivec4(0, 1, 2, 3) * stride;
    ivec4 dst_hi = dst_lo + 4 * stride;

    // Flat index of this invocation's packed element in the input buffer.
    int src_index = g.z * psc(cstep) + g.y * psc(w) + g.x;

    buffer_cp8to1(top_blob_data, dst_lo, dst_hi, bottom_blob_data, src_index);
#endif
}