// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Optional 16-bit extensions; the NCNN_* switches are supplied from outside this file.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Rank used by main() to split a flat output index into (x, y, z) coordinates.
// main() handles the values 1, 2, 3 and 4.
layout (constant_id = 0) const int ndim = 0;

// Specialization-constant copies of the input shape. They carry the same names as the
// fields of the push-constant block below.
// NOTE(review): psc() is not defined in this file; presumably it selects the
// specialization constant when it is non-zero and falls back to p.<name> — confirm.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int d = 0;
layout (constant_id = shape_constant_id_offset + 4) const int c = 0;
layout (constant_id = shape_constant_id_offset + 5) const int cstep = 0;

// Specialization-constant copies of the output shape.
layout (constant_id = shape_constant_id_offset + 6) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outd = 0;
layout (constant_id = shape_constant_id_offset + 10) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 11) const int outcstep = 0;

// Binding 0 is the input and binding 1 is the output, either as 3D images or as
// storage buffers. main() reads the input four components at a time and writes the
// output one component at a time.
// NOTE(review): unfp, imfmtc1, sfp and sfpvec4 are defined outside this file.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
#endif

// Run-time shapes: input fields first, then the matching output fields.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int d;
    int c;
    int cstep;

    int outdims;
    int outw;
    int outh;
    int outd;
    int outc;
    int outcstep;
} p;

// Scatters one 4-component input element into four scalar slots of the output.
//
// Each invocation handles the input element at (gx, gy, gz):
//   1. compute, per component, a flat scalar index i4 from the input shape;
//   2. split i4 into output coordinates (x4, y4, z4) according to ndim;
//   3. store the four components at those coordinates.
//
// NOTE(review): psc(), image3d_ld4(), image3d_st1() and buffer_cp4to1() are defined
// outside this file; psc(name) is assumed to yield the shape value called `name`.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // The y axis of the dispatch spans h * d rows.
    if (gx >= psc(w) || gy >= psc(h) * psc(d) || gz >= psc(c))
        return;

    // Flat scalar index of each component. Component k (k = 0..3) uses index 4 * g + k
    // along the outermost input axis, so its stride is the size of everything below
    // that axis.
    ivec4 i4;
    if (psc(dims) == 1)
    {
        i4 = gx * 4 + ivec4(0, 1, 2, 3);
    }
    else if (psc(dims) == 2)
    {
        i4 = (gy * 4) * psc(w) + gx + ivec4(0, 1, 2, 3) * psc(w);
    }
    else if (psc(dims) == 3)
    {
        i4 = (gz * 4) * psc(h) * psc(w) + gy * psc(w) + gx + ivec4(0, 1, 2, 3) * psc(h) * psc(w);
    }
    else // if (psc(dims) == 4)
    {
        i4 = (gz * 4) * psc(d) * psc(h) * psc(w) + gy * psc(w) + gx + ivec4(0, 1, 2, 3) * psc(d) * psc(h) * psc(w);
    }

    // Split the flat index into output coordinates. This is done once and shared by
    // the image and buffer paths.
    ivec4 x4;
    ivec4 y4;
    ivec4 z4;

    if (ndim == 1)
    {
        x4 = i4;
        y4 = ivec4(0);
        z4 = ivec4(0);
    }
    else if (ndim == 2)
    {
        x4 = i4 % psc(outw);
        y4 = i4 / psc(outw);
        z4 = ivec4(0);
    }
    else if (ndim == 3)
    {
        int size = psc(outw) * psc(outh);
        ivec4 rem4 = i4 % size;

        x4 = rem4 % psc(outw);
        y4 = rem4 / psc(outw);
        z4 = i4 / size;
    }
    else if (ndim == 4)
    {
        int dsize = psc(outw) * psc(outh);
        int size = dsize * psc(outd);
        ivec4 rem4 = i4 % size;
        ivec4 drem4 = rem4 % dsize;

        ivec4 yd4 = rem4 / dsize;
        ivec4 yh4 = drem4 / psc(outw);

        x4 = drem4 % psc(outw);
        // The depth and height indices are folded into one row index.
        y4 = yd4 * psc(outh) + yh4;
        z4 = i4 / size;
    }
    else
    {
        // Unsupported rank: the coordinates would be uninitialised, so store nothing.
        return;
    }

#if NCNN_image_shader
    afpvec4 v;
    if (psc(dims) == 1)
    {
        v = image3d_ld4(bottom_blob_3d, ivec3(gx, 0, 0));
    }
    else if (psc(dims) == 2)
    {
        v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, 0));
    }
    else // if (psc(dims) == 3 || psc(dims) == 4)
    {
        v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));
    }

    image3d_st1(top_blob_3d, ivec3(x4.r, y4.r, z4.r), v.r);
    image3d_st1(top_blob_3d, ivec3(x4.g, y4.g, z4.g), v.g);
    image3d_st1(top_blob_3d, ivec3(x4.b, y4.b, z4.b), v.b);
    image3d_st1(top_blob_3d, ivec3(x4.a, y4.a, z4.a), v.a);
#else
    // Linear output offsets. For ndim 1 and 2, z4 is zero, so the outcstep term
    // vanishes.
    ivec4 v_offset = z4 * psc(outcstep) + y4 * psc(outw) + x4;

    // Linear index of the input element handled by this invocation.
    int gi = gz * psc(cstep) + gy * psc(w) + gx;

    buffer_cp4to1(top_blob_data, v_offset, bottom_blob_data, gi);
#endif
}
193