1 /*******************************************************************************
2 * Copyright 2020-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #ifndef GPU_OCL_OFFSETS_H
18 #define GPU_OCL_OFFSETS_H
19
// Linear offset of element (n, c, d, h, w) in a plain layout where w varies
// fastest, followed by h, d, c and finally n.
// C, D, H, W are the extents of the channel, depth, height and width axes.
int off_ncdhw(int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Nested-multiplication form of
    // n*C*D*H*W + c*D*H*W + d*H*W + h*W + w.
    int idx = n;
    idx = idx * C + c;
    idx = idx * D + d;
    idx = idx * H + h;
    idx = idx * W + w;
    return idx;
}
// Linear offset of element (n, c, d, h, w) in a channels-last layout where c
// varies fastest, followed by w, h, d and finally n.
// C, D, H, W are the extents of the channel, depth, height and width axes.
int off_ndhwc(int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Nested-multiplication form of
    // n*D*H*W*C + d*H*W*C + h*W*C + w*C + c.
    int idx = n;
    idx = idx * D + d;
    idx = idx * H + h;
    idx = idx * W + w;
    idx = idx * C + c;
    return idx;
}
38
// Linear offset of element (n, c, d, h, w) in a layout that groups channels
// into blocks of 16: the position inside a block (c % 16) varies fastest,
// followed by w, h, d, the channel block (c / 16) and n.
// The number of channel blocks is C / 16 (integer division), so C is
// presumably expected to be a multiple of 16 -- verify against callers.
int off_nCdhw16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int blk = 16;
    const int c_blocks = C / blk;

    // Index of the 16-element block that holds the requested element.
    int block_idx = n * c_blocks + c / blk;
    block_idx = block_idx * D + d;
    block_idx = block_idx * H + h;
    block_idx = block_idx * W + w;

    return block_idx * blk + c % blk;
}
50
// Linear offset of element (n, c, d, h, w) in a layout that tiles both the
// batch and channel axes by 16. Inside a 16x16 tile the element sits at
// (n % 16) * 16 + (c % 16); tiles are ordered with w fastest, followed by
// h, d, the channel block (c / 16) and the batch block (n / 16).
// The number of channel blocks is C / 16 (integer division), so C is
// presumably expected to be a multiple of 16 -- verify against callers.
int off_NCdhw16n16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int n_blk = 16;
    const int c_blk = 16;
    const int tile = n_blk * c_blk;

    // Index of the tile that holds the requested element.
    int tile_idx = (n / n_blk) * (C / c_blk) + c / c_blk;
    tile_idx = tile_idx * D + d;
    tile_idx = tile_idx * H + h;
    tile_idx = tile_idx * W + w;

    // Position inside the tile.
    const int inner = (n % n_blk) * c_blk + (c % c_blk);

    return tile_idx * tile + inner;
}
63
// Linear offset of element (n, c, d, h, w) in a layout that groups channels
// into blocks of 32: the position inside a block (c % 32) varies fastest,
// followed by w, h, d, the channel block (c / 32) and n.
//
// C is the total channel count. It is rounded up to a whole number of
// 32-channel blocks, so a C that is not a multiple of 32 gets a padded
// trailing block. The block count is derived from the C argument rather than
// from build-time macros, which keeps the batch stride consistent with the
// channel-block index (c / 32) used below.
int off_nCdhw32c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Number of 32-channel blocks, rounding up.
    const int c_blocks = (C + 31) / 32;
    int off = 0;
    off += n * c_blocks * D * H * W * 32;
    off += (c / 32) * D * H * W * 32;
    off += d * H * W * 32;
    off += h * W * 32;
    off += w * 32;
    off += c % 32;
    return off;
}
76
// Linear offset of element (n, c, d, h, w) in a layout that tiles the batch
// axis by 32 and the channel axis by 16. Inside a 32x16 tile the element sits
// at (n % 32) * 16 + (c % 16); tiles are ordered with w fastest, followed by
// h, d, the channel block (c / 16) and the batch block (n / 32).
// The number of channel blocks is C / 16 (integer division), so C is
// presumably expected to be a multiple of 16 -- verify against callers.
int off_NCdhw32n16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int n_blk = 32;
    const int c_blk = 16;
    const int tile = n_blk * c_blk;

    // Index of the tile that holds the requested element.
    int tile_idx = (n / n_blk) * (C / c_blk) + c / c_blk;
    tile_idx = tile_idx * D + d;
    tile_idx = tile_idx * H + h;
    tile_idx = tile_idx * W + w;

    // Position inside the tile.
    const int inner = (n % n_blk) * c_blk + (c % c_blk);

    return tile_idx * tile + inner;
}
89
// Linear offset of element (n, c, d, h, w) in a layout that tiles both the
// batch and channel axes by 32. Inside a 32x32 tile the element sits at
// (n % 32) * 32 + (c % 32); tiles are ordered with w fastest, followed by
// h, d, the channel block (c / 32) and the batch block (n / 32).
//
// C is the total channel count. It is rounded up to a whole number of
// 32-channel blocks, so a C that is not a multiple of 32 gets a padded
// trailing block. The block count is derived from the C argument rather than
// from build-time macros, which keeps the batch-block stride consistent with
// the channel-block index (c / 32) used below.
int off_NCdhw32n32c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Number of 32-channel blocks, rounding up.
    const int c_blocks = (C + 31) / 32;
    int off = 0;
    off += (n / 32) * c_blocks * D * H * W * 32 * 32;
    off += (c / 32) * D * H * W * 32 * 32;
    off += d * H * W * 32 * 32;
    off += h * W * 32 * 32;
    off += w * 32 * 32;
    off += (n % 32) * 32;
    off += (c % 32);
    return off;
}
103
// Linear offset of weights element (g, o, i, d, h, w) in a layout that groups
// output channels into blocks of 16: the position inside a block (o % 16)
// varies fastest, followed by i, w, h, d, the output-channel block (o / 16)
// and the group g.
// O and I are the output/input channel extents; D, H, W are the spatial
// extents. The number of output-channel blocks is O / 16 (integer division),
// so O is presumably expected to be a multiple of 16 -- verify against callers.
int off_gOdhwi16o(int g, int o, int i, int d, int h, int w, int O, int I, int D,
        int H, int W) {
    const int o_blk = 16;

    // Index of the 16-element block that holds the requested element.
    int block_idx = g * (O / o_blk) + o / o_blk;
    block_idx = block_idx * D + d;
    block_idx = block_idx * H + h;
    block_idx = block_idx * W + w;
    block_idx = block_idx * I + i;

    return block_idx * o_blk + (o % o_blk);
}
116
// Linear offset of weights element (g, o, i, d, h, w) in a layout that tiles
// both channel axes by 16. Inside a 16x16 tile the element sits at
// (i % 16) * 16 + (o % 16); tiles are ordered with w fastest, followed by
// h, d, the input-channel block (i / 16), the output-channel block (o / 16)
// and the group g.
// Block counts are O / 16 and I / 16 (integer division), so both are
// presumably expected to be multiples of 16 -- verify against callers.
int off_gOIdhw16i16o(int g, int o, int i, int d, int h, int w, int O, int I,
        int D, int H, int W) {
    const int blk = 16;
    const int tile = blk * blk;

    // Index of the tile that holds the requested element.
    int tile_idx = g * (O / blk) + o / blk;
    tile_idx = tile_idx * (I / blk) + i / blk;
    tile_idx = tile_idx * D + d;
    tile_idx = tile_idx * H + h;
    tile_idx = tile_idx * W + w;

    // Position inside the tile.
    const int inner = (i % blk) * blk + (o % blk);

    return tile_idx * tile + inner;
}
130
// Linear offset of weights element (g, o, i, d, h, w) in a layout that tiles
// both channel axes by 16, with the input-channel block outermost. Inside a
// 16x16 tile the element sits at (i % 16) * 16 + (o % 16); tiles are ordered
// with w fastest, followed by h, d, the output-channel block (o / 16), the
// input-channel block (i / 16) and the group g.
// Block counts are O / 16 and I / 16 (integer division), so both are
// presumably expected to be multiples of 16 -- verify against callers.
int off_gIOdhw16i16o(int g, int o, int i, int d, int h, int w, int O, int I,
        int D, int H, int W) {
    const int blk = 16;
    const int tile = blk * blk;

    // Index of the tile that holds the requested element.
    int tile_idx = g * (I / blk) + i / blk;
    tile_idx = tile_idx * (O / blk) + o / blk;
    tile_idx = tile_idx * D + d;
    tile_idx = tile_idx * H + h;
    tile_idx = tile_idx * W + w;

    // Position inside the tile.
    const int inner = (i % blk) * blk + (o % blk);

    return tile_idx * tile + inner;
}
144
src_off(int n,int c,int d,int h,int w)145 int src_off(int n, int c, int d, int h, int w) {
146 #if SRC_NCHW
147 return off_ncdhw(n, c, d, h, w, G * IC, ID, IH, IW);
148 #elif SRC_NHWC
149 return off_ndhwc(n, c, d, h, w, G * IC, ID, IH, IW);
150 #elif SRC_W16C
151 return off_nCdhw16c(n, c, d, h, w, G * IC, ID, IH, IW);
152 #elif SRC_16N16C
153 return off_NCdhw16n16c(n, c, d, h, w, G * IC, ID, IH, IW);
154 #else
155 #error "Unknown layout"
156 #endif
157 }
158
wei_off(int g,int o,int i,int d,int h,int w)159 int wei_off(int g, int o, int i, int d, int h, int w) {
160 #if WEI_I16O
161 return off_gOdhwi16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
162 #elif WEI_16I16O
163 return off_gOIdhw16i16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
164 #elif WEI_16I16O_FLIPPED
165 return off_gIOdhw16i16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
166 #else
167 #error "Unknown layout"
168 #endif
169 return 0;
170 }
171
dst_off(int n,int c,int d,int h,int w)172 int dst_off(int n, int c, int d, int h, int w) {
173 #if DST_NCHW
174 return off_ncdhw(n, c, d, h, w, G * OC_WO_PADDING, OD, OH, OW);
175 #elif DST_NHWC
176 return off_ndhwc(n, c, d, h, w, G * OC, OD, OH, OW);
177 #elif DST_W16C
178 return off_nCdhw16c(n, c, d, h, w, G * OC, OD, OH, OW);
179 #elif DST_16N16C
180 return off_NCdhw16n16c(n, c, d, h, w, G * OC, OD, OH, OW);
181 #elif DST_W32C
182 return off_nCdhw32c(n, c, d, h, w, G * OC, OD, OH, OW);
183 #elif DST_32N16C
184 return off_NCdhw32n16c(n, c, d, h, w, G * OC, OD, OH, OW);
185 #elif DST_32N32C
186 return off_NCdhw32n32c(n, c, d, h, w, G * OC, OD, OH, OW);
187 #else
188 #error "Unknown layout"
189 #endif
190 }
191
192 #endif // GPU_OCL_OFFSETS_H
193