1 /*******************************************************************************
2  * Copyright 2020-2021 Intel Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *******************************************************************************/
16 
17 #ifndef GPU_OCL_OFFSETS_H
18 #define GPU_OCL_OFFSETS_H
19 
// Linear element offset of coordinate (n, c, d, h, w) in an unblocked
// layout where w varies fastest, followed by h, d, c and finally n.
// C, D, H, W are the extents of the corresponding dimensions.
int off_ncdhw(int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Element strides, built from the innermost dimension outwards.
    const int stride_h = W;
    const int stride_d = H * stride_h;
    const int stride_c = D * stride_d;
    const int stride_n = C * stride_c;
    return n * stride_n + c * stride_c + d * stride_d + h * stride_h + w;
}
// Linear element offset of coordinate (n, c, d, h, w) in an unblocked
// channels-last layout: c varies fastest, followed by w, h, d and finally n.
// C, D, H, W are the extents of the corresponding dimensions.
int off_ndhwc(int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    // Element strides, built from the innermost dimension outwards.
    const int stride_w = C;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_n = D * stride_d;
    return n * stride_n + d * stride_d + h * stride_h + w * stride_w + c;
}
38 
// Linear element offset of coordinate (n, c, d, h, w) in a layout that
// groups channels into blocks of 16: the position inside the block (c % 16)
// varies fastest, followed by w, h, d, the channel block index (c / 16)
// and finally n.
// The number of channel blocks is computed as C / 16 with integer division,
// so a C that is not a multiple of 16 is rounded down.
int off_nCdhw16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int blk = 16;
    // Element strides, built from the innermost spatial dimension outwards.
    const int stride_w = blk;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_cb = D * stride_d; // one whole channel block
    const int stride_n = (C / blk) * stride_cb;
    return n * stride_n + (c / blk) * stride_cb + d * stride_d + h * stride_h
            + w * stride_w + c % blk;
}
50 
// Linear element offset of coordinate (n, c, d, h, w) in a layout tiled
// 16 x 16 over (n, c): inside a tile c % 16 varies fastest, then n % 16.
// Tiles are ordered by w, h, d, the channel block index (c / 16) and
// finally the batch block index (n / 16).
// The number of channel blocks is C / 16 (integer division, rounds down).
int off_NCdhw16n16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int n_blk = 16;
    const int c_blk = 16;
    // Element strides, built from one tile outwards.
    const int stride_w = n_blk * c_blk;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_cb = D * stride_d;
    const int stride_nb = (C / c_blk) * stride_cb;
    const int outer = (n / n_blk) * stride_nb + (c / c_blk) * stride_cb
            + d * stride_d + h * stride_h + w * stride_w;
    const int inner = (n % n_blk) * c_blk + (c % c_blk);
    return outer + inner;
}
63 
off_nCdhw32c(int n,int c,int d,int h,int w,int C,int D,int H,int W)64 int off_nCdhw32c(
65         int n, int c, int d, int h, int w, int C, int D, int H, int W) {
66     int c_32_block = OC % 32 ? (32 + OC - (OC % 32)) : OC;
67     int off = 0;
68     off += n * (c_32_block / 32) * G * D * H * W * 32;
69     off += (c / 32) * D * H * W * 32;
70     off += d * H * W * 32;
71     off += h * W * 32;
72     off += w * 32;
73     off += c % 32;
74     return off;
75 }
76 
// Linear element offset of coordinate (n, c, d, h, w) in a layout tiled
// 32 x 16 over (n, c): inside a tile c % 16 varies fastest, then n % 32.
// Tiles are ordered by w, h, d, the channel block index (c / 16) and
// finally the batch block index (n / 32).
// The number of channel blocks is C / 16 (integer division, rounds down).
int off_NCdhw32n16c(
        int n, int c, int d, int h, int w, int C, int D, int H, int W) {
    const int n_blk = 32;
    const int c_blk = 16;
    // Element strides, built from one tile outwards.
    const int stride_w = n_blk * c_blk;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_cb = D * stride_d;
    const int stride_nb = (C / c_blk) * stride_cb;
    const int outer = (n / n_blk) * stride_nb + (c / c_blk) * stride_cb
            + d * stride_d + h * stride_h + w * stride_w;
    const int inner = (n % n_blk) * c_blk + (c % c_blk);
    return outer + inner;
}
89 
off_NCdhw32n32c(int n,int c,int d,int h,int w,int C,int D,int H,int W)90 int off_NCdhw32n32c(
91         int n, int c, int d, int h, int w, int C, int D, int H, int W) {
92     int c_32_block = OC % 32 ? (32 + OC - (OC % 32)) : OC;
93     int off = 0;
94     off += (n / 32) * (c_32_block / 32) * D * H * W * 32 * 32;
95     off += (c / 32) * D * H * W * 32 * 32;
96     off += d * H * W * 32 * 32;
97     off += h * W * 32 * 32;
98     off += w * 32 * 32;
99     off += (n % 32) * 32;
100     off += (c % 32);
101     return off;
102 }
103 
// Linear element offset of coordinate (g, o, i, d, h, w) in a layout that
// groups the o dimension into blocks of 16: the position inside the block
// (o % 16) varies fastest, followed by i, w, h, d, the o block index
// (o / 16) and finally g.
// O, I, D, H, W are the extents of the corresponding dimensions; the number
// of o blocks is O / 16 (integer division, rounds down).
int off_gOdhwi16o(int g, int o, int i, int d, int h, int w, int O, int I, int D,
        int H, int W) {
    const int o_blk = 16;
    // Element strides, built from the innermost dimension outwards.
    const int stride_i = o_blk;
    const int stride_w = I * stride_i;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_ob = D * stride_d; // one whole o block
    const int stride_g = (O / o_blk) * stride_ob;
    return g * stride_g + (o / o_blk) * stride_ob + d * stride_d
            + h * stride_h + w * stride_w + i * stride_i + o % o_blk;
}
116 
// Linear element offset of coordinate (g, o, i, d, h, w) in a layout tiled
// 16 x 16 over (i, o): inside a tile o % 16 varies fastest, then i % 16.
// Tiles are ordered by w, h, d, the i block index (i / 16), the o block
// index (o / 16) and finally g.
// Block counts are O / 16 and I / 16 (integer division, rounds down).
int off_gOIdhw16i16o(int g, int o, int i, int d, int h, int w, int O, int I,
        int D, int H, int W) {
    const int blk = 16;
    // Element strides, built from one tile outwards.
    const int stride_w = blk * blk;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_ib = D * stride_d;
    const int stride_ob = (I / blk) * stride_ib;
    const int stride_g = (O / blk) * stride_ob;
    const int outer = g * stride_g + (o / blk) * stride_ob
            + (i / blk) * stride_ib + d * stride_d + h * stride_h
            + w * stride_w;
    const int inner = (i % blk) * blk + (o % blk);
    return outer + inner;
}
130 
// Linear element offset of coordinate (g, o, i, d, h, w) in a layout tiled
// 16 x 16 over (i, o): inside a tile o % 16 varies fastest, then i % 16.
// Tiles are ordered by w, h, d, the o block index (o / 16), the i block
// index (i / 16) and finally g.
// Block counts are O / 16 and I / 16 (integer division, rounds down).
int off_gIOdhw16i16o(int g, int o, int i, int d, int h, int w, int O, int I,
        int D, int H, int W) {
    const int blk = 16;
    // Element strides, built from one tile outwards.
    const int stride_w = blk * blk;
    const int stride_h = W * stride_w;
    const int stride_d = H * stride_h;
    const int stride_ob = D * stride_d;
    const int stride_ib = (O / blk) * stride_ob;
    const int stride_g = (I / blk) * stride_ib;
    const int outer = g * stride_g + (i / blk) * stride_ib
            + (o / blk) * stride_ob + d * stride_d + h * stride_h
            + w * stride_w;
    const int inner = (i % blk) * blk + (o % blk);
    return outer + inner;
}
144 
src_off(int n,int c,int d,int h,int w)145 int src_off(int n, int c, int d, int h, int w) {
146 #if SRC_NCHW
147     return off_ncdhw(n, c, d, h, w, G * IC, ID, IH, IW);
148 #elif SRC_NHWC
149     return off_ndhwc(n, c, d, h, w, G * IC, ID, IH, IW);
150 #elif SRC_W16C
151     return off_nCdhw16c(n, c, d, h, w, G * IC, ID, IH, IW);
152 #elif SRC_16N16C
153     return off_NCdhw16n16c(n, c, d, h, w, G * IC, ID, IH, IW);
154 #else
155 #error "Unknown layout"
156 #endif
157 }
158 
wei_off(int g,int o,int i,int d,int h,int w)159 int wei_off(int g, int o, int i, int d, int h, int w) {
160 #if WEI_I16O
161     return off_gOdhwi16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
162 #elif WEI_16I16O
163     return off_gOIdhw16i16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
164 #elif WEI_16I16O_FLIPPED
165     return off_gIOdhw16i16o(g, o, i, d, h, w, OC, IC, KD, KH, KW);
166 #else
167 #error "Unknown layout"
168 #endif
169     return 0;
170 }
171 
dst_off(int n,int c,int d,int h,int w)172 int dst_off(int n, int c, int d, int h, int w) {
173 #if DST_NCHW
174     return off_ncdhw(n, c, d, h, w, G * OC_WO_PADDING, OD, OH, OW);
175 #elif DST_NHWC
176     return off_ndhwc(n, c, d, h, w, G * OC, OD, OH, OW);
177 #elif DST_W16C
178     return off_nCdhw16c(n, c, d, h, w, G * OC, OD, OH, OW);
179 #elif DST_16N16C
180     return off_NCdhw16n16c(n, c, d, h, w, G * OC, OD, OH, OW);
181 #elif DST_W32C
182     return off_nCdhw32c(n, c, d, h, w, G * OC, OD, OH, OW);
183 #elif DST_32N16C
184     return off_NCdhw32n16c(n, c, d, h, w, G * OC, OD, OH, OW);
185 #elif DST_32N32C
186     return off_NCdhw32n32c(n, c, d, h, w, G * OC, OD, OH, OW);
187 #else
188 #error "Unknown layout"
189 #endif
190 }
191 
192 #endif // GPU_OCL_OFFSETS_H
193