1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "eltwise.h"
16
17 namespace ncnn {
18
Eltwise()19 Eltwise::Eltwise()
20 {
21 one_blob_only = false;
22 support_inplace = false; // TODO inplace reduction
23 }
24
load_param(const ParamDict & pd)25 int Eltwise::load_param(const ParamDict& pd)
26 {
27 op_type = pd.get(0, 0);
28 coeffs = pd.get(1, Mat());
29
30 return 0;
31 }
32
forward(const std::vector<Mat> & bottom_blobs,std::vector<Mat> & top_blobs,const Option & opt) const33 int Eltwise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
34 {
35 const Mat& bottom_blob = bottom_blobs[0];
36 int w = bottom_blob.w;
37 int h = bottom_blob.h;
38 int channels = bottom_blob.c;
39 int size = w * h;
40
41 Mat& top_blob = top_blobs[0];
42 top_blob.create_like(bottom_blob, opt.blob_allocator);
43 if (top_blob.empty())
44 return -100;
45
46 if (op_type == Operation_PROD)
47 {
48 // first blob
49 const Mat& bottom_blob1 = bottom_blobs[1];
50 #pragma omp parallel for num_threads(opt.num_threads)
51 for (int q = 0; q < channels; q++)
52 {
53 const float* ptr = bottom_blob.channel(q);
54 const float* ptr1 = bottom_blob1.channel(q);
55 float* outptr = top_blob.channel(q);
56
57 for (int i = 0; i < size; i++)
58 {
59 outptr[i] = ptr[i] * ptr1[i];
60 }
61 }
62
63 for (size_t b = 2; b < bottom_blobs.size(); b++)
64 {
65 const Mat& bottom_blob1 = bottom_blobs[b];
66 #pragma omp parallel for num_threads(opt.num_threads)
67 for (int q = 0; q < channels; q++)
68 {
69 const float* ptr = bottom_blob1.channel(q);
70 float* outptr = top_blob.channel(q);
71
72 for (int i = 0; i < size; i++)
73 {
74 outptr[i] *= ptr[i];
75 }
76 }
77 }
78 }
79 else if (op_type == Operation_SUM)
80 {
81 if (coeffs.w == 0)
82 {
83 // first blob
84 const Mat& bottom_blob1 = bottom_blobs[1];
85 #pragma omp parallel for num_threads(opt.num_threads)
86 for (int q = 0; q < channels; q++)
87 {
88 const float* ptr = bottom_blob.channel(q);
89 const float* ptr1 = bottom_blob1.channel(q);
90 float* outptr = top_blob.channel(q);
91
92 for (int i = 0; i < size; i++)
93 {
94 outptr[i] = ptr[i] + ptr1[i];
95 }
96 }
97
98 for (size_t b = 2; b < bottom_blobs.size(); b++)
99 {
100 const Mat& bottom_blob1 = bottom_blobs[b];
101 #pragma omp parallel for num_threads(opt.num_threads)
102 for (int q = 0; q < channels; q++)
103 {
104 const float* ptr = bottom_blob1.channel(q);
105 float* outptr = top_blob.channel(q);
106
107 for (int i = 0; i < size; i++)
108 {
109 outptr[i] += ptr[i];
110 }
111 }
112 }
113 }
114 else
115 {
116 // first blob
117 const Mat& bottom_blob1 = bottom_blobs[1];
118 float coeff0 = coeffs[0];
119 float coeff1 = coeffs[1];
120 #pragma omp parallel for num_threads(opt.num_threads)
121 for (int q = 0; q < channels; q++)
122 {
123 const float* ptr = bottom_blob.channel(q);
124 const float* ptr1 = bottom_blob1.channel(q);
125 float* outptr = top_blob.channel(q);
126
127 for (int i = 0; i < size; i++)
128 {
129 outptr[i] = ptr[i] * coeff0 + ptr1[i] * coeff1;
130 }
131 }
132
133 for (size_t b = 2; b < bottom_blobs.size(); b++)
134 {
135 const Mat& bottom_blob1 = bottom_blobs[b];
136 float coeff = coeffs[b];
137 #pragma omp parallel for num_threads(opt.num_threads)
138 for (int q = 0; q < channels; q++)
139 {
140 const float* ptr = bottom_blob1.channel(q);
141 float* outptr = top_blob.channel(q);
142
143 for (int i = 0; i < size; i++)
144 {
145 outptr[i] += ptr[i] * coeff;
146 }
147 }
148 }
149 }
150 }
151 else if (op_type == Operation_MAX)
152 {
153 // first blob
154 const Mat& bottom_blob1 = bottom_blobs[1];
155 #pragma omp parallel for num_threads(opt.num_threads)
156 for (int q = 0; q < channels; q++)
157 {
158 const float* ptr = bottom_blob.channel(q);
159 const float* ptr1 = bottom_blob1.channel(q);
160 float* outptr = top_blob.channel(q);
161
162 for (int i = 0; i < size; i++)
163 {
164 outptr[i] = std::max(ptr[i], ptr1[i]);
165 }
166 }
167
168 for (size_t b = 2; b < bottom_blobs.size(); b++)
169 {
170 const Mat& bottom_blob1 = bottom_blobs[b];
171 #pragma omp parallel for num_threads(opt.num_threads)
172 for (int q = 0; q < channels; q++)
173 {
174 const float* ptr = bottom_blob1.channel(q);
175 float* outptr = top_blob.channel(q);
176
177 for (int i = 0; i < size; i++)
178 {
179 outptr[i] = std::max(outptr[i], ptr[i]);
180 }
181 }
182 }
183 }
184
185 return 0;
186 }
187
188 } // namespace ncnn
189