1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "eltwise.h"
16 
17 namespace ncnn {
18 
Eltwise()19 Eltwise::Eltwise()
20 {
21     one_blob_only = false;
22     support_inplace = false; // TODO inplace reduction
23 }
24 
load_param(const ParamDict & pd)25 int Eltwise::load_param(const ParamDict& pd)
26 {
27     op_type = pd.get(0, 0);
28     coeffs = pd.get(1, Mat());
29 
30     return 0;
31 }
32 
forward(const std::vector<Mat> & bottom_blobs,std::vector<Mat> & top_blobs,const Option & opt) const33 int Eltwise::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
34 {
35     const Mat& bottom_blob = bottom_blobs[0];
36     int w = bottom_blob.w;
37     int h = bottom_blob.h;
38     int channels = bottom_blob.c;
39     int size = w * h;
40 
41     Mat& top_blob = top_blobs[0];
42     top_blob.create_like(bottom_blob, opt.blob_allocator);
43     if (top_blob.empty())
44         return -100;
45 
46     if (op_type == Operation_PROD)
47     {
48         // first blob
49         const Mat& bottom_blob1 = bottom_blobs[1];
50         #pragma omp parallel for num_threads(opt.num_threads)
51         for (int q = 0; q < channels; q++)
52         {
53             const float* ptr = bottom_blob.channel(q);
54             const float* ptr1 = bottom_blob1.channel(q);
55             float* outptr = top_blob.channel(q);
56 
57             for (int i = 0; i < size; i++)
58             {
59                 outptr[i] = ptr[i] * ptr1[i];
60             }
61         }
62 
63         for (size_t b = 2; b < bottom_blobs.size(); b++)
64         {
65             const Mat& bottom_blob1 = bottom_blobs[b];
66             #pragma omp parallel for num_threads(opt.num_threads)
67             for (int q = 0; q < channels; q++)
68             {
69                 const float* ptr = bottom_blob1.channel(q);
70                 float* outptr = top_blob.channel(q);
71 
72                 for (int i = 0; i < size; i++)
73                 {
74                     outptr[i] *= ptr[i];
75                 }
76             }
77         }
78     }
79     else if (op_type == Operation_SUM)
80     {
81         if (coeffs.w == 0)
82         {
83             // first blob
84             const Mat& bottom_blob1 = bottom_blobs[1];
85             #pragma omp parallel for num_threads(opt.num_threads)
86             for (int q = 0; q < channels; q++)
87             {
88                 const float* ptr = bottom_blob.channel(q);
89                 const float* ptr1 = bottom_blob1.channel(q);
90                 float* outptr = top_blob.channel(q);
91 
92                 for (int i = 0; i < size; i++)
93                 {
94                     outptr[i] = ptr[i] + ptr1[i];
95                 }
96             }
97 
98             for (size_t b = 2; b < bottom_blobs.size(); b++)
99             {
100                 const Mat& bottom_blob1 = bottom_blobs[b];
101                 #pragma omp parallel for num_threads(opt.num_threads)
102                 for (int q = 0; q < channels; q++)
103                 {
104                     const float* ptr = bottom_blob1.channel(q);
105                     float* outptr = top_blob.channel(q);
106 
107                     for (int i = 0; i < size; i++)
108                     {
109                         outptr[i] += ptr[i];
110                     }
111                 }
112             }
113         }
114         else
115         {
116             // first blob
117             const Mat& bottom_blob1 = bottom_blobs[1];
118             float coeff0 = coeffs[0];
119             float coeff1 = coeffs[1];
120             #pragma omp parallel for num_threads(opt.num_threads)
121             for (int q = 0; q < channels; q++)
122             {
123                 const float* ptr = bottom_blob.channel(q);
124                 const float* ptr1 = bottom_blob1.channel(q);
125                 float* outptr = top_blob.channel(q);
126 
127                 for (int i = 0; i < size; i++)
128                 {
129                     outptr[i] = ptr[i] * coeff0 + ptr1[i] * coeff1;
130                 }
131             }
132 
133             for (size_t b = 2; b < bottom_blobs.size(); b++)
134             {
135                 const Mat& bottom_blob1 = bottom_blobs[b];
136                 float coeff = coeffs[b];
137                 #pragma omp parallel for num_threads(opt.num_threads)
138                 for (int q = 0; q < channels; q++)
139                 {
140                     const float* ptr = bottom_blob1.channel(q);
141                     float* outptr = top_blob.channel(q);
142 
143                     for (int i = 0; i < size; i++)
144                     {
145                         outptr[i] += ptr[i] * coeff;
146                     }
147                 }
148             }
149         }
150     }
151     else if (op_type == Operation_MAX)
152     {
153         // first blob
154         const Mat& bottom_blob1 = bottom_blobs[1];
155         #pragma omp parallel for num_threads(opt.num_threads)
156         for (int q = 0; q < channels; q++)
157         {
158             const float* ptr = bottom_blob.channel(q);
159             const float* ptr1 = bottom_blob1.channel(q);
160             float* outptr = top_blob.channel(q);
161 
162             for (int i = 0; i < size; i++)
163             {
164                 outptr[i] = std::max(ptr[i], ptr1[i]);
165             }
166         }
167 
168         for (size_t b = 2; b < bottom_blobs.size(); b++)
169         {
170             const Mat& bottom_blob1 = bottom_blobs[b];
171             #pragma omp parallel for num_threads(opt.num_threads)
172             for (int q = 0; q < channels; q++)
173             {
174                 const float* ptr = bottom_blob1.channel(q);
175                 float* outptr = top_blob.channel(q);
176 
177                 for (int i = 0; i < size; i++)
178                 {
179                     outptr[i] = std::max(outptr[i], ptr[i]);
180                 }
181             }
182         }
183     }
184 
185     return 0;
186 }
187 
188 } // namespace ncnn
189