1 // Leo is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2019 Leo <leo@nullptr.com.cn>. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "bias_mips.h"
16 
17 #if __mips_msa
18 #include "mips_common.h"
19 
20 #include <msa.h>
21 #endif // __mips_msa
22 
23 namespace ncnn {
24 
forward_inplace(Mat & bottom_top_blob,const Option & opt) const25 int Bias_mips::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
26 {
27     int w = bottom_top_blob.w;
28     int h = bottom_top_blob.h;
29     int channels = bottom_top_blob.c;
30     int size = w * h;
31 
32     const float* bias_ptr = bias_data;
33     #pragma omp parallel for num_threads(opt.num_threads)
34     for (int q = 0; q < channels; q++)
35     {
36         float* ptr = bottom_top_blob.channel(q);
37 
38         float bias = bias_ptr[q];
39 
40 #if __mips_msa
41         int nn = size >> 2;
42         int remain = size - (nn << 2);
43 #else
44         int remain = size;
45 #endif // __mips_msa
46 
47 #if __mips_msa
48         v4f32 _bias = (v4f32)__msa_fill_w_f32(bias);
49         for (; nn > 0; nn--)
50         {
51             v4f32 _p = (v4f32)__msa_ld_w(ptr, 0);
52             v4f32 _outp = __msa_fadd_w(_p, _bias);
53             __msa_st_w((v4i32)_outp, ptr, 0);
54 
55             ptr += 4;
56         }
57 #endif // __mips_msa
58 
59         for (; remain > 0; remain--)
60         {
61             *ptr = *ptr + bias;
62             ptr++;
63         }
64     }
65 
66     return 0;
67 }
68 
69 } // namespace ncnn
70