1 // Leo is pleased to support the open source community by making ncnn available. 2 // 3 // Copyright (C) 2019 Leo <leo@nullptr.com.cn>. All rights reserved. 4 // 5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 // in compliance with the License. You may obtain a copy of the License at 7 // 8 // https://opensource.org/licenses/BSD-3-Clause 9 // 10 // Unless required by applicable law or agreed to in writing, software distributed 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 // specific language governing permissions and limitations under the License. 14 15 #include "bias_mips.h" 16 17 #if __mips_msa 18 #include "mips_common.h" 19 20 #include <msa.h> 21 #endif // __mips_msa 22 23 namespace ncnn { 24 forward_inplace(Mat & bottom_top_blob,const Option & opt) const25int Bias_mips::forward_inplace(Mat& bottom_top_blob, const Option& opt) const 26 { 27 int w = bottom_top_blob.w; 28 int h = bottom_top_blob.h; 29 int channels = bottom_top_blob.c; 30 int size = w * h; 31 32 const float* bias_ptr = bias_data; 33 #pragma omp parallel for num_threads(opt.num_threads) 34 for (int q = 0; q < channels; q++) 35 { 36 float* ptr = bottom_top_blob.channel(q); 37 38 float bias = bias_ptr[q]; 39 40 #if __mips_msa 41 int nn = size >> 2; 42 int remain = size - (nn << 2); 43 #else 44 int remain = size; 45 #endif // __mips_msa 46 47 #if __mips_msa 48 v4f32 _bias = (v4f32)__msa_fill_w_f32(bias); 49 for (; nn > 0; nn--) 50 { 51 v4f32 _p = (v4f32)__msa_ld_w(ptr, 0); 52 v4f32 _outp = __msa_fadd_w(_p, _bias); 53 __msa_st_w((v4i32)_outp, ptr, 0); 54 55 ptr += 4; 56 } 57 #endif // __mips_msa 58 59 for (; remain > 0; remain--) 60 { 61 *ptr = *ptr + bias; 62 ptr++; 63 } 64 } 65 66 return 0; 67 } 68 69 } // namespace ncnn 70