1 // Leo is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2019 Leo <leo@nullptr.com.cn>. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
#include "absval_mips.h"

#include <math.h>

#if __mips_msa
#include <msa.h>
#endif // __mips_msa
20 
21 namespace ncnn {
22 
forward_inplace(Mat & bottom_top_blob,const Option & opt) const23 int AbsVal_mips::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
24 {
25     int w = bottom_top_blob.w;
26     int h = bottom_top_blob.h;
27     int channels = bottom_top_blob.c;
28     int size = w * h;
29 
30     #pragma omp parallel for num_threads(opt.num_threads)
31     for (int q = 0; q < channels; q++)
32     {
33         float* ptr = bottom_top_blob.channel(q);
34 
35 #if __mips_msa
36         int nn = size >> 2;
37         int remain = size - (nn << 2);
38 #else
39         int remain = size;
40 #endif // __mips_msa
41 
42 #if __mips_msa
43         for (; nn > 0; nn--)
44         {
45             v4u32 _p = (v4u32)__msa_ld_w(ptr, 0);
46             v4f32 _outp = (v4f32)__msa_bclri_w(_p, 31);
47             __msa_st_w((v4i32)_outp, ptr, 0);
48 
49             ptr += 4;
50         }
51 #endif // __mips_msa
52         for (; remain > 0; remain--)
53         {
54             *ptr = *ptr > 0 ? *ptr : -*ptr;
55 
56             ptr++;
57         }
58     }
59 
60     return 0;
61 }
62 
63 } // namespace ncnn
64