1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 #include "vtransform.hpp"
42 
43 namespace CAROTENE_NS {
44 
45 #ifdef CAROTENE_NEON
46 
47 struct BitwiseAnd
48 {
49     typedef u8 type;
50 
operator ()CAROTENE_NS::BitwiseAnd51     void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
52                      uint8x16_t & v_dst) const
53     {
54         v_dst = vandq_u8(v_src0, v_src1);
55     }
56 
operator ()CAROTENE_NS::BitwiseAnd57     void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
58                      uint8x8_t & v_dst) const
59     {
60         v_dst = vand_u8(v_src0, v_src1);
61     }
62 
operator ()CAROTENE_NS::BitwiseAnd63     void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
64     {
65         dst[0] = src0[0] & src1[0];
66     }
67 };
68 
69 struct BitwiseOr
70 {
71     typedef u8 type;
72 
operator ()CAROTENE_NS::BitwiseOr73     void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
74                      uint8x16_t & v_dst) const
75     {
76         v_dst = vorrq_u8(v_src0, v_src1);
77     }
78 
operator ()CAROTENE_NS::BitwiseOr79     void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
80                      uint8x8_t & v_dst) const
81     {
82         v_dst = vorr_u8(v_src0, v_src1);
83     }
84 
operator ()CAROTENE_NS::BitwiseOr85     void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
86     {
87         dst[0] = src0[0] | src1[0];
88     }
89 };
90 
91 struct BitwiseXor
92 {
93     typedef u8 type;
94 
operator ()CAROTENE_NS::BitwiseXor95     void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
96                      uint8x16_t & v_dst) const
97     {
98         v_dst = veorq_u8(v_src0, v_src1);
99     }
100 
operator ()CAROTENE_NS::BitwiseXor101     void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
102                      uint8x8_t & v_dst) const
103     {
104         v_dst = veor_u8(v_src0, v_src1);
105     }
106 
operator ()CAROTENE_NS::BitwiseXor107     void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
108     {
109         dst[0] = src0[0] ^ src1[0];
110     }
111 };
112 
113 #endif
114 
bitwiseNot(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride)115 void bitwiseNot(const Size2D &size,
116                 const u8 *srcBase, ptrdiff_t srcStride,
117                 u8 *dstBase, ptrdiff_t dstStride)
118 {
119     internal::assertSupportedConfiguration();
120 #ifdef CAROTENE_NEON
121     size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
122     size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
123 
124     for (size_t i = 0; i < size.height; ++i)
125     {
126         const u8* src = internal::getRowPtr(srcBase, srcStride, i);
127         u8* dst = internal::getRowPtr(dstBase, dstStride, i);
128         size_t j = 0;
129 
130         for (; j < roiw32; j += 32)
131         {
132             internal::prefetch(src + j);
133             uint8x16_t v_src0 = vld1q_u8(src + j), v_src1 = vld1q_u8(src + j + 16);
134             uint8x16_t v_dst0 = vmvnq_u8(v_src0), v_dst1 = vmvnq_u8(v_src1);
135             vst1q_u8(dst + j, v_dst0);
136             vst1q_u8(dst + j + 16, v_dst1);
137         }
138         for (; j < roiw8; j += 8)
139         {
140             uint8x8_t v_src = vld1_u8(src + j);
141             uint8x8_t v_dst = vmvn_u8(v_src);
142             vst1_u8(dst + j, v_dst);
143         }
144 
145         for (; j < size.width; j++)
146         {
147             dst[j] = ~src[j];
148         }
149     }
150 #else
151     (void)size;
152     (void)srcBase;
153     (void)srcStride;
154     (void)dstBase;
155     (void)dstStride;
156 #endif
157 }
158 
bitwiseAnd(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)159 void bitwiseAnd(const Size2D &size,
160                 const u8 *src0Base, ptrdiff_t src0Stride,
161                 const u8 *src1Base, ptrdiff_t src1Stride,
162                 u8 *dstBase, ptrdiff_t dstStride)
163 {
164     internal::assertSupportedConfiguration();
165 #ifdef CAROTENE_NEON
166     internal::vtransform(size,
167                          src0Base, src0Stride,
168                          src1Base, src1Stride,
169                          dstBase, dstStride, BitwiseAnd());
170 #else
171     (void)size;
172     (void)src0Base;
173     (void)src0Stride;
174     (void)src1Base;
175     (void)src1Stride;
176     (void)dstBase;
177     (void)dstStride;
178 #endif
179 }
180 
bitwiseOr(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)181 void bitwiseOr(const Size2D &size,
182                const u8 *src0Base, ptrdiff_t src0Stride,
183                const u8 *src1Base, ptrdiff_t src1Stride,
184                u8 *dstBase, ptrdiff_t dstStride)
185 {
186     internal::assertSupportedConfiguration();
187 #ifdef CAROTENE_NEON
188     internal::vtransform(size,
189                          src0Base, src0Stride,
190                          src1Base, src1Stride,
191                          dstBase, dstStride, BitwiseOr());
192 #else
193     (void)size;
194     (void)src0Base;
195     (void)src0Stride;
196     (void)src1Base;
197     (void)src1Stride;
198     (void)dstBase;
199     (void)dstStride;
200 #endif
201 }
202 
bitwiseXor(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)203 void bitwiseXor(const Size2D &size,
204                 const u8 *src0Base, ptrdiff_t src0Stride,
205                 const u8 *src1Base, ptrdiff_t src1Stride,
206                 u8 *dstBase, ptrdiff_t dstStride)
207 {
208     internal::assertSupportedConfiguration();
209 #ifdef CAROTENE_NEON
210     internal::vtransform(size,
211                          src0Base, src0Stride,
212                          src1Base, src1Stride,
213                          dstBase, dstStride, BitwiseXor());
214 #else
215     (void)size;
216     (void)src0Base;
217     (void)src0Stride;
218     (void)src1Base;
219     (void)src1Stride;
220     (void)dstBase;
221     (void)dstStride;
222 #endif
223 }
224 
225 } // namespace CAROTENE_NS
226