1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #include "common.hpp"
41 #include "vtransform.hpp"
42
43 namespace CAROTENE_NS {
44
45 #ifdef CAROTENE_NEON
46
47 struct BitwiseAnd
48 {
49 typedef u8 type;
50
operator ()CAROTENE_NS::BitwiseAnd51 void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
52 uint8x16_t & v_dst) const
53 {
54 v_dst = vandq_u8(v_src0, v_src1);
55 }
56
operator ()CAROTENE_NS::BitwiseAnd57 void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
58 uint8x8_t & v_dst) const
59 {
60 v_dst = vand_u8(v_src0, v_src1);
61 }
62
operator ()CAROTENE_NS::BitwiseAnd63 void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
64 {
65 dst[0] = src0[0] & src1[0];
66 }
67 };
68
69 struct BitwiseOr
70 {
71 typedef u8 type;
72
operator ()CAROTENE_NS::BitwiseOr73 void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
74 uint8x16_t & v_dst) const
75 {
76 v_dst = vorrq_u8(v_src0, v_src1);
77 }
78
operator ()CAROTENE_NS::BitwiseOr79 void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
80 uint8x8_t & v_dst) const
81 {
82 v_dst = vorr_u8(v_src0, v_src1);
83 }
84
operator ()CAROTENE_NS::BitwiseOr85 void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
86 {
87 dst[0] = src0[0] | src1[0];
88 }
89 };
90
91 struct BitwiseXor
92 {
93 typedef u8 type;
94
operator ()CAROTENE_NS::BitwiseXor95 void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
96 uint8x16_t & v_dst) const
97 {
98 v_dst = veorq_u8(v_src0, v_src1);
99 }
100
operator ()CAROTENE_NS::BitwiseXor101 void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
102 uint8x8_t & v_dst) const
103 {
104 v_dst = veor_u8(v_src0, v_src1);
105 }
106
operator ()CAROTENE_NS::BitwiseXor107 void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
108 {
109 dst[0] = src0[0] ^ src1[0];
110 }
111 };
112
113 #endif
114
bitwiseNot(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride)115 void bitwiseNot(const Size2D &size,
116 const u8 *srcBase, ptrdiff_t srcStride,
117 u8 *dstBase, ptrdiff_t dstStride)
118 {
119 internal::assertSupportedConfiguration();
120 #ifdef CAROTENE_NEON
121 size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
122 size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
123
124 for (size_t i = 0; i < size.height; ++i)
125 {
126 const u8* src = internal::getRowPtr(srcBase, srcStride, i);
127 u8* dst = internal::getRowPtr(dstBase, dstStride, i);
128 size_t j = 0;
129
130 for (; j < roiw32; j += 32)
131 {
132 internal::prefetch(src + j);
133 uint8x16_t v_src0 = vld1q_u8(src + j), v_src1 = vld1q_u8(src + j + 16);
134 uint8x16_t v_dst0 = vmvnq_u8(v_src0), v_dst1 = vmvnq_u8(v_src1);
135 vst1q_u8(dst + j, v_dst0);
136 vst1q_u8(dst + j + 16, v_dst1);
137 }
138 for (; j < roiw8; j += 8)
139 {
140 uint8x8_t v_src = vld1_u8(src + j);
141 uint8x8_t v_dst = vmvn_u8(v_src);
142 vst1_u8(dst + j, v_dst);
143 }
144
145 for (; j < size.width; j++)
146 {
147 dst[j] = ~src[j];
148 }
149 }
150 #else
151 (void)size;
152 (void)srcBase;
153 (void)srcStride;
154 (void)dstBase;
155 (void)dstStride;
156 #endif
157 }
158
bitwiseAnd(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)159 void bitwiseAnd(const Size2D &size,
160 const u8 *src0Base, ptrdiff_t src0Stride,
161 const u8 *src1Base, ptrdiff_t src1Stride,
162 u8 *dstBase, ptrdiff_t dstStride)
163 {
164 internal::assertSupportedConfiguration();
165 #ifdef CAROTENE_NEON
166 internal::vtransform(size,
167 src0Base, src0Stride,
168 src1Base, src1Stride,
169 dstBase, dstStride, BitwiseAnd());
170 #else
171 (void)size;
172 (void)src0Base;
173 (void)src0Stride;
174 (void)src1Base;
175 (void)src1Stride;
176 (void)dstBase;
177 (void)dstStride;
178 #endif
179 }
180
bitwiseOr(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)181 void bitwiseOr(const Size2D &size,
182 const u8 *src0Base, ptrdiff_t src0Stride,
183 const u8 *src1Base, ptrdiff_t src1Stride,
184 u8 *dstBase, ptrdiff_t dstStride)
185 {
186 internal::assertSupportedConfiguration();
187 #ifdef CAROTENE_NEON
188 internal::vtransform(size,
189 src0Base, src0Stride,
190 src1Base, src1Stride,
191 dstBase, dstStride, BitwiseOr());
192 #else
193 (void)size;
194 (void)src0Base;
195 (void)src0Stride;
196 (void)src1Base;
197 (void)src1Stride;
198 (void)dstBase;
199 (void)dstStride;
200 #endif
201 }
202
bitwiseXor(const Size2D & size,const u8 * src0Base,ptrdiff_t src0Stride,const u8 * src1Base,ptrdiff_t src1Stride,u8 * dstBase,ptrdiff_t dstStride)203 void bitwiseXor(const Size2D &size,
204 const u8 *src0Base, ptrdiff_t src0Stride,
205 const u8 *src1Base, ptrdiff_t src1Stride,
206 u8 *dstBase, ptrdiff_t dstStride)
207 {
208 internal::assertSupportedConfiguration();
209 #ifdef CAROTENE_NEON
210 internal::vtransform(size,
211 src0Base, src0Stride,
212 src1Base, src1Stride,
213 dstBase, dstStride, BitwiseXor());
214 #else
215 (void)size;
216 (void)src0Base;
217 (void)src0Stride;
218 (void)src1Base;
219 (void)src1Stride;
220 (void)dstBase;
221 (void)dstStride;
222 #endif
223 }
224
225 } // namespace CAROTENE_NS
226