1 /*
2 * By downloading, copying, installing or using the software you agree to this license.
3 * If you do not agree to this license, do not download, install,
4 * copy or use the software.
5 *
6 *
7 * License Agreement
8 * For Open Source Computer Vision Library
9 * (3-clause BSD License)
10 *
11 * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12 * Third party copyrights are property of their respective owners.
13 *
14 * Redistribution and use in source and binary forms, with or without modification,
15 * are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * * Neither the names of the copyright holders nor the names of the contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * This software is provided by the copyright holders and contributors "as is" and
29 * any express or implied warranties, including, but not limited to, the implied
30 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31 * In no event shall copyright holders or contributors be liable for any direct,
32 * indirect, incidental, special, exemplary, or consequential damages
33 * (including, but not limited to, procurement of substitute goods or services;
34 * loss of use, data, or profits; or business interruption) however caused
35 * and on any theory of liability, whether in contract, strict liability,
36 * or tort (including negligence or otherwise) arising in any way out of
37 * the use of this software, even if advised of the possibility of such damage.
38 */
39
40 #include "common.hpp"
41 #include "vtransform.hpp"
42
43 #include <cstring>
44
45 namespace CAROTENE_NS {
46
isFlipSupported(FLIP_MODE flipMode,u32 elemSize)47 bool isFlipSupported(FLIP_MODE flipMode, u32 elemSize)
48 {
49 bool supportedElemSize = (elemSize == 1) || (elemSize == 2) || (elemSize == 3) || (elemSize == 4);
50 return isSupportedConfiguration() &&
51 ((supportedElemSize && ((flipMode == FLIP_BOTH_MODE) || (flipMode == FLIP_HORIZONTAL_MODE))) ||
52 (flipMode == FLIP_VERTICAL_MODE));
53 }
54
55 #ifdef CAROTENE_NEON
56
57 namespace {
58
59 template <typename T>
flip(const Size2D & size,const void * srcBase,ptrdiff_t srcStride,void * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode)60 void flip(const Size2D & size,
61 const void * srcBase, ptrdiff_t srcStride,
62 void * dstBase, ptrdiff_t dstStride,
63 FLIP_MODE flipMode)
64 {
65 using namespace internal;
66
67 typedef typename VecTraits<T>::vec128 vec128;
68 typedef typename VecTraits<T>::vec64 vec64;
69
70 u32 step_base = 16 / sizeof(T), step_tail = 8 / sizeof(T);
71 size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
72 size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
73
74 for (size_t i = 0; i < size.height; ++i)
75 {
76 const T * src = getRowPtr((const T *)srcBase, srcStride, i);
77 T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
78 size_t js = 0, jd = size.width;
79
80 for (; js < roiw_base; js += step_base, jd -= step_base)
81 {
82 prefetch(src + js);
83
84 vec128 v_src = vld1q(src + js);
85 vec128 v_dst = vrev64q(v_src);
86 v_dst = vcombine(vget_high(v_dst), vget_low(v_dst));
87 vst1q(dst + jd - step_base, v_dst);
88 }
89 for (; js < roiw_tail; js += step_tail, jd -= step_tail)
90 {
91 vec64 v_src = vld1(src + js);
92 vst1(dst + jd - step_tail, vrev64(v_src));
93 }
94
95 for (--jd; js < size.width; ++js, --jd)
96 dst[jd] = src[js];
97 }
98 }
99
100 template <typename T>
flip3(const Size2D & size,const void * srcBase,ptrdiff_t srcStride,void * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode)101 void flip3(const Size2D & size,
102 const void * srcBase, ptrdiff_t srcStride,
103 void * dstBase, ptrdiff_t dstStride,
104 FLIP_MODE flipMode)
105 {
106 using namespace internal;
107
108 #ifndef __ANDROID__
109 typedef typename VecTraits<T, 3>::vec128 vec128;
110 #endif
111 typedef typename VecTraits<T, 3>::vec64 vec64;
112
113 #ifndef __ANDROID__
114 u32 step_base = 16 / sizeof(T), step_base3 = step_base * 3;
115 size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
116 #endif
117 u32 step_tail = 8 / sizeof(T), step_tail3 = step_tail * 3;
118 size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
119
120 for (size_t i = 0; i < size.height; ++i)
121 {
122 const T * src = getRowPtr((const T *)srcBase, srcStride, i);
123 T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
124 size_t j = 0, js = 0, jd = size.width * 3;
125
126 #ifndef __ANDROID__
127 for (; j < roiw_base; j += step_base, js += step_base3, jd -= step_base3)
128 {
129 prefetch(src + js);
130
131 vec128 v_src = vld3q(src + js), v_dst;
132 v_src.val[0] = vrev64q(v_src.val[0]);
133 v_src.val[1] = vrev64q(v_src.val[1]);
134 v_src.val[2] = vrev64q(v_src.val[2]);
135
136 v_dst.val[0] = vcombine(vget_high(v_src.val[0]), vget_low(v_src.val[0]));
137 v_dst.val[1] = vcombine(vget_high(v_src.val[1]), vget_low(v_src.val[1]));
138 v_dst.val[2] = vcombine(vget_high(v_src.val[2]), vget_low(v_src.val[2]));
139
140 vst3q(dst + jd - step_base3, v_dst);
141 }
142 #endif // __ANDROID__
143
144 for (; j < roiw_tail; j += step_tail, js += step_tail3, jd -= step_tail3)
145 {
146 vec64 v_src = vld3(src + js), v_dst;
147 v_dst.val[0] = vrev64(v_src.val[0]);
148 v_dst.val[1] = vrev64(v_src.val[1]);
149 v_dst.val[2] = vrev64(v_src.val[2]);
150
151 vst3(dst + jd - step_tail3, v_dst);
152 }
153
154 for (jd -= 3; j < size.width; ++j, js += 3, jd -= 3)
155 {
156 dst[jd] = src[js];
157 dst[jd + 1] = src[js + 1];
158 dst[jd + 2] = src[js + 2];
159 }
160 }
161 }
162
163 typedef void (* flipFunc)(const Size2D &size,
164 const void * srcBase, ptrdiff_t srcStride,
165 void * dstBase, ptrdiff_t dstStride,
166 FLIP_MODE flipMode);
167
168 } // namespace
169
170 #endif
171
flip(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode,u32 elemSize)172 void flip(const Size2D &size,
173 const u8 * srcBase, ptrdiff_t srcStride,
174 u8 * dstBase, ptrdiff_t dstStride,
175 FLIP_MODE flipMode, u32 elemSize)
176 {
177 internal::assertSupportedConfiguration(isFlipSupported(flipMode, elemSize));
178 #ifdef CAROTENE_NEON
179
180 if (flipMode == FLIP_VERTICAL_MODE)
181 {
182 for (size_t y = 0; y < size.height; ++y)
183 {
184 const u8 * src_row = internal::getRowPtr(srcBase, srcStride, y);
185 u8 * dst_row = internal::getRowPtr(dstBase, dstStride, size.height - y - 1);
186
187 std::memcpy(dst_row, src_row, elemSize * size.width);
188 }
189 return;
190 }
191
192 flipFunc func = NULL;
193
194 if (elemSize == (u32)sizeof(u8))
195 func = &flip<u8>;
196 if (elemSize == (u32)sizeof(u16))
197 func = &flip<u16>;
198 if (elemSize == (u32)sizeof(u32))
199 func = &flip<u32>;
200 if (elemSize == (u32)sizeof(u8) * 3)
201 func = &flip3<u8>;
202
203 if (func == NULL)
204 return;
205
206 func(size,
207 srcBase, srcStride,
208 dstBase, dstStride,
209 flipMode);
210
211 #else
212 (void)size;
213 (void)srcBase;
214 (void)srcStride;
215 (void)dstBase;
216 (void)dstStride;
217 (void)flipMode;
218 (void)elemSize;
219 #endif
220 }
221
222 } // namespace CAROTENE_NS
223