1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #include "common.hpp"
41 #include "vtransform.hpp"
42 
43 #include <cstring>
44 
45 namespace CAROTENE_NS {
46 
isFlipSupported(FLIP_MODE flipMode,u32 elemSize)47 bool isFlipSupported(FLIP_MODE flipMode, u32 elemSize)
48 {
49     bool supportedElemSize = (elemSize == 1) || (elemSize == 2) || (elemSize == 3) || (elemSize == 4);
50     return isSupportedConfiguration() &&
51             ((supportedElemSize && ((flipMode == FLIP_BOTH_MODE) || (flipMode == FLIP_HORIZONTAL_MODE))) ||
52              (flipMode == FLIP_VERTICAL_MODE));
53 }
54 
55 #ifdef CAROTENE_NEON
56 
57 namespace {
58 
59 template <typename T>
flip(const Size2D & size,const void * srcBase,ptrdiff_t srcStride,void * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode)60 void flip(const Size2D & size,
61           const void * srcBase, ptrdiff_t srcStride,
62           void * dstBase, ptrdiff_t dstStride,
63           FLIP_MODE flipMode)
64 {
65     using namespace internal;
66 
67     typedef typename VecTraits<T>::vec128 vec128;
68     typedef typename VecTraits<T>::vec64 vec64;
69 
70     u32 step_base = 16 / sizeof(T), step_tail = 8 / sizeof(T);
71     size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
72     size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
73 
74     for (size_t i = 0; i < size.height; ++i)
75     {
76         const T * src = getRowPtr((const T *)srcBase, srcStride, i);
77         T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
78         size_t js = 0, jd = size.width;
79 
80         for (; js < roiw_base; js += step_base, jd -= step_base)
81         {
82             prefetch(src + js);
83 
84             vec128 v_src = vld1q(src + js);
85             vec128 v_dst = vrev64q(v_src);
86             v_dst = vcombine(vget_high(v_dst), vget_low(v_dst));
87             vst1q(dst + jd - step_base, v_dst);
88         }
89         for (; js < roiw_tail; js += step_tail, jd -= step_tail)
90         {
91             vec64 v_src = vld1(src + js);
92             vst1(dst + jd - step_tail, vrev64(v_src));
93         }
94 
95         for (--jd; js < size.width; ++js, --jd)
96             dst[jd] = src[js];
97     }
98 }
99 
100 template <typename T>
flip3(const Size2D & size,const void * srcBase,ptrdiff_t srcStride,void * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode)101 void flip3(const Size2D & size,
102            const void * srcBase, ptrdiff_t srcStride,
103            void * dstBase, ptrdiff_t dstStride,
104            FLIP_MODE flipMode)
105 {
106     using namespace internal;
107 
108 #ifndef __ANDROID__
109     typedef typename VecTraits<T, 3>::vec128 vec128;
110 #endif
111     typedef typename VecTraits<T, 3>::vec64 vec64;
112 
113 #ifndef __ANDROID__
114     u32 step_base = 16 / sizeof(T), step_base3 = step_base * 3;
115     size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
116 #endif
117     u32 step_tail = 8 / sizeof(T), step_tail3 = step_tail * 3;
118     size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
119 
120     for (size_t i = 0; i < size.height; ++i)
121     {
122         const T * src = getRowPtr((const T *)srcBase, srcStride, i);
123         T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
124         size_t j = 0, js = 0, jd = size.width * 3;
125 
126 #ifndef __ANDROID__
127         for (; j < roiw_base; j += step_base, js += step_base3, jd -= step_base3)
128         {
129             prefetch(src + js);
130 
131             vec128 v_src = vld3q(src + js), v_dst;
132             v_src.val[0] = vrev64q(v_src.val[0]);
133             v_src.val[1] = vrev64q(v_src.val[1]);
134             v_src.val[2] = vrev64q(v_src.val[2]);
135 
136             v_dst.val[0] = vcombine(vget_high(v_src.val[0]), vget_low(v_src.val[0]));
137             v_dst.val[1] = vcombine(vget_high(v_src.val[1]), vget_low(v_src.val[1]));
138             v_dst.val[2] = vcombine(vget_high(v_src.val[2]), vget_low(v_src.val[2]));
139 
140             vst3q(dst + jd - step_base3, v_dst);
141         }
142 #endif // __ANDROID__
143 
144         for (; j < roiw_tail; j += step_tail, js += step_tail3, jd -= step_tail3)
145         {
146             vec64 v_src = vld3(src + js), v_dst;
147             v_dst.val[0] = vrev64(v_src.val[0]);
148             v_dst.val[1] = vrev64(v_src.val[1]);
149             v_dst.val[2] = vrev64(v_src.val[2]);
150 
151             vst3(dst + jd - step_tail3, v_dst);
152         }
153 
154         for (jd -= 3; j < size.width; ++j, js += 3, jd -= 3)
155         {
156             dst[jd] = src[js];
157             dst[jd + 1] = src[js + 1];
158             dst[jd + 2] = src[js + 2];
159         }
160     }
161 }
162 
163 typedef void (* flipFunc)(const Size2D &size,
164                   const void * srcBase, ptrdiff_t srcStride,
165                   void * dstBase, ptrdiff_t dstStride,
166                   FLIP_MODE flipMode);
167 
168 } // namespace
169 
170 #endif
171 
flip(const Size2D & size,const u8 * srcBase,ptrdiff_t srcStride,u8 * dstBase,ptrdiff_t dstStride,FLIP_MODE flipMode,u32 elemSize)172 void flip(const Size2D &size,
173           const u8 * srcBase, ptrdiff_t srcStride,
174           u8 * dstBase, ptrdiff_t dstStride,
175           FLIP_MODE flipMode, u32 elemSize)
176 {
177     internal::assertSupportedConfiguration(isFlipSupported(flipMode, elemSize));
178 #ifdef CAROTENE_NEON
179 
180     if (flipMode == FLIP_VERTICAL_MODE)
181     {
182         for (size_t y = 0; y < size.height; ++y)
183         {
184             const u8 * src_row = internal::getRowPtr(srcBase, srcStride, y);
185             u8 * dst_row = internal::getRowPtr(dstBase, dstStride, size.height - y - 1);
186 
187             std::memcpy(dst_row, src_row, elemSize * size.width);
188         }
189         return;
190     }
191 
192     flipFunc func = NULL;
193 
194     if (elemSize == (u32)sizeof(u8))
195         func = &flip<u8>;
196     if (elemSize == (u32)sizeof(u16))
197         func = &flip<u16>;
198     if (elemSize == (u32)sizeof(u32))
199         func = &flip<u32>;
200     if (elemSize == (u32)sizeof(u8) * 3)
201         func = &flip3<u8>;
202 
203     if (func == NULL)
204         return;
205 
206     func(size,
207          srcBase, srcStride,
208          dstBase, dstStride,
209          flipMode);
210 
211 #else
212     (void)size;
213     (void)srcBase;
214     (void)srcStride;
215     (void)dstBase;
216     (void)dstStride;
217     (void)flipMode;
218     (void)elemSize;
219 #endif
220 }
221 
222 } // namespace CAROTENE_NS
223