1 /*
2 * Simd Library (http://ermig1979.github.io/Simd).
3 *
4 * Copyright (c) 2011-2019 Yermalayeu Ihar.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "Simd/SimdMemory.h"
25 #include "Simd/SimdResizer.h"
26 #include "Simd/SimdStore.h"
27 
28 namespace Simd
29 {
30 #ifdef SIMD_SSE_ENABLE
31     namespace Sse
32     {
ResizerFloatBilinear(const ResParam & param)33         ResizerFloatBilinear::ResizerFloatBilinear(const ResParam & param)
34             : Base::ResizerFloatBilinear(param)
35         {
36         }
37 
Run(const float * src,size_t srcStride,float * dst,size_t dstStride)38         void ResizerFloatBilinear::Run(const float * src, size_t srcStride, float * dst, size_t dstStride)
39         {
40             size_t cn = _param.channels;
41             size_t rs = _param.dstW * cn;
42             float * pbx[2] = { _bx[0].data, _bx[1].data };
43             int32_t prev = -2;
44             size_t rsa = AlignLo(rs, Sse::F);
45             for (size_t dy = 0; dy < _param.dstH; dy++, dst += dstStride)
46             {
47                 float fy1 = _ay[dy];
48                 float fy0 = 1.0f - fy1;
49                 int32_t sy = _iy[dy];
50                 int32_t k = 0;
51 
52                 if (sy == prev)
53                     k = 2;
54                 else if (sy == prev + 1)
55                 {
56                     Swap(pbx[0], pbx[1]);
57                     k = 1;
58                 }
59 
60                 prev = sy;
61 
62                 for (; k < 2; k++)
63                 {
64                     float * pb = pbx[k];
65                     const float * ps = src + (sy + k)*srcStride;
66                     size_t dx = 0;
67                     if (cn == 1)
68                     {
69                         __m128 _1 = _mm_set1_ps(1.0f);
70                         for (; dx < rsa; dx += Sse::F)
71                         {
72                             __m128 s01 = Sse::Load(ps + _ix[dx + 0], ps + _ix[dx + 1]);
73                             __m128 s23 = Sse::Load(ps + _ix[dx + 2], ps + _ix[dx + 3]);
74                             __m128 fx1 = _mm_load_ps(_ax.data + dx);
75                             __m128 fx0 = _mm_sub_ps(_1, fx1);
76                             __m128 m0 = _mm_mul_ps(fx0, _mm_shuffle_ps(s01, s23, 0x88));
77                             __m128 m1 = _mm_mul_ps(fx1, _mm_shuffle_ps(s01, s23, 0xDD));
78                             _mm_store_ps(pb + dx, _mm_add_ps(m0, m1));
79                         }
80                     }
81                     if (cn == 3 && rs > 3)
82                     {
83                         __m128 _1 = _mm_set1_ps(1.0f);
84                         size_t rs3 = rs - 3;
85                         for (; dx < rs3; dx += 3)
86                         {
87                             __m128 s0 = _mm_loadu_ps(ps + _ix[dx] + 0);
88                             __m128 s1 = _mm_loadu_ps(ps + _ix[dx] + 3);
89                             __m128 fx1 = _mm_set1_ps(_ax.data[dx]);
90                             __m128 fx0 = _mm_sub_ps(_1, fx1);
91                             _mm_storeu_ps(pb + dx, _mm_add_ps(_mm_mul_ps(fx0, s0), _mm_mul_ps(fx1, s1)));
92                         }
93                     }
94                     for (; dx < rs; dx++)
95                     {
96                         int32_t sx = _ix[dx];
97                         float fx = _ax[dx];
98                         pb[dx] = ps[sx] * (1.0f - fx) + ps[sx + cn] * fx;
99                     }
100                 }
101 
102                 size_t dx = 0;
103                 __m128 _fy0 = _mm_set1_ps(fy0);
104                 __m128 _fy1 = _mm_set1_ps(fy1);
105                 for (; dx < rsa; dx += Sse::F)
106                 {
107                     __m128 m0 = _mm_mul_ps(_mm_load_ps(pbx[0] + dx), _fy0);
108                     __m128 m1 = _mm_mul_ps(_mm_load_ps(pbx[1] + dx), _fy1);
109                     _mm_storeu_ps(dst + dx, _mm_add_ps(m0, m1));
110                 }
111                 for (; dx < rs; dx++)
112                     dst[dx] = pbx[0][dx] * fy0 + pbx[1][dx] * fy1;
113             }
114         }
115 
116         //---------------------------------------------------------------------
117 
ResizerInit(size_t srcX,size_t srcY,size_t dstX,size_t dstY,size_t channels,SimdResizeChannelType type,SimdResizeMethodType method)118         void * ResizerInit(size_t srcX, size_t srcY, size_t dstX, size_t dstY, size_t channels, SimdResizeChannelType type, SimdResizeMethodType method)
119         {
120             ResParam param(srcX, srcY, dstX, dstY, channels, type, method, sizeof(__m128));
121             if (type == SimdResizeChannelFloat && (method == SimdResizeMethodBilinear || method == SimdResizeMethodCaffeInterp))
122                 return new ResizerFloatBilinear(param);
123             else
124                 return Base::ResizerInit(srcX, srcY, dstX, dstY, channels, type, method);
125         }
126     }
127 #endif //SIMD_SSE_ENABLE
128 }
129 
130