1 /*
2 * Simd Library (http://ermig1979.github.io/Simd).
3 *
4 * Copyright (c) 2011-2017 Yermalayeu Ihar.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "Simd/SimdMemory.h"
25 
26 namespace Simd
27 {
28     namespace Base
29     {
30         namespace
31         {
32             struct Buffer
33             {
BufferSimd::Base::__anon369e68e20111::Buffer34                 Buffer(size_t width)
35                 {
36                     _p = Allocate(sizeof(int) * 2 * width);
37                     src0 = (int*)_p;
38                     src1 = src0 + width;
39                 }
40 
~BufferSimd::Base::__anon369e68e20111::Buffer41                 ~Buffer()
42                 {
43                     Free(_p);
44                 }
45 
46                 int * src0;
47                 int * src1;
48             private:
49                 void *_p;
50             };
51         }
52 
DivideBy64(int value)53         SIMD_INLINE int DivideBy64(int value)
54         {
55             return (value + 32) >> 6;
56         }
57 
GaussianBlur(const uint8_t * src,size_t x0,size_t x1,size_t x2,size_t x3)58         SIMD_INLINE int GaussianBlur(const uint8_t *src, size_t x0, size_t x1, size_t x2, size_t x3)
59         {
60             return src[x0] + 3 * (src[x1] + src[x2]) + src[x3];
61         }
62 
ProcessFirstRow(const uint8_t * src,size_t x0,size_t x1,size_t x2,size_t x3,Buffer & buffer,size_t offset)63         SIMD_INLINE void ProcessFirstRow(const uint8_t *src, size_t x0, size_t x1, size_t x2, size_t x3, Buffer & buffer, size_t offset)
64         {
65             int tmp = GaussianBlur(src, x0, x1, x2, x3);
66             buffer.src0[offset] = tmp;
67             buffer.src1[offset] = tmp;
68         }
69 
ProcessMainRow(const uint8_t * s2,const uint8_t * s3,size_t x0,size_t x1,size_t x2,size_t x3,Buffer & buffer,uint8_t * dst,size_t offset)70         SIMD_INLINE void ProcessMainRow(const uint8_t *s2, const uint8_t *s3, size_t x0, size_t x1, size_t x2, size_t x3, Buffer & buffer, uint8_t* dst, size_t offset)
71         {
72             int tmp2 = GaussianBlur(s2, x0, x1, x2, x3);
73             int tmp3 = GaussianBlur(s3, x0, x1, x2, x3);
74             dst[offset] = DivideBy64(buffer.src0[offset] + 3 * (buffer.src1[offset] + tmp2) + tmp3);
75             buffer.src0[offset] = tmp2;
76             buffer.src1[offset] = tmp3;
77         }
78 
ReduceGray4x4(const uint8_t * src,size_t srcWidth,size_t srcHeight,size_t srcStride,uint8_t * dst,size_t dstWidth,size_t dstHeight,size_t dstStride)79         void ReduceGray4x4(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride,
80             uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride)
81         {
82             assert((srcWidth + 1) / 2 == dstWidth && (srcHeight + 1) / 2 == dstHeight && srcWidth > 2);
83 
84             Buffer buffer(dstWidth);
85 
86             ProcessFirstRow(src, 0, 0, 1, 2, buffer, 0);
87             size_t srcCol = 2, dstCol = 1;
88             for (; srcCol < srcWidth - 2; srcCol += 2, dstCol++)
89                 ProcessFirstRow(src, srcCol - 1, srcCol, srcCol + 1, srcCol + 2, buffer, dstCol);
90             ProcessFirstRow(src, srcCol - 1, srcCol, srcWidth - 1, srcWidth - 1, buffer, dstCol);
91 
92             for (size_t row = 0; row < srcHeight; row += 2, dst += dstStride)
93             {
94                 const uint8_t *src2 = src + srcStride*(row + 1);
95                 const uint8_t *src3 = src2 + srcStride;
96                 if (row >= srcHeight - 2)
97                 {
98                     src2 = src + srcStride*(srcHeight - 1);
99                     src3 = src2;
100                 }
101 
102                 ProcessMainRow(src2, src3, 0, 0, 1, 2, buffer, dst, 0);
103                 size_t srcCol = 2, dstCol = 1;
104                 for (; srcCol < srcWidth - 2; srcCol += 2, dstCol++)
105                     ProcessMainRow(src2, src3, srcCol - 1, srcCol, srcCol + 1, srcCol + 2, buffer, dst, dstCol);
106                 ProcessMainRow(src2, src3, srcCol - 1, srcCol, srcWidth - 1, srcWidth - 1, buffer, dst, dstCol);
107             }
108         }
109     }
110 }
111