1 /* 2 * Simd Library (http://ermig1979.github.io/Simd). 3 * 4 * Copyright (c) 2011-2017 Yermalayeu Ihar. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #include "Simd/SimdMemory.h" 25 26 namespace Simd 27 { 28 namespace Base 29 { 30 namespace 31 { 32 struct Buffer 33 { BufferSimd::Base::__anon369e68e20111::Buffer34 Buffer(size_t width) 35 { 36 _p = Allocate(sizeof(int) * 2 * width); 37 src0 = (int*)_p; 38 src1 = src0 + width; 39 } 40 ~BufferSimd::Base::__anon369e68e20111::Buffer41 ~Buffer() 42 { 43 Free(_p); 44 } 45 46 int * src0; 47 int * src1; 48 private: 49 void *_p; 50 }; 51 } 52 DivideBy64(int value)53 SIMD_INLINE int DivideBy64(int value) 54 { 55 return (value + 32) >> 6; 56 } 57 GaussianBlur(const uint8_t * src,size_t x0,size_t x1,size_t x2,size_t x3)58 SIMD_INLINE int GaussianBlur(const uint8_t *src, size_t x0, size_t x1, size_t x2, size_t x3) 59 { 60 return src[x0] + 3 * (src[x1] + src[x2]) + src[x3]; 61 } 62 ProcessFirstRow(const uint8_t * src,size_t x0,size_t x1,size_t x2,size_t x3,Buffer & buffer,size_t offset)63 SIMD_INLINE void ProcessFirstRow(const uint8_t *src, size_t x0, size_t x1, size_t x2, size_t x3, Buffer & buffer, size_t offset) 64 { 65 int tmp = GaussianBlur(src, x0, x1, x2, x3); 66 buffer.src0[offset] = tmp; 67 buffer.src1[offset] = tmp; 68 } 69 ProcessMainRow(const uint8_t * s2,const uint8_t * s3,size_t x0,size_t x1,size_t x2,size_t x3,Buffer & buffer,uint8_t * dst,size_t offset)70 SIMD_INLINE void ProcessMainRow(const uint8_t *s2, const uint8_t *s3, size_t x0, size_t x1, size_t x2, size_t x3, Buffer & buffer, uint8_t* dst, size_t offset) 71 { 72 int tmp2 = GaussianBlur(s2, x0, x1, x2, x3); 73 int tmp3 = GaussianBlur(s3, x0, x1, x2, x3); 74 dst[offset] = DivideBy64(buffer.src0[offset] + 3 * (buffer.src1[offset] + tmp2) + tmp3); 75 buffer.src0[offset] = tmp2; 76 buffer.src1[offset] = tmp3; 77 } 78 ReduceGray4x4(const uint8_t * src,size_t srcWidth,size_t srcHeight,size_t srcStride,uint8_t * dst,size_t dstWidth,size_t dstHeight,size_t dstStride)79 void ReduceGray4x4(const uint8_t *src, size_t srcWidth, size_t srcHeight, size_t srcStride, 80 uint8_t *dst, size_t dstWidth, size_t dstHeight, size_t dstStride) 81 { 82 assert((srcWidth + 1) / 2 == dstWidth && (srcHeight + 1) / 2 == dstHeight && srcWidth > 2); 83 84 Buffer buffer(dstWidth); 85 86 ProcessFirstRow(src, 0, 0, 1, 2, buffer, 0); 87 size_t srcCol = 2, dstCol = 1; 88 for (; srcCol < srcWidth - 2; srcCol += 2, dstCol++) 89 ProcessFirstRow(src, srcCol - 1, srcCol, srcCol + 1, srcCol + 2, buffer, dstCol); 90 ProcessFirstRow(src, srcCol - 1, srcCol, srcWidth - 1, srcWidth - 1, buffer, dstCol); 91 92 for (size_t row = 0; row < srcHeight; row += 2, dst += dstStride) 93 { 94 const uint8_t *src2 = src + srcStride*(row + 1); 95 const uint8_t *src3 = src2 + srcStride; 96 if (row >= srcHeight - 2) 97 { 98 src2 = src + srcStride*(srcHeight - 1); 99 src3 = src2; 100 } 101 102 ProcessMainRow(src2, src3, 0, 0, 1, 2, buffer, dst, 0); 103 size_t srcCol = 2, dstCol = 1; 104 for (; srcCol < srcWidth - 2; srcCol += 2, dstCol++) 105 ProcessMainRow(src2, src3, srcCol - 1, srcCol, srcCol + 1, srcCol + 2, buffer, dst, dstCol); 106 ProcessMainRow(src2, src3, srcCol - 1, srcCol, srcWidth - 1, srcWidth - 1, buffer, dst, dstCol); 107 } 108 } 109 } 110 } 111