1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef SKIA_EXT_CONVOLVER_H_ 6 #define SKIA_EXT_CONVOLVER_H_ 7 8 #include <stdint.h> 9 10 #include <cmath> 11 #include <vector> 12 13 #include "build/build_config.h" 14 #include "third_party/skia/include/core/SkSize.h" 15 #include "third_party/skia/include/core/SkTypes.h" 16 17 // We can build SSE2 optimized versions for all x86 CPUs 18 // except when building for the IOS emulator. 19 #if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_IOS) 20 #define SIMD_SSE2 1 21 #define SIMD_PADDING 8 // 8 * int16_t 22 #endif 23 24 #if defined (ARCH_CPU_MIPS_FAMILY) && \ 25 defined(__mips_dsp) && (__mips_dsp_rev >= 2) 26 #define SIMD_MIPS_DSPR2 1 27 #endif 28 29 #if defined(ARCH_CPU_ARM_FAMILY) && \ 30 (defined(__ARM_NEON__) || defined(__ARM_NEON)) 31 #define SIMD_NEON 1 32 #endif 33 34 // avoid confusion with Mac OS X's math library (Carbon) 35 #if defined(__APPLE__) 36 #undef FloatToFixed 37 #undef FixedToFloat 38 #endif 39 40 namespace skia { 41 42 // Represents a filter in one dimension. Each output pixel has one entry in this 43 // object for the filter values contributing to it. You build up the filter 44 // list by calling AddFilter for each output pixel (in order). 45 // 46 // We do 2-dimensional convolution by first convolving each row by one 47 // ConvolutionFilter1D, then convolving each column by another one. 48 // 49 // Entries are stored in fixed point, shifted left by kShiftBits. 50 class ConvolutionFilter1D { 51 public: 52 typedef short Fixed; 53 54 // The number of bits that fixed point values are shifted by. 55 enum { kShiftBits = 14 }; 56 57 SK_API ConvolutionFilter1D(); 58 SK_API ~ConvolutionFilter1D(); 59 60 // Convert between floating point and our fixed point representation. FloatToFixed(float f)61 static Fixed FloatToFixed(float f) { 62 return static_cast<Fixed>(f * (1 << kShiftBits)); 63 } FixedToChar(Fixed x)64 static unsigned char FixedToChar(Fixed x) { 65 return static_cast<unsigned char>(x >> kShiftBits); 66 } FixedToFloat(Fixed x)67 static float FixedToFloat(Fixed x) { 68 // The cast relies on Fixed being a short, implying that on 69 // the platforms we care about all (16) bits will fit into 70 // the mantissa of a (32-bit) float. 71 static_assert(sizeof(Fixed) == 2, 72 "fixed type should fit in float mantissa"); 73 float raw = static_cast<float>(x); 74 return ldexpf(raw, -kShiftBits); 75 } 76 77 // Returns the maximum pixel span of a filter. max_filter()78 int max_filter() const { return max_filter_; } 79 80 // Returns the number of filters in this filter. This is the dimension of the 81 // output image. num_values()82 int num_values() const { return static_cast<int>(filters_.size()); } 83 84 // Appends the given list of scaling values for generating a given output 85 // pixel. |filter_offset| is the distance from the edge of the image to where 86 // the scaling factors start. The scaling factors apply to the source pixels 87 // starting from this position, and going for the next |filter_length| pixels. 88 // 89 // You will probably want to make sure your input is normalized (that is, 90 // all entries in |filter_values| sub to one) to prevent affecting the overall 91 // brighness of the image. 92 // 93 // The filter_length must be > 0. 94 // 95 // This version will automatically convert your input to fixed point. 96 SK_API void AddFilter(int filter_offset, 97 const float* filter_values, 98 int filter_length); 99 100 // Same as the above version, but the input is already fixed point. 101 void AddFilter(int filter_offset, 102 const Fixed* filter_values, 103 int filter_length); 104 105 // Retrieves a filter for the given |value_offset|, a position in the output 106 // image in the direction we're convolving. The offset and length of the 107 // filter values are put into the corresponding out arguments (see AddFilter 108 // above for what these mean), and a pointer to the first scaling factor is 109 // returned. There will be |filter_length| values in this array. FilterForValue(int value_offset,int * filter_offset,int * filter_length)110 inline const Fixed* FilterForValue(int value_offset, 111 int* filter_offset, 112 int* filter_length) const { 113 const FilterInstance& filter = filters_[value_offset]; 114 *filter_offset = filter.offset; 115 *filter_length = filter.trimmed_length; 116 if (filter.trimmed_length == 0) { 117 return NULL; 118 } 119 return &filter_values_[filter.data_location]; 120 } 121 122 // Retrieves the filter for the offset 0, presumed to be the one and only. 123 // The offset and length of the filter values are put into the corresponding 124 // out arguments (see AddFilter). Note that |filter_legth| and 125 // |specified_filter_length| may be different if leading/trailing zeros of the 126 // original floating point form were clipped. 127 // There will be |filter_length| values in the return array. 128 // Returns NULL if the filter is 0-length (for instance when all floating 129 // point values passed to AddFilter were clipped to 0). 130 SK_API const Fixed* GetSingleFilter(int* specified_filter_length, 131 int* filter_offset, 132 int* filter_length) const; 133 PaddingForSIMD()134 inline void PaddingForSIMD() { 135 // Padding |padding_count| of more dummy coefficients after the coefficients 136 // of last filter to prevent SIMD instructions which load 8 or 16 bytes 137 // together to access invalid memory areas. We are not trying to align the 138 // coefficients right now due to the opaqueness of <vector> implementation. 139 // This has to be done after all |AddFilter| calls. 140 #ifdef SIMD_PADDING 141 for (int i = 0; i < SIMD_PADDING; ++i) 142 filter_values_.push_back(static_cast<Fixed>(0)); 143 #endif 144 } 145 146 private: 147 struct FilterInstance { 148 // Offset within filter_values for this instance of the filter. 149 int data_location; 150 151 // Distance from the left of the filter to the center. IN PIXELS 152 int offset; 153 154 // Number of values in this filter instance. 155 int trimmed_length; 156 157 // Filter length as specified. Note that this may be different from 158 // 'trimmed_length' if leading/trailing zeros of the original floating 159 // point form were clipped differently on each tail. 160 int length; 161 }; 162 163 // Stores the information for each filter added to this class. 164 std::vector<FilterInstance> filters_; 165 166 // We store all the filter values in this flat list, indexed by 167 // |FilterInstance.data_location| to avoid the mallocs required for storing 168 // each one separately. 169 std::vector<Fixed> filter_values_; 170 171 // The maximum size of any filter we've added. 172 int max_filter_; 173 }; 174 175 // Does a two-dimensional convolution on the given source image. 176 // 177 // It is assumed the source pixel offsets referenced in the input filters 178 // reference only valid pixels, so the source image size is not required. Each 179 // row of the source image starts |source_byte_row_stride| after the previous 180 // one (this allows you to have rows with some padding at the end). 181 // 182 // The result will be put into the given output buffer. The destination image 183 // size will be xfilter.num_values() * yfilter.num_values() pixels. It will be 184 // in rows of exactly xfilter.num_values() * 4 bytes. 185 // 186 // |source_has_alpha| is a hint that allows us to avoid doing computations on 187 // the alpha channel if the image is opaque. If you don't know, set this to 188 // true and it will work properly, but setting this to false will be a few 189 // percent faster if you know the image is opaque. 190 // 191 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order 192 // (this is ARGB when loaded into 32-bit words on a little-endian machine). 193 SK_API void BGRAConvolve2D(const unsigned char* source_data, 194 int source_byte_row_stride, 195 bool source_has_alpha, 196 const ConvolutionFilter1D& xfilter, 197 const ConvolutionFilter1D& yfilter, 198 int output_byte_row_stride, 199 unsigned char* output, 200 bool use_simd_if_possible); 201 202 // Does a 1D convolution of the given source image along the X dimension on 203 // a single channel of the bitmap. 204 // 205 // The function uses the same convolution kernel for each pixel. That kernel 206 // must be added to |filter| at offset 0. This is a most straightforward 207 // implementation of convolution, intended chiefly for development purposes. 208 SK_API void SingleChannelConvolveX1D(const unsigned char* source_data, 209 int source_byte_row_stride, 210 int input_channel_index, 211 int input_channel_count, 212 const ConvolutionFilter1D& filter, 213 const SkISize& image_size, 214 unsigned char* output, 215 int output_byte_row_stride, 216 int output_channel_index, 217 int output_channel_count, 218 bool absolute_values); 219 220 // Does a 1D convolution of the given source image along the Y dimension on 221 // a single channel of the bitmap. 222 SK_API void SingleChannelConvolveY1D(const unsigned char* source_data, 223 int source_byte_row_stride, 224 int input_channel_index, 225 int input_channel_count, 226 const ConvolutionFilter1D& filter, 227 const SkISize& image_size, 228 unsigned char* output, 229 int output_byte_row_stride, 230 int output_channel_index, 231 int output_channel_count, 232 bool absolute_values); 233 234 // Set up the |filter| instance with a gaussian kernel. |kernel_sigma| is the 235 // parameter of gaussian. If |derivative| is true, the kernel will be that of 236 // the first derivative. Intended for use with the two routines above. 237 SK_API void SetUpGaussianConvolutionKernel(ConvolutionFilter1D* filter, 238 float kernel_sigma, 239 bool derivative); 240 241 } // namespace skia 242 243 #endif // SKIA_EXT_CONVOLVER_H_ 244