1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SKIA_EXT_CONVOLVER_H_
6 #define SKIA_EXT_CONVOLVER_H_
7 
8 #include <stdint.h>
9 
10 #include <cmath>
11 #include <vector>
12 
13 #include "build/build_config.h"
14 #include "third_party/skia/include/core/SkSize.h"
15 #include "third_party/skia/include/core/SkTypes.h"
16 
// SSE2-optimized code paths can be built for every x86 CPU, with one
// exception: iOS builds (an x86 iOS build targets the emulator).
#ifdef ARCH_CPU_X86_FAMILY
#ifndef OS_IOS
#define SIMD_SSE2 1
// Padding, counted in coefficients: 8 * int16_t.
#define SIMD_PADDING 8
#endif
#endif

// MIPS: requires the DSP ASE to be enabled, at revision 2 or later.
#ifdef ARCH_CPU_MIPS_FAMILY
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define SIMD_MIPS_DSPR2 1
#endif
#endif

// ARM: requires NEON; either spelling of the compiler's NEON macro counts.
#ifdef ARCH_CPU_ARM_FAMILY
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define SIMD_NEON 1
#endif
#endif
33 
// Avoid confusion with Mac OS X's math library (Carbon): on Apple platforms
// drop any existing FloatToFixed / FixedToFloat macros so that these names
// can be used as ordinary identifiers below.
#ifdef __APPLE__
#undef FloatToFixed
#undef FixedToFloat
#endif
39 
40 namespace skia {
41 
42 // Represents a filter in one dimension. Each output pixel has one entry in this
43 // object for the filter values contributing to it. You build up the filter
44 // list by calling AddFilter for each output pixel (in order).
45 //
46 // We do 2-dimensional convolution by first convolving each row by one
47 // ConvolutionFilter1D, then convolving each column by another one.
48 //
49 // Entries are stored in fixed point, shifted left by kShiftBits.
50 class ConvolutionFilter1D {
51  public:
52   typedef short Fixed;
53 
54   // The number of bits that fixed point values are shifted by.
55   enum { kShiftBits = 14 };
56 
57   SK_API ConvolutionFilter1D();
58   SK_API ~ConvolutionFilter1D();
59 
60   // Convert between floating point and our fixed point representation.
FloatToFixed(float f)61   static Fixed FloatToFixed(float f) {
62     return static_cast<Fixed>(f * (1 << kShiftBits));
63   }
FixedToChar(Fixed x)64   static unsigned char FixedToChar(Fixed x) {
65     return static_cast<unsigned char>(x >> kShiftBits);
66   }
FixedToFloat(Fixed x)67   static float FixedToFloat(Fixed x) {
68     // The cast relies on Fixed being a short, implying that on
69     // the platforms we care about all (16) bits will fit into
70     // the mantissa of a (32-bit) float.
71     static_assert(sizeof(Fixed) == 2,
72                   "fixed type should fit in float mantissa");
73     float raw = static_cast<float>(x);
74     return ldexpf(raw, -kShiftBits);
75   }
76 
77   // Returns the maximum pixel span of a filter.
max_filter()78   int max_filter() const { return max_filter_; }
79 
80   // Returns the number of filters in this filter. This is the dimension of the
81   // output image.
num_values()82   int num_values() const { return static_cast<int>(filters_.size()); }
83 
84   // Appends the given list of scaling values for generating a given output
85   // pixel. |filter_offset| is the distance from the edge of the image to where
86   // the scaling factors start. The scaling factors apply to the source pixels
87   // starting from this position, and going for the next |filter_length| pixels.
88   //
89   // You will probably want to make sure your input is normalized (that is,
90   // all entries in |filter_values| sub to one) to prevent affecting the overall
91   // brighness of the image.
92   //
93   // The filter_length must be > 0.
94   //
95   // This version will automatically convert your input to fixed point.
96   SK_API void AddFilter(int filter_offset,
97                         const float* filter_values,
98                         int filter_length);
99 
100   // Same as the above version, but the input is already fixed point.
101   void AddFilter(int filter_offset,
102                  const Fixed* filter_values,
103                  int filter_length);
104 
105   // Retrieves a filter for the given |value_offset|, a position in the output
106   // image in the direction we're convolving. The offset and length of the
107   // filter values are put into the corresponding out arguments (see AddFilter
108   // above for what these mean), and a pointer to the first scaling factor is
109   // returned. There will be |filter_length| values in this array.
FilterForValue(int value_offset,int * filter_offset,int * filter_length)110   inline const Fixed* FilterForValue(int value_offset,
111                                      int* filter_offset,
112                                      int* filter_length) const {
113     const FilterInstance& filter = filters_[value_offset];
114     *filter_offset = filter.offset;
115     *filter_length = filter.trimmed_length;
116     if (filter.trimmed_length == 0) {
117       return NULL;
118     }
119     return &filter_values_[filter.data_location];
120   }
121 
122   // Retrieves the filter for the offset 0, presumed to be the one and only.
123   // The offset and length of the filter values are put into the corresponding
124   // out arguments (see AddFilter). Note that |filter_legth| and
125   // |specified_filter_length| may be different if leading/trailing zeros of the
126   // original floating point form were clipped.
127   // There will be |filter_length| values in the return array.
128   // Returns NULL if the filter is 0-length (for instance when all floating
129   // point values passed to AddFilter were clipped to 0).
130   SK_API const Fixed* GetSingleFilter(int* specified_filter_length,
131                                       int* filter_offset,
132                                       int* filter_length) const;
133 
PaddingForSIMD()134   inline void PaddingForSIMD() {
135     // Padding |padding_count| of more dummy coefficients after the coefficients
136     // of last filter to prevent SIMD instructions which load 8 or 16 bytes
137     // together to access invalid memory areas. We are not trying to align the
138     // coefficients right now due to the opaqueness of <vector> implementation.
139     // This has to be done after all |AddFilter| calls.
140 #ifdef SIMD_PADDING
141     for (int i = 0; i < SIMD_PADDING; ++i)
142       filter_values_.push_back(static_cast<Fixed>(0));
143 #endif
144   }
145 
146  private:
147   struct FilterInstance {
148     // Offset within filter_values for this instance of the filter.
149     int data_location;
150 
151     // Distance from the left of the filter to the center. IN PIXELS
152     int offset;
153 
154     // Number of values in this filter instance.
155     int trimmed_length;
156 
157     // Filter length as specified. Note that this may be different from
158     // 'trimmed_length' if leading/trailing zeros of the original floating
159     // point form were clipped differently on each tail.
160     int length;
161   };
162 
163   // Stores the information for each filter added to this class.
164   std::vector<FilterInstance> filters_;
165 
166   // We store all the filter values in this flat list, indexed by
167   // |FilterInstance.data_location| to avoid the mallocs required for storing
168   // each one separately.
169   std::vector<Fixed> filter_values_;
170 
171   // The maximum size of any filter we've added.
172   int max_filter_;
173 };
174 
175 // Does a two-dimensional convolution on the given source image.
176 //
177 // It is assumed the source pixel offsets referenced in the input filters
178 // reference only valid pixels, so the source image size is not required. Each
179 // row of the source image starts |source_byte_row_stride| after the previous
180 // one (this allows you to have rows with some padding at the end).
181 //
182 // The result will be put into the given output buffer. The destination image
183 // size will be xfilter.num_values() * yfilter.num_values() pixels. It will be
184 // in rows of exactly xfilter.num_values() * 4 bytes.
185 //
186 // |source_has_alpha| is a hint that allows us to avoid doing computations on
187 // the alpha channel if the image is opaque. If you don't know, set this to
188 // true and it will work properly, but setting this to false will be a few
189 // percent faster if you know the image is opaque.
190 //
191 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
192 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
193 SK_API void BGRAConvolve2D(const unsigned char* source_data,
194                            int source_byte_row_stride,
195                            bool source_has_alpha,
196                            const ConvolutionFilter1D& xfilter,
197                            const ConvolutionFilter1D& yfilter,
198                            int output_byte_row_stride,
199                            unsigned char* output,
200                            bool use_simd_if_possible);
201 
202 // Does a 1D convolution of the given source image along the X dimension on
203 // a single channel of the bitmap.
204 //
205 // The function uses the same convolution kernel for each pixel. That kernel
206 // must be added to |filter| at offset 0. This is a most straightforward
207 // implementation of convolution, intended chiefly for development purposes.
208 SK_API void SingleChannelConvolveX1D(const unsigned char* source_data,
209                                      int source_byte_row_stride,
210                                      int input_channel_index,
211                                      int input_channel_count,
212                                      const ConvolutionFilter1D& filter,
213                                      const SkISize& image_size,
214                                      unsigned char* output,
215                                      int output_byte_row_stride,
216                                      int output_channel_index,
217                                      int output_channel_count,
218                                      bool absolute_values);
219 
220 // Does a 1D convolution of the given source image along the Y dimension on
221 // a single channel of the bitmap.
222 SK_API void SingleChannelConvolveY1D(const unsigned char* source_data,
223                                      int source_byte_row_stride,
224                                      int input_channel_index,
225                                      int input_channel_count,
226                                      const ConvolutionFilter1D& filter,
227                                      const SkISize& image_size,
228                                      unsigned char* output,
229                                      int output_byte_row_stride,
230                                      int output_channel_index,
231                                      int output_channel_count,
232                                      bool absolute_values);
233 
234 // Set up the |filter| instance with a gaussian kernel. |kernel_sigma| is the
235 // parameter of gaussian. If |derivative| is true, the kernel will be that of
236 // the first derivative. Intended for use with the two routines above.
237 SK_API void SetUpGaussianConvolutionKernel(ConvolutionFilter1D* filter,
238                                            float kernel_sigma,
239                                            bool derivative);
240 
241 }  // namespace skia
242 
243 #endif  // SKIA_EXT_CONVOLVER_H_
244