1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "Blur.h"
8 
9 #include <algorithm>
10 #include <math.h>
11 #include <string.h>
12 
13 #include "mozilla/CheckedInt.h"
14 
15 #include "2D.h"
16 #include "DataSurfaceHelpers.h"
17 #include "Tools.h"
18 
19 #ifdef BUILD_ARM_NEON
20 #include "mozilla/arm.h"
21 #endif
22 
23 using namespace std;
24 
25 namespace mozilla {
26 namespace gfx {
27 
28 /**
29  * Box blur involves looking at one pixel, and setting its value to the average
30  * of its neighbouring pixels.
31  * @param aInput The input buffer.
32  * @param aOutput The output buffer.
33  * @param aLeftLobe The number of pixels to blend on the left.
34  * @param aRightLobe The number of pixels to blend on the right.
35  * @param aWidth The number of columns in the buffers.
36  * @param aRows The number of rows in the buffers.
37  * @param aSkipRect An area to skip blurring in.
38  * XXX shouldn't we pass stride in separately here?
39  */
40 static void
BoxBlurHorizontal(unsigned char * aInput,unsigned char * aOutput,int32_t aLeftLobe,int32_t aRightLobe,int32_t aWidth,int32_t aRows,const IntRect & aSkipRect)41 BoxBlurHorizontal(unsigned char* aInput,
42                   unsigned char* aOutput,
43                   int32_t aLeftLobe,
44                   int32_t aRightLobe,
45                   int32_t aWidth,
46                   int32_t aRows,
47                   const IntRect& aSkipRect)
48 {
49     MOZ_ASSERT(aWidth > 0);
50 
51     int32_t boxSize = aLeftLobe + aRightLobe + 1;
52     bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
53                                   aWidth <= aSkipRect.XMost();
54     if (boxSize == 1) {
55         memcpy(aOutput, aInput, aWidth*aRows);
56         return;
57     }
58     uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
59 
60     for (int32_t y = 0; y < aRows; y++) {
61         // Check whether the skip rect intersects this row. If the skip
62         // rect covers the whole surface in this row, we can avoid
63         // this row entirely (and any others along the skip rect).
64         bool inSkipRectY = y >= aSkipRect.y &&
65                            y < aSkipRect.YMost();
66         if (inSkipRectY && skipRectCoversWholeRow) {
67             y = aSkipRect.YMost() - 1;
68             continue;
69         }
70 
71         uint32_t alphaSum = 0;
72         for (int32_t i = 0; i < boxSize; i++) {
73             int32_t pos = i - aLeftLobe;
74             // See assertion above; if aWidth is zero, then we would have no
75             // valid position to clamp to.
76             pos = max(pos, 0);
77             pos = min(pos, aWidth - 1);
78             alphaSum += aInput[aWidth * y + pos];
79         }
80         for (int32_t x = 0; x < aWidth; x++) {
81             // Check whether we are within the skip rect. If so, go
82             // to the next point outside the skip rect.
83             if (inSkipRectY && x >= aSkipRect.x &&
84                 x < aSkipRect.XMost()) {
85                 x = aSkipRect.XMost();
86                 if (x >= aWidth)
87                     break;
88 
89                 // Recalculate the neighbouring alpha values for
90                 // our new point on the surface.
91                 alphaSum = 0;
92                 for (int32_t i = 0; i < boxSize; i++) {
93                     int32_t pos = x + i - aLeftLobe;
94                     // See assertion above; if aWidth is zero, then we would have no
95                     // valid position to clamp to.
96                     pos = max(pos, 0);
97                     pos = min(pos, aWidth - 1);
98                     alphaSum += aInput[aWidth * y + pos];
99                 }
100             }
101             int32_t tmp = x - aLeftLobe;
102             int32_t last = max(tmp, 0);
103             int32_t next = min(tmp + boxSize, aWidth - 1);
104 
105             aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
106 
107             alphaSum += aInput[aWidth * y + next] -
108                         aInput[aWidth * y + last];
109         }
110     }
111 }
112 
113 /**
114  * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of
115  * left and right.
116  * XXX shouldn't we pass stride in separately here?
117  */
118 static void
BoxBlurVertical(unsigned char * aInput,unsigned char * aOutput,int32_t aTopLobe,int32_t aBottomLobe,int32_t aWidth,int32_t aRows,const IntRect & aSkipRect)119 BoxBlurVertical(unsigned char* aInput,
120                 unsigned char* aOutput,
121                 int32_t aTopLobe,
122                 int32_t aBottomLobe,
123                 int32_t aWidth,
124                 int32_t aRows,
125                 const IntRect& aSkipRect)
126 {
127     MOZ_ASSERT(aRows > 0);
128 
129     int32_t boxSize = aTopLobe + aBottomLobe + 1;
130     bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
131                                      aRows <= aSkipRect.YMost();
132     if (boxSize == 1) {
133         memcpy(aOutput, aInput, aWidth*aRows);
134         return;
135     }
136     uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
137 
138     for (int32_t x = 0; x < aWidth; x++) {
139         bool inSkipRectX = x >= aSkipRect.x &&
140                            x < aSkipRect.XMost();
141         if (inSkipRectX && skipRectCoversWholeColumn) {
142             x = aSkipRect.XMost() - 1;
143             continue;
144         }
145 
146         uint32_t alphaSum = 0;
147         for (int32_t i = 0; i < boxSize; i++) {
148             int32_t pos = i - aTopLobe;
149             // See assertion above; if aRows is zero, then we would have no
150             // valid position to clamp to.
151             pos = max(pos, 0);
152             pos = min(pos, aRows - 1);
153             alphaSum += aInput[aWidth * pos + x];
154         }
155         for (int32_t y = 0; y < aRows; y++) {
156             if (inSkipRectX && y >= aSkipRect.y &&
157                 y < aSkipRect.YMost()) {
158                 y = aSkipRect.YMost();
159                 if (y >= aRows)
160                     break;
161 
162                 alphaSum = 0;
163                 for (int32_t i = 0; i < boxSize; i++) {
164                     int32_t pos = y + i - aTopLobe;
165                     // See assertion above; if aRows is zero, then we would have no
166                     // valid position to clamp to.
167                     pos = max(pos, 0);
168                     pos = min(pos, aRows - 1);
169                     alphaSum += aInput[aWidth * pos + x];
170                 }
171             }
172             int32_t tmp = y - aTopLobe;
173             int32_t last = max(tmp, 0);
174             int32_t next = min(tmp + boxSize, aRows - 1);
175 
176             aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
177 
178             alphaSum += aInput[aWidth * next + x] -
179                         aInput[aWidth * last + x];
180         }
181     }
182 }
183 
ComputeLobes(int32_t aRadius,int32_t aLobes[3][2])184 static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
185 {
186     int32_t major, minor, final;
187 
188     /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
189      * some notes about approximating the Gaussian blur with box-blurs.
190      * The comments below are in the terminology of that page.
191      */
192     int32_t z = aRadius / 3;
193     switch (aRadius % 3) {
194     case 0:
195         // aRadius = z*3; choose d = 2*z + 1
196         major = minor = final = z;
197         break;
198     case 1:
199         // aRadius = z*3 + 1
200         // This is a tricky case since there is no value of d which will
201         // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
202         // for some integer k, then the radius will be 3*k. If d is even,
203         // i.e. d=2*k, then the radius will be 3*k - 1.
204         // So we have to choose values that don't match the standard
205         // algorithm.
206         major = z + 1;
207         minor = final = z;
208         break;
209     case 2:
210         // aRadius = z*3 + 2; choose d = 2*z + 2
211         major = final = z + 1;
212         minor = z;
213         break;
214     default:
215         // Mathematical impossibility!
216         MOZ_ASSERT(false);
217         major = minor = final = 0;
218     }
219     MOZ_ASSERT(major + minor + final == aRadius);
220 
221     aLobes[0][0] = major;
222     aLobes[0][1] = minor;
223     aLobes[1][0] = minor;
224     aLobes[1][1] = major;
225     aLobes[2][0] = final;
226     aLobes[2][1] = final;
227 }
228 
229 static void
SpreadHorizontal(unsigned char * aInput,unsigned char * aOutput,int32_t aRadius,int32_t aWidth,int32_t aRows,int32_t aStride,const IntRect & aSkipRect)230 SpreadHorizontal(unsigned char* aInput,
231                  unsigned char* aOutput,
232                  int32_t aRadius,
233                  int32_t aWidth,
234                  int32_t aRows,
235                  int32_t aStride,
236                  const IntRect& aSkipRect)
237 {
238     if (aRadius == 0) {
239         memcpy(aOutput, aInput, aStride * aRows);
240         return;
241     }
242 
243     bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
244                                     aWidth <= aSkipRect.XMost();
245     for (int32_t y = 0; y < aRows; y++) {
246         // Check whether the skip rect intersects this row. If the skip
247         // rect covers the whole surface in this row, we can avoid
248         // this row entirely (and any others along the skip rect).
249         bool inSkipRectY = y >= aSkipRect.y &&
250                              y < aSkipRect.YMost();
251         if (inSkipRectY && skipRectCoversWholeRow) {
252             y = aSkipRect.YMost() - 1;
253             continue;
254         }
255 
256         for (int32_t x = 0; x < aWidth; x++) {
257             // Check whether we are within the skip rect. If so, go
258             // to the next point outside the skip rect.
259             if (inSkipRectY && x >= aSkipRect.x &&
260                 x < aSkipRect.XMost()) {
261                 x = aSkipRect.XMost();
262                 if (x >= aWidth)
263                     break;
264             }
265 
266             int32_t sMin = max(x - aRadius, 0);
267             int32_t sMax = min(x + aRadius, aWidth - 1);
268             int32_t v = 0;
269             for (int32_t s = sMin; s <= sMax; ++s) {
270                 v = max<int32_t>(v, aInput[aStride * y + s]);
271             }
272             aOutput[aStride * y + x] = v;
273         }
274     }
275 }
276 
277 static void
SpreadVertical(unsigned char * aInput,unsigned char * aOutput,int32_t aRadius,int32_t aWidth,int32_t aRows,int32_t aStride,const IntRect & aSkipRect)278 SpreadVertical(unsigned char* aInput,
279                unsigned char* aOutput,
280                int32_t aRadius,
281                int32_t aWidth,
282                int32_t aRows,
283                int32_t aStride,
284                const IntRect& aSkipRect)
285 {
286     if (aRadius == 0) {
287         memcpy(aOutput, aInput, aStride * aRows);
288         return;
289     }
290 
291     bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
292                                      aRows <= aSkipRect.YMost();
293     for (int32_t x = 0; x < aWidth; x++) {
294         bool inSkipRectX = x >= aSkipRect.x &&
295                            x < aSkipRect.XMost();
296         if (inSkipRectX && skipRectCoversWholeColumn) {
297             x = aSkipRect.XMost() - 1;
298             continue;
299         }
300 
301         for (int32_t y = 0; y < aRows; y++) {
302             // Check whether we are within the skip rect. If so, go
303             // to the next point outside the skip rect.
304             if (inSkipRectX && y >= aSkipRect.y &&
305                 y < aSkipRect.YMost()) {
306                 y = aSkipRect.YMost();
307                 if (y >= aRows)
308                     break;
309             }
310 
311             int32_t sMin = max(y - aRadius, 0);
312             int32_t sMax = min(y + aRadius, aRows - 1);
313             int32_t v = 0;
314             for (int32_t s = sMin; s <= sMax; ++s) {
315                 v = max<int32_t>(v, aInput[aStride * s + x]);
316             }
317             aOutput[aStride * y + x] = v;
318         }
319     }
320 }
321 
322 CheckedInt<int32_t>
RoundUpToMultipleOf4(int32_t aVal)323 AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
324 {
325   CheckedInt<int32_t> val(aVal);
326 
327   val += 3;
328   val /= 4;
329   val *= 4;
330 
331   return val;
332 }
333 
AlphaBoxBlur(const Rect & aRect,const IntSize & aSpreadRadius,const IntSize & aBlurRadius,const Rect * aDirtyRect,const Rect * aSkipRect)334 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
335                            const IntSize& aSpreadRadius,
336                            const IntSize& aBlurRadius,
337                            const Rect* aDirtyRect,
338                            const Rect* aSkipRect)
339  : mSpreadRadius(aSpreadRadius),
340    mBlurRadius(aBlurRadius),
341    mSurfaceAllocationSize(0)
342 {
343   Rect rect(aRect);
344   rect.Inflate(Size(aBlurRadius + aSpreadRadius));
345   rect.RoundOut();
346 
347   if (aDirtyRect) {
348     // If we get passed a dirty rect from layout, we can minimize the
349     // shadow size and make painting faster.
350     mHasDirtyRect = true;
351     mDirtyRect = *aDirtyRect;
352     Rect requiredBlurArea = mDirtyRect.Intersect(rect);
353     requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
354     rect = requiredBlurArea.Intersect(rect);
355   } else {
356     mHasDirtyRect = false;
357   }
358 
359   mRect = IntRect(int32_t(rect.x), int32_t(rect.y),
360                   int32_t(rect.width), int32_t(rect.height));
361   if (mRect.IsEmpty()) {
362     return;
363   }
364 
365   if (aSkipRect) {
366     // If we get passed a skip rect, we can lower the amount of
367     // blurring/spreading we need to do. We convert it to IntRect to avoid
368     // expensive int<->float conversions if we were to use Rect instead.
369     Rect skipRect = *aSkipRect;
370     skipRect.RoundIn();
371     skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
372     mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y),
373                         int32_t(skipRect.width), int32_t(skipRect.height));
374 
375     mSkipRect = mSkipRect.Intersect(mRect);
376     if (mSkipRect.IsEqualInterior(mRect))
377       return;
378 
379     mSkipRect -= mRect.TopLeft();
380   } else {
381     mSkipRect = IntRect(0, 0, 0, 0);
382   }
383 
384   CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
385   if (stride.isValid()) {
386     mStride = stride.value();
387 
388     // We need to leave room for an additional 3 bytes for a potential overrun
389     // in our blurring code.
390     size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
391     if (size != 0) {
392       mSurfaceAllocationSize = size;
393     }
394   }
395 }
396 
AlphaBoxBlur(const Rect & aRect,int32_t aStride,float aSigmaX,float aSigmaY)397 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
398                            int32_t aStride,
399                            float aSigmaX,
400                            float aSigmaY)
401   : mRect(int32_t(aRect.x), int32_t(aRect.y),
402           int32_t(aRect.width), int32_t(aRect.height)),
403     mSpreadRadius(),
404     mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
405     mStride(aStride),
406     mSurfaceAllocationSize(0)
407 {
408   IntRect intRect;
409   if (aRect.ToIntRect(&intRect)) {
410     size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
411     if (minDataSize != 0) {
412       mSurfaceAllocationSize = minDataSize;
413     }
414   }
415 }
416 
417 
~AlphaBoxBlur()418 AlphaBoxBlur::~AlphaBoxBlur()
419 {
420 }
421 
422 IntSize
GetSize()423 AlphaBoxBlur::GetSize()
424 {
425   IntSize size(mRect.width, mRect.height);
426   return size;
427 }
428 
429 int32_t
GetStride()430 AlphaBoxBlur::GetStride()
431 {
432   return mStride;
433 }
434 
435 IntRect
GetRect()436 AlphaBoxBlur::GetRect()
437 {
438   return mRect;
439 }
440 
441 Rect*
GetDirtyRect()442 AlphaBoxBlur::GetDirtyRect()
443 {
444   if (mHasDirtyRect) {
445     return &mDirtyRect;
446   }
447 
448   return nullptr;
449 }
450 
451 size_t
GetSurfaceAllocationSize() const452 AlphaBoxBlur::GetSurfaceAllocationSize() const
453 {
454   return mSurfaceAllocationSize;
455 }
456 
457 void
Blur(uint8_t * aData)458 AlphaBoxBlur::Blur(uint8_t* aData)
459 {
460   if (!aData) {
461     return;
462   }
463 
464   // no need to do all this if not blurring or spreading
465   if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
466     int32_t stride = GetStride();
467 
468     IntSize size = GetSize();
469 
470     if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
471       // No need to use CheckedInt here - we have validated it in the constructor.
472       size_t szB = stride * size.height;
473       unsigned char* tmpData = new (std::nothrow) uint8_t[szB];
474 
475       if (!tmpData) {
476         return;
477       }
478 
479       memset(tmpData, 0, szB);
480 
481       SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect);
482       SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect);
483 
484       delete [] tmpData;
485     }
486 
487     int32_t horizontalLobes[3][2];
488     ComputeLobes(mBlurRadius.width, horizontalLobes);
489     int32_t verticalLobes[3][2];
490     ComputeLobes(mBlurRadius.height, verticalLobes);
491 
492     // We want to allow for some extra space on the left for alignment reasons.
493     int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
494 
495     IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
496                               size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
497 
498     if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
499       // Fallback to old blurring code when the surface is so large it may
500       // overflow our integral image!
501 
502       // No need to use CheckedInt here - we have validated it in the constructor.
503       size_t szB = stride * size.height;
504       uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
505       if (!tmpData) {
506         return;
507       }
508 
509       memset(tmpData, 0, szB);
510 
511       uint8_t* a = aData;
512       uint8_t* b = tmpData;
513       if (mBlurRadius.width > 0) {
514         BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect);
515         BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect);
516         BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect);
517       } else {
518         a = tmpData;
519         b = aData;
520       }
521       // The result is in 'b' here.
522       if (mBlurRadius.height > 0) {
523         BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect);
524         BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect);
525         BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect);
526       } else {
527         a = b;
528       }
529       // The result is in 'a' here.
530       if (a == tmpData) {
531         memcpy(aData, tmpData, szB);
532       }
533       delete [] tmpData;
534     } else {
535       size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
536       if (integralImageStride == 0) {
537         return;
538       }
539 
540       // We need to leave room for an additional 12 bytes for a maximum overrun
541       // of 3 pixels in the blurring code.
542       size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
543       if (bufLen == 0) {
544         return;
545       }
546       // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
547       // we divide by 4.
548       AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
549 
550       if (!integralImage) {
551         return;
552       }
553 
554 #ifdef USE_SSE2
555       if (Factory::HasSSE2()) {
556         BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
557                      verticalLobes[0][1], integralImage, integralImageStride);
558         BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
559                      verticalLobes[1][1], integralImage, integralImageStride);
560         BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
561                      verticalLobes[2][1], integralImage, integralImageStride);
562       } else
563 #endif
564 #ifdef BUILD_ARM_NEON
565       if (mozilla::supports_neon()) {
566         BoxBlur_NEON(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
567                      verticalLobes[0][1], integralImage, integralImageStride);
568         BoxBlur_NEON(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
569                      verticalLobes[1][1], integralImage, integralImageStride);
570         BoxBlur_NEON(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
571                      verticalLobes[2][1], integralImage, integralImageStride);
572       } else
573 #endif
574       {
575 #ifdef _MIPS_ARCH_LOONGSON3A
576         BoxBlur_LS3(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
577                      verticalLobes[0][1], integralImage, integralImageStride);
578         BoxBlur_LS3(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
579                      verticalLobes[1][1], integralImage, integralImageStride);
580         BoxBlur_LS3(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
581                      verticalLobes[2][1], integralImage, integralImageStride);
582 #else
583         BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
584                   verticalLobes[0][1], integralImage, integralImageStride);
585         BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
586                   verticalLobes[1][1], integralImage, integralImageStride);
587         BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
588                   verticalLobes[2][1], integralImage, integralImageStride);
589 #endif
590       }
591     }
592   }
593 }
594 
595 MOZ_ALWAYS_INLINE void
GenerateIntegralRow(uint32_t * aDest,const uint8_t * aSource,uint32_t * aPreviousRow,const uint32_t & aSourceWidth,const uint32_t & aLeftInflation,const uint32_t & aRightInflation)596 GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
597                     const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
598 {
599   uint32_t currentRowSum = 0;
600   uint32_t pixel = aSource[0];
601   for (uint32_t x = 0; x < aLeftInflation; x++) {
602     currentRowSum += pixel;
603     *aDest++ = currentRowSum + *aPreviousRow++;
604   }
605   for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
606       uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
607 #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
608       currentRowSum += (alphaValues >> 24) & 0xff;
609       *aDest++ = *aPreviousRow++ + currentRowSum;
610       currentRowSum += (alphaValues >> 16) & 0xff;
611       *aDest++ = *aPreviousRow++ + currentRowSum;
612       currentRowSum += (alphaValues >> 8) & 0xff;
613       *aDest++ = *aPreviousRow++ + currentRowSum;
614       currentRowSum += alphaValues & 0xff;
615       *aDest++ = *aPreviousRow++ + currentRowSum;
616 #else
617       currentRowSum += alphaValues & 0xff;
618       *aDest++ = *aPreviousRow++ + currentRowSum;
619       alphaValues >>= 8;
620       currentRowSum += alphaValues & 0xff;
621       *aDest++ = *aPreviousRow++ + currentRowSum;
622       alphaValues >>= 8;
623       currentRowSum += alphaValues & 0xff;
624       *aDest++ = *aPreviousRow++ + currentRowSum;
625       alphaValues >>= 8;
626       currentRowSum += alphaValues & 0xff;
627       *aDest++ = *aPreviousRow++ + currentRowSum;
628 #endif
629   }
630   pixel = aSource[aSourceWidth - 1];
631   for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
632     currentRowSum += pixel;
633     *aDest++ = currentRowSum + *aPreviousRow++;
634   }
635 }
636 
637 MOZ_ALWAYS_INLINE void
GenerateIntegralImage_C(int32_t aLeftInflation,int32_t aRightInflation,int32_t aTopInflation,int32_t aBottomInflation,uint32_t * aIntegralImage,size_t aIntegralImageStride,uint8_t * aSource,int32_t aSourceStride,const IntSize & aSize)638 GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
639                         int32_t aTopInflation, int32_t aBottomInflation,
640                         uint32_t *aIntegralImage, size_t aIntegralImageStride,
641                         uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
642 {
643   uint32_t stride32bit = aIntegralImageStride / 4;
644 
645   IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
646                             aSize.height + aTopInflation + aBottomInflation);
647 
648   memset(aIntegralImage, 0, aIntegralImageStride);
649 
650   GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
651                       aSize.width, aLeftInflation, aRightInflation);
652   for (int y = 1; y < aTopInflation + 1; y++) {
653     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
654                         aSize.width, aLeftInflation, aRightInflation);
655   }
656 
657   for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
658     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
659                         aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
660   }
661 
662   if (aBottomInflation) {
663     for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
664       GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
665                           aIntegralImage + (y - 1) * stride32bit,
666                           aSize.width, aLeftInflation, aRightInflation);
667     }
668   }
669 }
670 
671 /**
672  * Attempt to do an in-place box blur using an integral image.
673  */
674 void
BoxBlur_C(uint8_t * aData,int32_t aLeftLobe,int32_t aRightLobe,int32_t aTopLobe,int32_t aBottomLobe,uint32_t * aIntegralImage,size_t aIntegralImageStride)675 AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
676                         int32_t aLeftLobe,
677                         int32_t aRightLobe,
678                         int32_t aTopLobe,
679                         int32_t aBottomLobe,
680                         uint32_t *aIntegralImage,
681                         size_t aIntegralImageStride)
682 {
683   IntSize size = GetSize();
684 
685   MOZ_ASSERT(size.width > 0);
686 
687   // Our 'left' or 'top' lobe will include the current pixel. i.e. when
688   // looking at an integral image the value of a pixel at 'x,y' is calculated
689   // using the value of the integral image values above/below that.
690   aLeftLobe++;
691   aTopLobe++;
692   int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
693 
694   MOZ_ASSERT(boxSize > 0);
695 
696   if (boxSize == 1) {
697       return;
698   }
699 
700   int32_t stride32bit = aIntegralImageStride / 4;
701 
702   int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
703 
704   GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
705                           aIntegralImage, aIntegralImageStride, aData,
706                           mStride, size);
707 
708   uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
709 
710   uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
711 
712   // Storing these locally makes this about 30% faster! Presumably the compiler
713   // can't be sure we're not altering the member variables in this loop.
714   IntRect skipRect = mSkipRect;
715   uint8_t *data = aData;
716   int32_t stride = mStride;
717   for (int32_t y = 0; y < size.height; y++) {
718     bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
719 
720     uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
721     uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
722     uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
723     uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
724 
725     for (int32_t x = 0; x < size.width; x++) {
726       if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
727         x = skipRect.XMost() - 1;
728         // Trigger early jump on coming loop iterations, this will be reset
729         // next line anyway.
730         inSkipRectY = false;
731         continue;
732       }
733       int32_t topLeft = topLeftBase[x];
734       int32_t topRight = topRightBase[x];
735       int32_t bottomRight = bottomRightBase[x];
736       int32_t bottomLeft = bottomLeftBase[x];
737 
738       uint32_t value = bottomRight - topRight - bottomLeft;
739       value += topLeft;
740 
741       data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
742     }
743   }
744 }
745 
746 /**
747  * Compute the box blur size (which we're calling the blur radius) from
748  * the standard deviation.
749  *
750  * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
751  * approximating a Gaussian using box blurs.  This yields quite a good
752  * approximation for a Gaussian.  Then we multiply this by 1.5 since our
753  * code wants the radius of the entire triple-box-blur kernel instead of
754  * the diameter of an individual box blur.  For more details, see:
755  *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
756  *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
757  */
758 static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
759 
760 IntSize
CalculateBlurRadius(const Point & aStd)761 AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
762 {
763     IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
764                  static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
765 
766     return size;
767 }
768 
769 } // namespace gfx
770 } // namespace mozilla
771