1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Disclaimer: This is not an official Google product.
16 //
17 // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
18 
19 #ifndef BUTTERAUGLI_BUTTERAUGLI_H_
20 #define BUTTERAUGLI_BUTTERAUGLI_H_
21 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>
30 
31 #ifndef PROFILER_ENABLED
32 #define PROFILER_ENABLED 0
33 #endif
34 #if PROFILER_ENABLED
35 #else
36 #define PROFILER_FUNC
37 #define PROFILER_ZONE(name)
38 #endif
39 
40 #define BUTTERAUGLI_ENABLE_CHECKS 0
41 
42 // This is the main interface to butteraugli image similarity
43 // analysis function.
44 
45 namespace butteraugli {
46 
// Forward declaration so the planar-image aliases below can be introduced
// before the Image template is defined.
template<typename T>
class Image;

// Planar image aliases used throughout the butteraugli API.
using Image8 = Image<uint8_t>;
using ImageF = Image<float>;
using ImageD = Image<double>;
53 
54 // ButteraugliInterface defines the public interface for butteraugli.
55 //
56 // It calculates the difference between rgb0 and rgb1.
57 //
58 // rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contains
59 // the red image for c == 0, green for c == 1, blue for c == 2. Location index
60 // px is calculated as y * xsize + x.
61 //
62 // Value of pixels of images rgb0 and rgb1 need to be represented as raw
63 // intensity. Most image formats store gamma corrected intensity in pixel
64 // values. This gamma correction has to be removed, by applying the following
65 // function:
66 // butteraugli_val = 255.0 * pow(png_val / 255.0, gamma);
67 // A typical value of gamma is 2.2. It is usually stored in the image header.
68 // Take care not to confuse that value with its inverse. The gamma value should
69 // be always greater than one.
70 // Butteraugli does not work as intended if the caller does not perform
71 // gamma correction.
72 //
73 // diffmap will contain an image of the size xsize * ysize, containing
74 // localized differences for values px (indexed with the px the same as rgb0
75 // and rgb1). diffvalue will give a global score of similarity.
76 //
77 // A diffvalue smaller than kButteraugliGood indicates that images can be
78 // observed as the same image.
79 // diffvalue larger than kButteraugliBad indicates that a difference between
80 // the images can be observed.
81 // A diffvalue between kButteraugliGood and kButteraugliBad indicates that
82 // a subtle difference can be observed between the images.
83 //
84 // Returns true on success.
85 
bool ButteraugliInterface(const std::vector<ImageF> &rgb0,
                          const std::vector<ImageF> &rgb1,
                          ImageF &diffmap,
                          double &diffvalue);

// Typical bounds of the per-pixel values produced by
// ButteraugliAdaptiveQuantization (declared below).
const double kButteraugliQuantLow = 0.26;
const double kButteraugliQuantHigh = 1.454;

// Converts the butteraugli score into fuzzy class values that are continuous
// at the class boundary. The class boundary location is based on human
// raters, but the slope is arbitrary. Particularly, it does not reflect
// the expectation value of probabilities of the human raters. It is just
// expected that a smoother class boundary will allow for higher-level
// optimization algorithms to work faster.
//
// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the
// scoring is fuzzy, a butteraugli score of 0.96 would return a class of
// around 1.9.
double ButteraugliFuzzyClass(double score);

// Inverse of ButteraugliFuzzyClass: returns the score whose fuzzy class
// equals "seek".
// Input values should be in range 0 (bad) to 2 (good). Use
// kButteraugliNormalization as normalization.
double ButteraugliFuzzyInverse(double seek);

// Returns a map which can be used for adaptive quantization. Values can
// typically range from kButteraugliQuantLow to kButteraugliQuantHigh. Low
// values require coarse quantization (e.g. near random noise), high values
// require fine quantization (e.g. in smooth bright areas).
bool ButteraugliAdaptiveQuantization(size_t xsize, size_t ysize,
    const std::vector<std::vector<float> > &rgb, std::vector<float> &quant);
116 
117 // Implementation details, don't use anything below or your code will
118 // break in the future.
119 
120 #ifdef _MSC_VER
121 #define BUTTERAUGLI_RESTRICT
122 #else
123 #define BUTTERAUGLI_RESTRICT __restrict__
124 #endif
125 
126 #ifdef _MSC_VER
127 #define BUTTERAUGLI_CACHE_ALIGNED_RETURN /* not supported */
128 #else
129 #define BUTTERAUGLI_CACHE_ALIGNED_RETURN __attribute__((assume_aligned(64)))
130 #endif
131 
132 // Alias for unchangeable, non-aliased pointers. T is a pointer type,
133 // possibly to a const type. Example: ConstRestrict<uint8_t*> ptr = nullptr.
134 // The conventional syntax uint8_t* const RESTRICT is more confusing - it is
135 // not immediately obvious that the pointee is non-const.
136 template <typename T>
137 using ConstRestrict = T const BUTTERAUGLI_RESTRICT;
138 
// Functions that depend on the cache line size.
class CacheAligned {
 public:
  static constexpr size_t kPointerSize = sizeof(void *);
  // Cache-line granularity assumed by Allocate() and by Image's row padding.
  static constexpr size_t kCacheLineSize = 64;

  // Returns a 64-byte-aligned block of at least "bytes" bytes (see the
  // assume_aligned(64) annotation); release it with Free().
  // The aligned-return annotation is only allowed on function declarations.
  static void *Allocate(const size_t bytes) BUTTERAUGLI_CACHE_ALIGNED_RETURN;
  // Releases a pointer previously obtained from Allocate().
  static void Free(void *aligned_pointer);
};
149 
// Unique pointer to a cache-aligned array of T. The deleter is a plain
// function pointer so the same type can hold either owned memory
// (CacheAligned::Free) or borrowed memory (a no-op deleter).
template <typename T>
using CacheAlignedUniquePtrT = std::unique_ptr<T[], void (*)(void *)>;

using CacheAlignedUniquePtr = CacheAlignedUniquePtrT<uint8_t>;
154 
155 template <typename T = uint8_t>
Allocate(const size_t entries)156 static inline CacheAlignedUniquePtrT<T> Allocate(const size_t entries) {
157   return CacheAlignedUniquePtrT<T>(
158       static_cast<ConstRestrict<T *>>(
159           CacheAligned::Allocate(entries * sizeof(T))),
160       CacheAligned::Free);
161 }
162 
163 // Returns the smallest integer not less than "amount" that is divisible by
164 // "multiple", which must be a power of two.
165 template <size_t multiple>
Align(const size_t amount)166 static inline size_t Align(const size_t amount) {
167   static_assert(multiple != 0 && ((multiple & (multiple - 1)) == 0),
168                 "Align<> argument must be a power of two");
169   return (amount + multiple - 1) & ~(multiple - 1);
170 }
171 
172 // Single channel, contiguous (cache-aligned) rows separated by padding.
173 // T must be POD.
174 //
175 // Rationale: vectorization benefits from aligned operands - unaligned loads and
176 // especially stores are expensive when the address crosses cache line
177 // boundaries. Introducing padding after each row ensures the start of a row is
178 // aligned, and that row loops can process entire vectors (writes to the padding
179 // are allowed and ignored).
180 //
181 // We prefer a planar representation, where channels are stored as separate
182 // 2D arrays, because that simplifies vectorization (repeating the same
183 // operation on multiple adjacent components) without the complexity of a
184 // hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients can easily iterate
185 // over all components in a row and Image requires no knowledge of the pixel
186 // format beyond the component type "T". The downside is that we duplicate the
187 // xsize/ysize members for each channel.
188 //
189 // This image layout could also be achieved with a vector and a row accessor
190 // function, but a class wrapper with support for "deleter" allows wrapping
191 // existing memory allocated by clients without copying the pixels. It also
192 // provides convenient accessors for xsize/ysize, which shortens function
193 // argument lists. Supports move-construction so it can be stored in containers.
194 template <typename ComponentType>
195 class Image {
196   // Returns cache-aligned row stride, being careful to avoid 2K aliasing.
BytesPerRow(const size_t xsize)197   static size_t BytesPerRow(const size_t xsize) {
198     // Allow reading one extra AVX-2 vector on the right margin.
199     const size_t row_size = xsize * sizeof(T) + 32;
200     const size_t align = CacheAligned::kCacheLineSize;
201     size_t bytes_per_row = (row_size + align - 1) & ~(align - 1);
202     // During the lengthy window before writes are committed to memory, CPUs
203     // guard against read after write hazards by checking the address, but
204     // only the lower 11 bits. We avoid a false dependency between writes to
205     // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
206     if (bytes_per_row % 2048 == 0) {
207       bytes_per_row += align;
208     }
209     return bytes_per_row;
210   }
211 
212  public:
213   using T = ComponentType;
214 
Image()215   Image() : xsize_(0), ysize_(0), bytes_per_row_(0), bytes_(static_cast<uint8_t*>(nullptr), Ignore) {}
216 
Image(const size_t xsize,const size_t ysize)217   Image(const size_t xsize, const size_t ysize)
218       : xsize_(xsize),
219         ysize_(ysize),
220         bytes_per_row_(BytesPerRow(xsize)),
221         bytes_(Allocate(bytes_per_row_ * ysize)) {}
222 
Image(const size_t xsize,const size_t ysize,ConstRestrict<uint8_t * > bytes,const size_t bytes_per_row)223   Image(const size_t xsize, const size_t ysize, ConstRestrict<uint8_t *> bytes,
224         const size_t bytes_per_row)
225       : xsize_(xsize),
226         ysize_(ysize),
227         bytes_per_row_(bytes_per_row),
228         bytes_(bytes, Ignore) {}
229 
230   // Move constructor (required for returning Image from function)
Image(Image && other)231   Image(Image &&other)
232       : xsize_(other.xsize_),
233         ysize_(other.ysize_),
234         bytes_per_row_(other.bytes_per_row_),
235         bytes_(std::move(other.bytes_)) {}
236 
237   // Move assignment (required for std::vector)
238   Image &operator=(Image &&other) {
239     xsize_ = other.xsize_;
240     ysize_ = other.ysize_;
241     bytes_per_row_ = other.bytes_per_row_;
242     bytes_ = std::move(other.bytes_);
243     return *this;
244   }
245 
Swap(Image & other)246   void Swap(Image &other) {
247     std::swap(xsize_, other.xsize_);
248     std::swap(ysize_, other.ysize_);
249     std::swap(bytes_per_row_, other.bytes_per_row_);
250     std::swap(bytes_, other.bytes_);
251   }
252 
253   // How many pixels.
xsize()254   size_t xsize() const { return xsize_; }
ysize()255   size_t ysize() const { return ysize_; }
256 
Row(const size_t y)257   ConstRestrict<T *> Row(const size_t y) BUTTERAUGLI_CACHE_ALIGNED_RETURN {
258 #ifdef BUTTERAUGLI_ENABLE_CHECKS
259     if (y >= ysize_) {
260       printf("Row %zu out of bounds (ysize=%zu)\n", y, ysize_);
261       abort();
262     }
263 #endif
264     return reinterpret_cast<T *>(bytes_.get() + y * bytes_per_row_);
265   }
266 
Row(const size_t y)267   ConstRestrict<const T *> Row(const size_t y) const
268       BUTTERAUGLI_CACHE_ALIGNED_RETURN {
269 #ifdef BUTTERAUGLI_ENABLE_CHECKS
270     if (y >= ysize_) {
271       printf("Const row %zu out of bounds (ysize=%zu)\n", y, ysize_);
272       abort();
273     }
274 #endif
275     return reinterpret_cast<const T *>(bytes_.get() + y * bytes_per_row_);
276   }
277 
278   // Raw access to byte contents, for interfacing with other libraries.
279   // Unsigned char instead of char to avoid surprises (sign extension).
bytes()280   ConstRestrict<uint8_t *> bytes() { return bytes_.get(); }
bytes()281   ConstRestrict<const uint8_t *> bytes() const { return bytes_.get(); }
bytes_per_row()282   size_t bytes_per_row() const { return bytes_per_row_; }
283 
284   // Returns number of pixels (some of which are padding) per row. Useful for
285   // computing other rows via pointer arithmetic.
PixelsPerRow()286   intptr_t PixelsPerRow() const {
287     static_assert(CacheAligned::kCacheLineSize % sizeof(T) == 0,
288                   "Padding must be divisible by the pixel size.");
289     return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
290   }
291 
292  private:
293   // Deleter used when bytes are not owned.
Ignore(void * ptr)294   static void Ignore(void *ptr) {}
295 
296   // (Members are non-const to enable assignment during move-assignment.)
297   size_t xsize_;  // original intended pixels, not including any padding.
298   size_t ysize_;
299   size_t bytes_per_row_;  // [bytes] including padding.
300   CacheAlignedUniquePtr bytes_;
301 };
302 
303 // Returns newly allocated planes of the given dimensions.
304 template <typename T>
CreatePlanes(const size_t xsize,const size_t ysize,const size_t num_planes)305 static inline std::vector<Image<T>> CreatePlanes(const size_t xsize,
306                                                  const size_t ysize,
307                                                  const size_t num_planes) {
308   std::vector<Image<T>> planes;
309   planes.reserve(num_planes);
310   for (size_t i = 0; i < num_planes; ++i) {
311     planes.emplace_back(xsize, ysize);
312   }
313   return planes;
314 }
315 
316 // Returns a new image with the same dimensions and pixel values.
317 template <typename T>
CopyPixels(const Image<T> & other)318 static inline Image<T> CopyPixels(const Image<T> &other) {
319   Image<T> copy(other.xsize(), other.ysize());
320   const void *BUTTERAUGLI_RESTRICT from = other.bytes();
321   void *BUTTERAUGLI_RESTRICT to = copy.bytes();
322   memcpy(to, from, other.ysize() * other.bytes_per_row());
323   return copy;
324 }
325 
326 // Returns new planes with the same dimensions and pixel values.
327 template <typename T>
CopyPlanes(const std::vector<Image<T>> & planes)328 static inline std::vector<Image<T>> CopyPlanes(
329     const std::vector<Image<T>> &planes) {
330   std::vector<Image<T>> copy;
331   copy.reserve(planes.size());
332   for (const Image<T> &plane : planes) {
333     copy.push_back(CopyPixels(plane));
334   }
335   return copy;
336 }
337 
338 // Compacts a padded image into a preallocated packed vector.
339 template <typename T>
CopyToPacked(const Image<T> & from,std::vector<T> * to)340 static inline void CopyToPacked(const Image<T> &from, std::vector<T> *to) {
341   const size_t xsize = from.xsize();
342   const size_t ysize = from.ysize();
343 #if BUTTERAUGLI_ENABLE_CHECKS
344   if (to->size() < xsize * ysize) {
345     printf("%zu x %zu exceeds %zu capacity\n", xsize, ysize, to->size());
346     abort();
347   }
348 #endif
349   for (size_t y = 0; y < ysize; ++y) {
350     ConstRestrict<const float*> row_from = from.Row(y);
351     ConstRestrict<float*> row_to = to->data() + y * xsize;
352     memcpy(row_to, row_from, xsize * sizeof(T));
353   }
354 }
355 
356 // Expands a packed vector into a preallocated padded image.
357 template <typename T>
CopyFromPacked(const std::vector<T> & from,Image<T> * to)358 static inline void CopyFromPacked(const std::vector<T> &from, Image<T> *to) {
359   const size_t xsize = to->xsize();
360   const size_t ysize = to->ysize();
361   assert(from.size() == xsize * ysize);
362   for (size_t y = 0; y < ysize; ++y) {
363     ConstRestrict<const float*> row_from = from.data() + y * xsize;
364     ConstRestrict<float*> row_to = to->Row(y);
365     memcpy(row_to, row_from, xsize * sizeof(T));
366   }
367 }
368 
369 template <typename T>
PlanesFromPacked(const size_t xsize,const size_t ysize,const std::vector<std::vector<T>> & packed)370 static inline std::vector<Image<T>> PlanesFromPacked(
371     const size_t xsize, const size_t ysize,
372     const std::vector<std::vector<T>> &packed) {
373   std::vector<Image<T>> planes;
374   planes.reserve(packed.size());
375   for (const std::vector<T> &p : packed) {
376     planes.push_back(Image<T>(xsize, ysize));
377     CopyFromPacked(p, &planes.back());
378   }
379   return planes;
380 }
381 
382 template <typename T>
PackedFromPlanes(const std::vector<Image<T>> & planes)383 static inline std::vector<std::vector<T>> PackedFromPlanes(
384     const std::vector<Image<T>> &planes) {
385   assert(!planes.empty());
386   const size_t num_pixels = planes[0].xsize() * planes[0].ysize();
387   std::vector<std::vector<T>> packed;
388   packed.reserve(planes.size());
389   for (const Image<T> &image : planes) {
390     packed.push_back(std::vector<T>(num_pixels));
391     CopyToPacked(image, &packed.back());
392   }
393   return packed;
394 }
395 
// Comparator caching the image dimensions and subsampling step shared by
// all stages of the butteraugli difference computation.
class ButteraugliComparator {
 public:
  // xsize/ysize: pixel dimensions of both inputs. step: subsampling interval
  // of the intermediate result maps (res_xsize_/res_ysize_ -- presumably
  // derived from xsize/ysize and step in the constructor; confirm in .cc).
  ButteraugliComparator(size_t xsize, size_t ysize, int step);

  // Computes the butteraugli map between rgb0 and rgb1 and updates result.
  void Diffmap(const std::vector<ImageF> &rgb0,
               const std::vector<ImageF> &rgb1,
               ImageF &result);

  // Same as above, but OpsinDynamicsImage() was already applied to
  // rgb0 and rgb1.
  void DiffmapOpsinDynamicsImage(const std::vector<ImageF> &rgb0,
                                 const std::vector<ImageF> &rgb1,
                                 ImageF &result);

 private:
  // Accumulates per-block DC and AC difference contributions into the two
  // preallocated output vectors.
  void BlockDiffMap(const std::vector<std::vector<float> > &rgb0,
                    const std::vector<std::vector<float> > &rgb1,
                    std::vector<float>* block_diff_dc,
                    std::vector<float>* block_diff_ac);

  // Edge-detector-based difference map between the two images.
  void EdgeDetectorMap(const std::vector<std::vector<float> > &rgb0,
                       const std::vector<std::vector<float> > &rgb1,
                       std::vector<float>* edge_detector_map);

  // Low-frequency edge differences, accumulated into block_diff_ac.
  void EdgeDetectorLowFreq(const std::vector<std::vector<float> > &rgb0,
                           const std::vector<std::vector<float> > &rgb1,
                           std::vector<float>* block_diff_ac);

  // Combines the per-channel masking maps (scale_xyb*) with the difference
  // maps into the final per-pixel result.
  void CombineChannels(const std::vector<std::vector<float> >& scale_xyb,
                       const std::vector<std::vector<float> >& scale_xyb_dc,
                       const std::vector<float>& block_diff_dc,
                       const std::vector<float>& block_diff_ac,
                       const std::vector<float>& edge_detector_map,
                       std::vector<float>* result);

  const size_t xsize_;       // input width in pixels
  const size_t ysize_;       // input height in pixels
  const size_t num_pixels_;  // presumably xsize_ * ysize_; set in ctor
  const int step_;           // subsampling step
  const size_t res_xsize_;   // width of the subsampled result maps
  const size_t res_ysize_;   // height of the subsampled result maps
};
440 
// Computes the butteraugli difference map between rgb0 and rgb1 into diffmap.
void ButteraugliDiffmap(const std::vector<ImageF> &rgb0,
                        const std::vector<ImageF> &rgb1,
                        ImageF &diffmap);

// Collapses a per-pixel difference map into the single global score.
double ButteraugliScoreFromDiffmap(const ImageF& distmap);

// Compute values of local frequency and dc masking based on the activity
// in the two images.
void Mask(const std::vector<std::vector<float> > &rgb0,
          const std::vector<std::vector<float> > &rgb1,
          size_t xsize, size_t ysize,
          std::vector<std::vector<float> > *mask,
          std::vector<std::vector<float> > *mask_dc);

// Computes difference metrics for one 8x8 block.
// NOTE(review): 192 = 3 channels * 64 pixels; confirm channel-major layout
// against the implementation.
void ButteraugliBlockDiff(double rgb0[192],
                          double rgb1[192],
                          double diff_xyb_dc[3],
                          double diff_xyb_ac[3],
                          double diff_xyb_edge_dc[3]);

// Per-sample opsin absorbance transform of one 3-component color.
void OpsinAbsorbance(const double in[3], double out[3]);

// In-place conversion of a planar RGB image into the opsin dynamics domain
// (the representation expected by DiffmapOpsinDynamicsImage).
void OpsinDynamicsImage(size_t xsize, size_t ysize,
                        std::vector<std::vector<float> > &rgb);

// Adjusts rgb0/rgb1 in regions where c0/c1 exhibit high intensity change
// (presumably modelling visual masking; verify against implementation).
void MaskHighIntensityChange(
    size_t xsize, size_t ysize,
    const std::vector<std::vector<float> > &c0,
    const std::vector<std::vector<float> > &c1,
    std::vector<std::vector<float> > &rgb0,
    std::vector<std::vector<float> > &rgb1);

// In-place blur of a single xsize*ysize channel with the given sigma
// (presumably Gaussian); border_ratio controls border sample weighting.
void Blur(size_t xsize, size_t ysize, float* channel, double sigma,
          double border_ratio = 0.0);

// Converts one linear RGB sample to the XYB opsin color space.
void RgbToXyb(double r, double g, double b,
              double *valx, double *valy, double *valz);

// Approximate gamma transfer function; presumably valid for arguments in
// [GammaMinArg(), GammaMaxArg()].
double SimpleGamma(double v);

// Bounds of the argument range supported by SimpleGamma().
double GammaMinArg();
double GammaMaxArg();
// Polynomial evaluation via Clenshaw's scheme (similar to Horner's).
// The recursion is unrolled at compile time through template instantiation;
// the base case must be a specialization, so these live outside any class.
template <int INDEX>
static inline void ClenshawRecursion(const double x, const double *coefficients,
                                     double *b1, double *b2) {
  // One Chebyshev recurrence step: b1' = 2*x*b1 - b2 + c[INDEX].
  const double xb = x * (*b1);
  const double next = (xb + xb) - (*b2) + coefficients[INDEX];
  *b2 = *b1;
  *b1 = next;

  ClenshawRecursion<INDEX - 1>(x, coefficients, b1, b2);
}

// Base case: the final iteration uses x*b1, not 2*x*b1.
template <>
inline void ClenshawRecursion<0>(const double x, const double *coefficients,
                                 double *b1, double *b2) {
  const double xb = x * (*b1);
  *b1 = xb - (*b2) + coefficients[0];
}
507 
508 // Rational polynomial := dividing two polynomial evaluations. These are easier
509 // to find than minimax polynomials.
510 struct RationalPolynomial {
511   template <int N>
EvaluatePolynomialRationalPolynomial512   static double EvaluatePolynomial(const double x,
513                                    const double (&coefficients)[N]) {
514     double b1 = 0.0;
515     double b2 = 0.0;
516     ClenshawRecursion<N - 1>(x, coefficients, &b1, &b2);
517     return b1;
518   }
519 
520   // Evaluates the polynomial at x (in [min_value, max_value]).
operatorRationalPolynomial521   inline double operator()(const float x) const {
522     // First normalize to [0, 1].
523     const double x01 = (x - min_value) / (max_value - min_value);
524     // And then to [-1, 1] domain of Chebyshev polynomials.
525     const double xc = 2.0 * x01 - 1.0;
526 
527     const double yp = EvaluatePolynomial(xc, p);
528     const double yq = EvaluatePolynomial(xc, q);
529     if (yq == 0.0) return 0.0;
530     return static_cast<float>(yp / yq);
531   }
532 
533   // Domain of the polynomials; they are undefined elsewhere.
534   double min_value;
535   double max_value;
536 
537   // Coefficients of T_n (Chebyshev polynomials of the first kind).
538   // Degree 5/5 is a compromise between accuracy (0.1%) and numerical stability.
539   double p[5 + 1];
540   double q[5 + 1];
541 };
542 
GammaPolynomial(float value)543 static inline float GammaPolynomial(float value) {
544   // Generated by gamma_polynomial.m from equispaced x/gamma(x) samples.
545   static const RationalPolynomial r = {
546   0.770000000000000, 274.579999999999984,
547   {
548     881.979476556478289, 1496.058452015812463, 908.662212739659481,
549     373.566100223287378, 85.840860336314364, 6.683258861509244,
550   },
551   {
552     12.262350348616792, 20.557285797683576, 12.161463238367844,
553     4.711532733641639, 0.899112889751053, 0.035662329617191,
554   }};
555   return r(value);
556 }
557 
558 }  // namespace butteraugli
559 
560 #endif  // BUTTERAUGLI_BUTTERAUGLI_H_
561