// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_

// SIMD/multicore-friendly planar image representation with row accessors.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <cstddef>
#include <utility>  // std::move

#include "hwy/aligned_allocator.h"
#include "hwy/base.h"

namespace hwy {

// Type-independent parts of Image<> - reduces code duplication and facilitates
// moving member function implementations to cc file.
struct ImageBase {
  // Returns required alignment in bytes for externally allocated memory.
  static size_t VectorSize();

  // Returns distance [bytes] between the start of two consecutive rows, a
  // multiple of VectorSize but NOT kAlias (see implementation).
  static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);

  // No allocation (for output params or unused images)
  ImageBase()
      : xsize_(0),
        ysize_(0),
        bytes_per_row_(0),
        bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}

  // Allocates memory (this is the common case)
  ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);

  // References but does not take ownership of external memory. Useful for
  // interoperability with other libraries. `aligned` must be aligned to a
  // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
  // VectorSize() or preferably equal to BytesPerRow().
  ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);

  // Copy construction/assignment is forbidden to avoid inadvertent copies,
  // which can be very expensive. Use CopyImageTo() instead.
  ImageBase(const ImageBase& other) = delete;
  ImageBase& operator=(const ImageBase& other) = delete;

  // Move constructor (required for returning Image from function)
  ImageBase(ImageBase&& other) noexcept = default;

  // Move assignment (required for std::vector)
  ImageBase& operator=(ImageBase&& other) noexcept = default;

  void Swap(ImageBase& other);

  // Useful for pre-allocating image with some padding for alignment purposes
  // and later reporting the actual valid dimensions. Caller is responsible
  // for ensuring xsize/ysize are <= the original dimensions.
  void ShrinkTo(const size_t xsize, const size_t ysize) {
    xsize_ = static_cast<uint32_t>(xsize);
    ysize_ = static_cast<uint32_t>(ysize);
    // NOTE: we can't recompute bytes_per_row for more compact storage and
    // better locality because that would invalidate the image contents.
  }

  // How many pixels.
  HWY_INLINE size_t xsize() const { return xsize_; }
  HWY_INLINE size_t ysize() const { return ysize_; }

  // NOTE: do not use this for copying rows - the valid xsize may be much less.
  HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }

  // Raw access to byte contents, for interfacing with other libraries.
  // Unsigned char instead of char to avoid surprises (sign extension).
  HWY_INLINE uint8_t* bytes() {
    void* p = bytes_.get();
    return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
  }
  HWY_INLINE const uint8_t* bytes() const {
    const void* p = bytes_.get();
    return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
  }

 protected:
  // Returns pointer to the start of a row.
  HWY_INLINE void* VoidRow(const size_t y) const {
#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
    defined(THREAD_SANITIZER)
    if (y >= ysize_) {
      HWY_ABORT("Row(%zu) >= %u\n", y, ysize_);
    }
#endif

    void* row = bytes_.get() + y * bytes_per_row_;
    return HWY_ASSUME_ALIGNED(row, 64);
  }

  enum class Padding {
    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
    kRoundUp,
    // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
    // vector to be initialized. If done by default, this would suppress
    // legitimate msan warnings. We therefore require users to explicitly call
    // InitializePadding before using unaligned loads (e.g. convolution).
    kUnaligned
  };

  // Initializes the minimum bytes required to suppress msan warnings from
  // legitimate (according to Padding mode) vector loads/stores on the right
  // border, where some lanes are uninitialized and assumed to be unused.
  void InitializePadding(size_t sizeof_t, Padding padding);

  // (Members are non-const to enable assignment during move-assignment.)
  uint32_t xsize_;  // In valid pixels, not including any padding.
  uint32_t ysize_;
  size_t bytes_per_row_;  // Includes padding.
  AlignedFreeUniquePtr<uint8_t[]> bytes_;
};
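
// Illustrative sketch (not part of this header): allocating caller-owned
// memory that satisfies the requirements of the external-memory constructor
// above. Assumes VectorSize() does not exceed the alignment guaranteed by
// AllocateAligned (HWY_ALIGNMENT); the sizes are hypothetical.
//
//   const size_t xsize = 320, ysize = 200;
//   const size_t bytes_per_row = ImageBase::BytesPerRow(xsize, sizeof(float));
//   AlignedFreeUniquePtr<uint8_t[]> mem =
//       AllocateAligned<uint8_t>(bytes_per_row * ysize);
//   // `mem` may now be referenced (not owned) by the external-memory
//   // constructor of Image<float>, declared below.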

// Single channel, aligned rows separated by padding. T must be POD.
//
// 'Single channel' (one 2D array per channel) simplifies vectorization
// (repeating the same operation on multiple adjacent components) without the
// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
// can easily iterate over all components in a row and Image requires no
// knowledge of the pixel format beyond the component type "T".
//
// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
// false sharing between two threads operating on adjacent rows.
//
// 'Padding' is still relevant because vectors could potentially be larger than
// a cache line. By rounding up row sizes to the vector size, we allow
// reading/writing ALIGNED vectors whose first lane is a valid sample. This
// avoids needing a separate loop to handle remaining unaligned lanes.
//
// This image layout could also be achieved with a vector and a row accessor
// function, but a class wrapper with support for "deleter" allows wrapping
// existing memory allocated by clients without copying the pixels. It also
// provides convenient accessors for xsize/ysize, which shortens function
// argument lists. Supports move-construction so it can be stored in containers.
template <typename ComponentType>
class Image : public ImageBase {
 public:
  using T = ComponentType;

  Image() = default;
  Image(const size_t xsize, const size_t ysize)
      : ImageBase(xsize, ysize, sizeof(T)) {}
  Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
        void* aligned)
      : ImageBase(xsize, ysize, bytes_per_row, aligned) {}

  void InitializePaddingForUnalignedAccesses() {
    InitializePadding(sizeof(T), Padding::kUnaligned);
  }

  HWY_INLINE const T* ConstRow(const size_t y) const {
    return static_cast<const T*>(VoidRow(y));
  }
  HWY_INLINE const T* ConstRow(const size_t y) {
    return static_cast<const T*>(VoidRow(y));
  }

  // Returns pointer to non-const. This allows passing const Image* parameters
  // when the callee is only supposed to fill the pixels, as opposed to
  // allocating or resizing the image.
  HWY_INLINE T* MutableRow(const size_t y) const {
    return static_cast<T*>(VoidRow(y));
  }
  HWY_INLINE T* MutableRow(const size_t y) {
    return static_cast<T*>(VoidRow(y));
  }

  // Returns number of pixels (some of which are padding) per row. Useful for
  // computing other rows via pointer arithmetic. WARNING: this must
  // NOT be used to determine xsize.
  HWY_INLINE intptr_t PixelsPerRow() const {
    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
  }
};

using ImageF = Image<float>;
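
// Illustrative sketch (not part of this header): allocating an ImageF and
// traversing it row by row. The vector loop relies on the default kRoundUp
// padding, which permits aligned loads/stores past xsize() up to the next
// multiple of Lanes(d). `hn` is assumed to alias hwy::HWY_NAMESPACE inside a
// Highway translation unit; xsize/ysize are hypothetical.
//
//   ImageF image(xsize, ysize);
//   for (size_t y = 0; y < image.ysize(); ++y) {
//     float* HWY_RESTRICT row = image.MutableRow(y);
//     for (size_t x = 0; x < image.xsize(); ++x) {
//       row[x] = static_cast<float>(x + y);  // any per-pixel computation
//     }
//   }
//
//   // SIMD traversal (inside a HWY_NAMESPACE function):
//   const hn::ScalableTag<float> d;
//   for (size_t y = 0; y < image.ysize(); ++y) {
//     float* HWY_RESTRICT row = image.MutableRow(y);
//     for (size_t x = 0; x < image.xsize(); x += hn::Lanes(d)) {
//       const auto v = hn::Load(d, row + x);  // aligned thanks to kRoundUp
//       hn::Store(hn::Mul(v, v), d, row + x);
//     }
//   }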

// A bundle of 3 same-sized images. To fill an existing Image3 using
// single-channel producers, we also need access to each const Image*. Const
// prevents breaking the same-size invariant, while still allowing pixels to be
// changed via MutableRow.
template <typename ComponentType>
class Image3 {
 public:
  using T = ComponentType;
  using ImageT = Image<T>;
  static constexpr size_t kNumPlanes = 3;

  Image3() : planes_{ImageT(), ImageT(), ImageT()} {}

  Image3(const size_t xsize, const size_t ysize)
      : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
                ImageT(xsize, ysize)} {}

  Image3(Image3&& other) noexcept {
    for (size_t i = 0; i < kNumPlanes; i++) {
      planes_[i] = std::move(other.planes_[i]);
    }
  }

  Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
    if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
      HWY_ABORT("Not same size: %zu x %zu, %zu x %zu, %zu x %zu\n",
                plane0.xsize(), plane0.ysize(), plane1.xsize(), plane1.ysize(),
                plane2.xsize(), plane2.ysize());
    }
    planes_[0] = std::move(plane0);
    planes_[1] = std::move(plane1);
    planes_[2] = std::move(plane2);
  }

  // Copy construction/assignment is forbidden to avoid inadvertent copies,
  // which can be very expensive. Use CopyImageTo instead.
  Image3(const Image3& other) = delete;
  Image3& operator=(const Image3& other) = delete;

  Image3& operator=(Image3&& other) noexcept {
    for (size_t i = 0; i < kNumPlanes; i++) {
      planes_[i] = std::move(other.planes_[i]);
    }
    return *this;
  }

  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
    return static_cast<const T*>(VoidPlaneRow(c, y));
  }
  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
    return static_cast<const T*>(VoidPlaneRow(c, y));
  }

  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
    return static_cast<T*>(VoidPlaneRow(c, y));
  }
  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
    return static_cast<T*>(VoidPlaneRow(c, y));
  }

  HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }

  void Swap(Image3& other) {
    for (size_t c = 0; c < 3; ++c) {
      other.planes_[c].Swap(planes_[c]);
    }
  }

  void ShrinkTo(const size_t xsize, const size_t ysize) {
    for (ImageT& plane : planes_) {
      plane.ShrinkTo(xsize, ysize);
    }
  }

  // Sizes of all three images are guaranteed to be equal.
  HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
  HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
  // Returns offset [bytes] from one row to the next row of the same plane.
  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
  // the valid xsize may be much less.
  HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
  // Returns number of pixels (some of which are padding) per row. Useful for
  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
  // to determine xsize.
  HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }

 private:
  // Returns pointer to the start of a row.
  HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
    defined(THREAD_SANITIZER)
    if (c >= kNumPlanes || y >= ysize()) {
      HWY_ABORT("PlaneRow(%zu, %zu) >= %zu\n", c, y, ysize());
    }
#endif
    // Use the first plane's stride because the compiler might not realize they
    // are all equal. Thus we only need a single multiplication for all planes.
    const size_t row_offset = y * planes_[0].bytes_per_row();
    const void* row = planes_[c].bytes() + row_offset;
    return const_cast<void*>(HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
  }

 private:
  ImageT planes_[kNumPlanes];
};

using Image3F = Image3<float>;
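
// Illustrative sketch (not part of this header): filling all three planes of
// an Image3F. Passing a pointer-to-const Image3 still allows writing pixels
// via MutablePlaneRow, per the convention described above; FillGray is a
// hypothetical helper.
//
//   void FillGray(const Image3F* image, float value) {
//     for (size_t c = 0; c < Image3F::kNumPlanes; ++c) {
//       for (size_t y = 0; y < image->ysize(); ++y) {
//         float* HWY_RESTRICT row = image->MutablePlaneRow(c, y);
//         for (size_t x = 0; x < image->xsize(); ++x) {
//           row[x] = value;
//         }
//       }
//     }
//   }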

// Rectangular region in image(s). Factoring this out of Image instead of
// shifting the pointer by x0/y0 allows this to apply to multiple images with
// different resolutions. Can compare size via SameSize(rect1, rect2).
class Rect {
 public:
  // Most windows are xsize_max * ysize_max, except those on the borders where
  // begin + size_max > end.
  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
                 size_t ysize_max, size_t xend, size_t yend)
      : x0_(xbegin),
        y0_(ybegin),
        xsize_(ClampedSize(xbegin, xsize_max, xend)),
        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}

  // Construct with origin and known size (typically from another Rect).
  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}

  // Construct a rect that covers a whole image.
  template <typename Image>
  explicit Rect(const Image& image)
      : Rect(0, 0, image.xsize(), image.ysize()) {}

  Rect() : Rect(0, 0, 0, 0) {}

  Rect(const Rect&) = default;
  Rect& operator=(const Rect&) = default;

  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
               size_t ysize_max) {
    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
                y0_ + ysize_);
  }

  template <typename T>
  const T* ConstRow(const Image<T>* image, size_t y) const {
    return image->ConstRow(y + y0_) + x0_;
  }

  template <typename T>
  T* MutableRow(const Image<T>* image, size_t y) const {
    return image->MutableRow(y + y0_) + x0_;
  }

  template <typename T>
  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
    return image.ConstPlaneRow(c, y + y0_) + x0_;
  }

  template <typename T>
  T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
    return image->MutablePlaneRow(c, y + y0_) + x0_;
  }

  // Returns true if this Rect fully resides in the given image. ImageT could
  // be Image<T> or Image3<T>; however if ImageT is Rect, results are
  // nonsensical.
  template <class ImageT>
  bool IsInside(const ImageT& image) const {
    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
  }

  size_t x0() const { return x0_; }
  size_t y0() const { return y0_; }
  size_t xsize() const { return xsize_; }
  size_t ysize() const { return ysize_; }

 private:
  // Returns size_max, or whatever is left in [begin, end).
  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
                                      size_t end) {
    return (begin + size_max <= end) ? size_max
                                     : (end > begin ? end - begin : 0);
  }

  size_t x0_;
  size_t y0_;

  size_t xsize_;
  size_t ysize_;
};
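
// Illustrative sketch (not part of this header): processing an image in
// tiles via Rect. The six-argument constructor clamps tiles that extend past
// the border, and the row accessors add the x0/y0 offsets so the same loop
// body works for every tile. `in` and the tile size are hypothetical.
//
//   const size_t tile = 64;
//   for (size_t ty = 0; ty < in.ysize(); ty += tile) {
//     for (size_t tx = 0; tx < in.xsize(); tx += tile) {
//       const Rect rect(tx, ty, tile, tile, in.xsize(), in.ysize());
//       HWY_DASSERT(rect.IsInside(in));
//       for (size_t y = 0; y < rect.ysize(); ++y) {
//         const float* HWY_RESTRICT row = rect.ConstRow(&in, y);
//         // Process rect.xsize() samples starting at `row`.
//       }
//     }
//   }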

// Works for any image-like input type(s).
template <class Image1, class Image2>
HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
  return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
}

// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
// We assume the radius (distance outside the image) is small compared to the
// image size, otherwise this might not terminate.
// The mirror is outside the last column (border pixel is also replicated).
static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
                                                 const int64_t xsize) {
  HWY_DASSERT(xsize != 0);

  // TODO(janwas): replace with branchless version
  while (x < 0 || x >= xsize) {
    if (x < 0) {
      x = -x - 1;
    } else {
      x = 2 * xsize - 1 - x;
    }
  }
  return static_cast<size_t>(x);
}
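
// Worked examples for xsize = 10: Mirror(-1, 10) == 0, Mirror(-2, 10) == 1,
// Mirror(10, 10) == 9, Mirror(11, 10) == 8; in-range coordinates such as
// Mirror(3, 10) == 3 are returned unchanged.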

// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):

// Mirrors (repeating the edge pixel once). Useful for convolutions.
struct WrapMirror {
  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
    return Mirror(coord, static_cast<int64_t>(size));
  }
};

// Returns the same coordinate, for when we know "coord" is already valid (e.g.
// interior of an image).
struct WrapUnchanged {
  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
    return static_cast<size_t>(coord);
  }
};
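
// Illustrative sketch (not part of this header): a horizontal 3-tap filter
// that selects its boundary handling via one of the functors above. Spans
// known to be in the interior could use WrapUnchanged to skip the mirroring.
// `in`, `out` and xsize are hypothetical.
//
//   const WrapMirror wrap;
//   for (int64_t x = 0; x < static_cast<int64_t>(xsize); ++x) {
//     const size_t left = wrap(x - 1, xsize);
//     const size_t right = wrap(x + 1, xsize);
//     out[x] = 0.25f * in[left] + 0.5f * in[x] + 0.25f * in[right];
//   }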

// Similar to Wrap* but for row pointers (reduces Row() multiplications).

class WrapRowMirror {
 public:
  template <class View>
  WrapRowMirror(const View& image, size_t ysize)
      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}

  const float* operator()(const float* const HWY_RESTRICT row,
                          const int64_t stride) const {
    if (row < first_row_) {
      const int64_t num_before = first_row_ - row;
      // Mirrored; one row before => row 0, two before => row 1, ...
      return first_row_ + num_before - stride;
    }
    if (row > last_row_) {
      const int64_t num_after = row - last_row_;
      // Mirrored; one row after => last row, two after => last - 1, ...
      return last_row_ - num_after + stride;
    }
    return row;
  }

 private:
  const float* const HWY_RESTRICT first_row_;
  const float* const HWY_RESTRICT last_row_;
};

struct WrapRowUnchanged {
  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
                                     int64_t /*stride*/) const {
    return row;
  }
};
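
// Illustrative sketch (not part of this header): a vertical 3-tap convolution
// that uses WrapRowMirror to obtain valid neighbor-row pointers at the top
// and bottom borders, and PixelsPerRow() as the row stride. `in` and `out`
// are hypothetical same-sized ImageF.
//
//   const int64_t stride = in.PixelsPerRow();
//   const WrapRowMirror wrap_row(in, in.ysize());
//   for (size_t y = 0; y < in.ysize(); ++y) {
//     const float* HWY_RESTRICT center = in.ConstRow(y);
//     const float* above = wrap_row(center - stride, stride);
//     const float* below = wrap_row(center + stride, stride);
//     float* HWY_RESTRICT out_row = out.MutableRow(y);
//     for (size_t x = 0; x < in.xsize(); ++x) {
//       out_row[x] = 0.25f * above[x] + 0.5f * center[x] + 0.25f * below[x];
//     }
//   }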

}  // namespace hwy

#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_