1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
16 #define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
17 
18 // SIMD/multicore-friendly planar image representation with row accessors.
19 
20 #include <inttypes.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <string.h>
24 
25 #include <cstddef>
26 #include <utility>  // std::move
27 
28 #include "hwy/aligned_allocator.h"
29 #include "hwy/base.h"
30 #include "hwy/highway_export.h"
31 
32 namespace hwy {
33 
34 // Type-independent parts of Image<> - reduces code duplication and facilitates
35 // moving member function implementations to cc file.
36 struct HWY_CONTRIB_DLLEXPORT ImageBase {
37   // Returns required alignment in bytes for externally allocated memory.
38   static size_t VectorSize();
39 
40   // Returns distance [bytes] between the start of two consecutive rows, a
41   // multiple of VectorSize but NOT kAlias (see implementation).
42   static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
43 
44   // No allocation (for output params or unused images)
ImageBaseImageBase45   ImageBase()
46       : xsize_(0),
47         ysize_(0),
48         bytes_per_row_(0),
49         bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
50 
51   // Allocates memory (this is the common case)
52   ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
53 
54   // References but does not take ownership of external memory. Useful for
55   // interoperability with other libraries. `aligned` must be aligned to a
56   // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
57   // VectorSize() or preferably equal to BytesPerRow().
58   ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
59 
60   // Copy construction/assignment is forbidden to avoid inadvertent copies,
61   // which can be very expensive. Use CopyImageTo() instead.
62   ImageBase(const ImageBase& other) = delete;
63   ImageBase& operator=(const ImageBase& other) = delete;
64 
65   // Move constructor (required for returning Image from function)
66   ImageBase(ImageBase&& other) noexcept = default;
67 
68   // Move assignment (required for std::vector)
69   ImageBase& operator=(ImageBase&& other) noexcept = default;
70 
71   void Swap(ImageBase& other);
72 
73   // Useful for pre-allocating image with some padding for alignment purposes
74   // and later reporting the actual valid dimensions. Caller is responsible
75   // for ensuring xsize/ysize are <= the original dimensions.
ShrinkToImageBase76   void ShrinkTo(const size_t xsize, const size_t ysize) {
77     xsize_ = static_cast<uint32_t>(xsize);
78     ysize_ = static_cast<uint32_t>(ysize);
79     // NOTE: we can't recompute bytes_per_row for more compact storage and
80     // better locality because that would invalidate the image contents.
81   }
82 
83   // How many pixels.
xsizeImageBase84   HWY_INLINE size_t xsize() const { return xsize_; }
ysizeImageBase85   HWY_INLINE size_t ysize() const { return ysize_; }
86 
87   // NOTE: do not use this for copying rows - the valid xsize may be much less.
bytes_per_rowImageBase88   HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
89 
90   // Raw access to byte contents, for interfacing with other libraries.
91   // Unsigned char instead of char to avoid surprises (sign extension).
bytesImageBase92   HWY_INLINE uint8_t* bytes() {
93     void* p = bytes_.get();
94     return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
95   }
bytesImageBase96   HWY_INLINE const uint8_t* bytes() const {
97     const void* p = bytes_.get();
98     return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
99   }
100 
101  protected:
102   // Returns pointer to the start of a row.
VoidRowImageBase103   HWY_INLINE void* VoidRow(const size_t y) const {
104 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
105     if (y >= ysize_) {
106       HWY_ABORT("Row(%" PRIu64 ") >= %u\n", static_cast<uint64_t>(y), ysize_);
107     }
108 #endif
109 
110     void* row = bytes_.get() + y * bytes_per_row_;
111     return HWY_ASSUME_ALIGNED(row, 64);
112   }
113 
114   enum class Padding {
115     // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
116     kRoundUp,
117     // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
118     // vector to be initialized. If done by default, this would suppress
119     // legitimate msan warnings. We therefore require users to explicitly call
120     // InitializePadding before using unaligned loads (e.g. convolution).
121     kUnaligned
122   };
123 
124   // Initializes the minimum bytes required to suppress msan warnings from
125   // legitimate (according to Padding mode) vector loads/stores on the right
126   // border, where some lanes are uninitialized and assumed to be unused.
127   void InitializePadding(size_t sizeof_t, Padding padding);
128 
129   // (Members are non-const to enable assignment during move-assignment.)
130   uint32_t xsize_;  // In valid pixels, not including any padding.
131   uint32_t ysize_;
132   size_t bytes_per_row_;  // Includes padding.
133   AlignedFreeUniquePtr<uint8_t[]> bytes_;
134 };
135 
136 // Single channel, aligned rows separated by padding. T must be POD.
137 //
138 // 'Single channel' (one 2D array per channel) simplifies vectorization
139 // (repeating the same operation on multiple adjacent components) without the
140 // complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
141 // can easily iterate over all components in a row and Image requires no
142 // knowledge of the pixel format beyond the component type "T".
143 //
144 // 'Aligned' means each row is aligned to the L1 cache line size. This prevents
145 // false sharing between two threads operating on adjacent rows.
146 //
147 // 'Padding' is still relevant because vectors could potentially be larger than
148 // a cache line. By rounding up row sizes to the vector size, we allow
149 // reading/writing ALIGNED vectors whose first lane is a valid sample. This
150 // avoids needing a separate loop to handle remaining unaligned lanes.
151 //
152 // This image layout could also be achieved with a vector and a row accessor
153 // function, but a class wrapper with support for "deleter" allows wrapping
154 // existing memory allocated by clients without copying the pixels. It also
155 // provides convenient accessors for xsize/ysize, which shortens function
156 // argument lists. Supports move-construction so it can be stored in containers.
157 template <typename ComponentType>
158 class Image : public ImageBase {
159  public:
160   using T = ComponentType;
161 
162   Image() = default;
Image(const size_t xsize,const size_t ysize)163   Image(const size_t xsize, const size_t ysize)
164       : ImageBase(xsize, ysize, sizeof(T)) {}
Image(const size_t xsize,const size_t ysize,size_t bytes_per_row,void * aligned)165   Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
166         void* aligned)
167       : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
168 
InitializePaddingForUnalignedAccesses()169   void InitializePaddingForUnalignedAccesses() {
170     InitializePadding(sizeof(T), Padding::kUnaligned);
171   }
172 
ConstRow(const size_t y)173   HWY_INLINE const T* ConstRow(const size_t y) const {
174     return static_cast<const T*>(VoidRow(y));
175   }
ConstRow(const size_t y)176   HWY_INLINE const T* ConstRow(const size_t y) {
177     return static_cast<const T*>(VoidRow(y));
178   }
179 
180   // Returns pointer to non-const. This allows passing const Image* parameters
181   // when the callee is only supposed to fill the pixels, as opposed to
182   // allocating or resizing the image.
MutableRow(const size_t y)183   HWY_INLINE T* MutableRow(const size_t y) const {
184     return static_cast<T*>(VoidRow(y));
185   }
MutableRow(const size_t y)186   HWY_INLINE T* MutableRow(const size_t y) {
187     return static_cast<T*>(VoidRow(y));
188   }
189 
190   // Returns number of pixels (some of which are padding) per row. Useful for
191   // computing other rows via pointer arithmetic. WARNING: this must
192   // NOT be used to determine xsize.
PixelsPerRow()193   HWY_INLINE intptr_t PixelsPerRow() const {
194     return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
195   }
196 };
197 
198 using ImageF = Image<float>;
199 
200 // A bundle of 3 same-sized images. To fill an existing Image3 using
201 // single-channel producers, we also need access to each const Image*. Const
202 // prevents breaking the same-size invariant, while still allowing pixels to be
203 // changed via MutableRow.
204 template <typename ComponentType>
205 class Image3 {
206  public:
207   using T = ComponentType;
208   using ImageT = Image<T>;
209   static constexpr size_t kNumPlanes = 3;
210 
Image3()211   Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
212 
Image3(const size_t xsize,const size_t ysize)213   Image3(const size_t xsize, const size_t ysize)
214       : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
215                 ImageT(xsize, ysize)} {}
216 
Image3(Image3 && other)217   Image3(Image3&& other) noexcept {
218     for (size_t i = 0; i < kNumPlanes; i++) {
219       planes_[i] = std::move(other.planes_[i]);
220     }
221   }
222 
Image3(ImageT && plane0,ImageT && plane1,ImageT && plane2)223   Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
224     if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
225       HWY_ABORT("Not same size: %" PRIu64 " x %" PRIu64 ", %" PRIu64
226                 " x %" PRIu64 ", %" PRIu64 " x %" PRIu64 "\n",
227                 static_cast<uint64_t>(plane0.xsize()),
228                 static_cast<uint64_t>(plane0.ysize()),
229                 static_cast<uint64_t>(plane1.xsize()),
230                 static_cast<uint64_t>(plane1.ysize()),
231                 static_cast<uint64_t>(plane2.xsize()),
232                 static_cast<uint64_t>(plane2.ysize()));
233     }
234     planes_[0] = std::move(plane0);
235     planes_[1] = std::move(plane1);
236     planes_[2] = std::move(plane2);
237   }
238 
239   // Copy construction/assignment is forbidden to avoid inadvertent copies,
240   // which can be very expensive. Use CopyImageTo instead.
241   Image3(const Image3& other) = delete;
242   Image3& operator=(const Image3& other) = delete;
243 
244   Image3& operator=(Image3&& other) noexcept {
245     for (size_t i = 0; i < kNumPlanes; i++) {
246       planes_[i] = std::move(other.planes_[i]);
247     }
248     return *this;
249   }
250 
ConstPlaneRow(const size_t c,const size_t y)251   HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
252     return static_cast<const T*>(VoidPlaneRow(c, y));
253   }
ConstPlaneRow(const size_t c,const size_t y)254   HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
255     return static_cast<const T*>(VoidPlaneRow(c, y));
256   }
257 
MutablePlaneRow(const size_t c,const size_t y)258   HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
259     return static_cast<T*>(VoidPlaneRow(c, y));
260   }
MutablePlaneRow(const size_t c,const size_t y)261   HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
262     return static_cast<T*>(VoidPlaneRow(c, y));
263   }
264 
Plane(size_t idx)265   HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
266 
Swap(Image3 & other)267   void Swap(Image3& other) {
268     for (size_t c = 0; c < 3; ++c) {
269       other.planes_[c].Swap(planes_[c]);
270     }
271   }
272 
ShrinkTo(const size_t xsize,const size_t ysize)273   void ShrinkTo(const size_t xsize, const size_t ysize) {
274     for (ImageT& plane : planes_) {
275       plane.ShrinkTo(xsize, ysize);
276     }
277   }
278 
279   // Sizes of all three images are guaranteed to be equal.
xsize()280   HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
ysize()281   HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
282   // Returns offset [bytes] from one row to the next row of the same plane.
283   // WARNING: this must NOT be used to determine xsize, nor for copying rows -
284   // the valid xsize may be much less.
bytes_per_row()285   HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
286   // Returns number of pixels (some of which are padding) per row. Useful for
287   // computing other rows via pointer arithmetic. WARNING: this must NOT be used
288   // to determine xsize.
PixelsPerRow()289   HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
290 
291  private:
292   // Returns pointer to the start of a row.
VoidPlaneRow(const size_t c,const size_t y)293   HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
294 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
295     if (c >= kNumPlanes || y >= ysize()) {
296       HWY_ABORT("PlaneRow(%" PRIu64 ", %" PRIu64 ") >= %" PRIu64 "\n",
297                 static_cast<uint64_t>(c), static_cast<uint64_t>(y),
298                 static_cast<uint64_t>(ysize()));
299     }
300 #endif
301     // Use the first plane's stride because the compiler might not realize they
302     // are all equal. Thus we only need a single multiplication for all planes.
303     const size_t row_offset = y * planes_[0].bytes_per_row();
304     const void* row = planes_[c].bytes() + row_offset;
305     return static_cast<const T * HWY_RESTRICT>(
306         HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
307   }
308 
309  private:
310   ImageT planes_[kNumPlanes];
311 };
312 
313 using Image3F = Image3<float>;
314 
315 // Rectangular region in image(s). Factoring this out of Image instead of
316 // shifting the pointer by x0/y0 allows this to apply to multiple images with
317 // different resolutions. Can compare size via SameSize(rect1, rect2).
318 class Rect {
319  public:
320   // Most windows are xsize_max * ysize_max, except those on the borders where
321   // begin + size_max > end.
Rect(size_t xbegin,size_t ybegin,size_t xsize_max,size_t ysize_max,size_t xend,size_t yend)322   constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
323                  size_t ysize_max, size_t xend, size_t yend)
324       : x0_(xbegin),
325         y0_(ybegin),
326         xsize_(ClampedSize(xbegin, xsize_max, xend)),
327         ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
328 
329   // Construct with origin and known size (typically from another Rect).
Rect(size_t xbegin,size_t ybegin,size_t xsize,size_t ysize)330   constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
331       : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
332 
333   // Construct a rect that covers a whole image.
334   template <typename Image>
Rect(const Image & image)335   explicit Rect(const Image& image)
336       : Rect(0, 0, image.xsize(), image.ysize()) {}
337 
Rect()338   Rect() : Rect(0, 0, 0, 0) {}
339 
340   Rect(const Rect&) = default;
341   Rect& operator=(const Rect&) = default;
342 
Subrect(size_t xbegin,size_t ybegin,size_t xsize_max,size_t ysize_max)343   Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
344                size_t ysize_max) {
345     return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
346                 y0_ + ysize_);
347   }
348 
349   template <typename T>
ConstRow(const Image<T> * image,size_t y)350   const T* ConstRow(const Image<T>* image, size_t y) const {
351     return image->ConstRow(y + y0_) + x0_;
352   }
353 
354   template <typename T>
MutableRow(const Image<T> * image,size_t y)355   T* MutableRow(const Image<T>* image, size_t y) const {
356     return image->MutableRow(y + y0_) + x0_;
357   }
358 
359   template <typename T>
ConstPlaneRow(const Image3<T> & image,size_t c,size_t y)360   const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
361     return image.ConstPlaneRow(c, y + y0_) + x0_;
362   }
363 
364   template <typename T>
MutablePlaneRow(Image3<T> * image,const size_t c,size_t y)365   T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
366     return image->MutablePlaneRow(c, y + y0_) + x0_;
367   }
368 
369   // Returns true if this Rect fully resides in the given image. ImageT could be
370   // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
371   template <class ImageT>
IsInside(const ImageT & image)372   bool IsInside(const ImageT& image) const {
373     return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
374   }
375 
x0()376   size_t x0() const { return x0_; }
y0()377   size_t y0() const { return y0_; }
xsize()378   size_t xsize() const { return xsize_; }
ysize()379   size_t ysize() const { return ysize_; }
380 
381  private:
382   // Returns size_max, or whatever is left in [begin, end).
ClampedSize(size_t begin,size_t size_max,size_t end)383   static constexpr size_t ClampedSize(size_t begin, size_t size_max,
384                                       size_t end) {
385     return (begin + size_max <= end) ? size_max
386                                      : (end > begin ? end - begin : 0);
387   }
388 
389   size_t x0_;
390   size_t y0_;
391 
392   size_t xsize_;
393   size_t ysize_;
394 };
395 
396 // Works for any image-like input type(s).
397 template <class Image1, class Image2>
SameSize(const Image1 & image1,const Image2 & image2)398 HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
399   return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
400 }
401 
402 // Mirrors out of bounds coordinates and returns valid coordinates unchanged.
403 // We assume the radius (distance outside the image) is small compared to the
404 // image size, otherwise this might not terminate.
405 // The mirror is outside the last column (border pixel is also replicated).
Mirror(int64_t x,const int64_t xsize)406 static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
407                                                  const int64_t xsize) {
408   HWY_DASSERT(xsize != 0);
409 
410   // TODO(janwas): replace with branchless version
411   while (x < 0 || x >= xsize) {
412     if (x < 0) {
413       x = -x - 1;
414     } else {
415       x = 2 * xsize - 1 - x;
416     }
417   }
418   return static_cast<size_t>(x);
419 }
420 
421 // Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
422 
423 // Mirrors (repeating the edge pixel once). Useful for convolutions.
424 struct WrapMirror {
operatorWrapMirror425   HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
426     return Mirror(coord, static_cast<int64_t>(size));
427   }
428 };
429 
430 // Returns the same coordinate, for when we know "coord" is already valid (e.g.
431 // interior of an image).
432 struct WrapUnchanged {
operatorWrapUnchanged433   HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
434     return static_cast<size_t>(coord);
435   }
436 };
437 
438 // Similar to Wrap* but for row pointers (reduces Row() multiplications).
439 
440 class WrapRowMirror {
441  public:
442   template <class View>
WrapRowMirror(const View & image,size_t ysize)443   WrapRowMirror(const View& image, size_t ysize)
444       : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
445 
operator()446   const float* operator()(const float* const HWY_RESTRICT row,
447                           const int64_t stride) const {
448     if (row < first_row_) {
449       const int64_t num_before = first_row_ - row;
450       // Mirrored; one row before => row 0, two before = row 1, ...
451       return first_row_ + num_before - stride;
452     }
453     if (row > last_row_) {
454       const int64_t num_after = row - last_row_;
455       // Mirrored; one row after => last row, two after = last - 1, ...
456       return last_row_ - num_after + stride;
457     }
458     return row;
459   }
460 
461  private:
462   const float* const HWY_RESTRICT first_row_;
463   const float* const HWY_RESTRICT last_row_;
464 };
465 
466 struct WrapRowUnchanged {
operatorWrapRowUnchanged467   HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
468                                      int64_t /*stride*/) const {
469     return row;
470   }
471 };
472 
473 }  // namespace hwy
474 
475 #endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
476