1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
16 #define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
17
18 // SIMD/multicore-friendly planar image representation with row accessors.
19
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <string.h>
23
24 #include <cstddef>
25 #include <utility> // std::move
26
27 #include "hwy/aligned_allocator.h"
28 #include "hwy/base.h"
29
30 namespace hwy {
31
32 // Type-independent parts of Image<> - reduces code duplication and facilitates
33 // moving member function implementations to cc file.
34 struct ImageBase {
35 // Returns required alignment in bytes for externally allocated memory.
36 static size_t VectorSize();
37
38 // Returns distance [bytes] between the start of two consecutive rows, a
39 // multiple of VectorSize but NOT kAlias (see implementation).
40 static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
41
42 // No allocation (for output params or unused images)
ImageBaseImageBase43 ImageBase()
44 : xsize_(0),
45 ysize_(0),
46 bytes_per_row_(0),
47 bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
48
49 // Allocates memory (this is the common case)
50 ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
51
52 // References but does not take ownership of external memory. Useful for
53 // interoperability with other libraries. `aligned` must be aligned to a
54 // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
55 // VectorSize() or preferably equal to BytesPerRow().
56 ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
57
58 // Copy construction/assignment is forbidden to avoid inadvertent copies,
59 // which can be very expensive. Use CopyImageTo() instead.
60 ImageBase(const ImageBase& other) = delete;
61 ImageBase& operator=(const ImageBase& other) = delete;
62
63 // Move constructor (required for returning Image from function)
64 ImageBase(ImageBase&& other) noexcept = default;
65
66 // Move assignment (required for std::vector)
67 ImageBase& operator=(ImageBase&& other) noexcept = default;
68
69 void Swap(ImageBase& other);
70
71 // Useful for pre-allocating image with some padding for alignment purposes
72 // and later reporting the actual valid dimensions. Caller is responsible
73 // for ensuring xsize/ysize are <= the original dimensions.
ShrinkToImageBase74 void ShrinkTo(const size_t xsize, const size_t ysize) {
75 xsize_ = static_cast<uint32_t>(xsize);
76 ysize_ = static_cast<uint32_t>(ysize);
77 // NOTE: we can't recompute bytes_per_row for more compact storage and
78 // better locality because that would invalidate the image contents.
79 }
80
81 // How many pixels.
xsizeImageBase82 HWY_INLINE size_t xsize() const { return xsize_; }
ysizeImageBase83 HWY_INLINE size_t ysize() const { return ysize_; }
84
85 // NOTE: do not use this for copying rows - the valid xsize may be much less.
bytes_per_rowImageBase86 HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
87
88 // Raw access to byte contents, for interfacing with other libraries.
89 // Unsigned char instead of char to avoid surprises (sign extension).
bytesImageBase90 HWY_INLINE uint8_t* bytes() {
91 void* p = bytes_.get();
92 return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
93 }
bytesImageBase94 HWY_INLINE const uint8_t* bytes() const {
95 const void* p = bytes_.get();
96 return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
97 }
98
99 protected:
100 // Returns pointer to the start of a row.
VoidRowImageBase101 HWY_INLINE void* VoidRow(const size_t y) const {
102 #if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
103 defined(THREAD_SANITIZER)
104 if (y >= ysize_) {
105 HWY_ABORT("Row(%zu) >= %u\n", y, ysize_);
106 }
107 #endif
108
109 void* row = bytes_.get() + y * bytes_per_row_;
110 return HWY_ASSUME_ALIGNED(row, 64);
111 }
112
113 enum class Padding {
114 // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
115 kRoundUp,
116 // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
117 // vector to be initialized. If done by default, this would suppress
118 // legitimate msan warnings. We therefore require users to explicitly call
119 // InitializePadding before using unaligned loads (e.g. convolution).
120 kUnaligned
121 };
122
123 // Initializes the minimum bytes required to suppress msan warnings from
124 // legitimate (according to Padding mode) vector loads/stores on the right
125 // border, where some lanes are uninitialized and assumed to be unused.
126 void InitializePadding(size_t sizeof_t, Padding padding);
127
128 // (Members are non-const to enable assignment during move-assignment.)
129 uint32_t xsize_; // In valid pixels, not including any padding.
130 uint32_t ysize_;
131 size_t bytes_per_row_; // Includes padding.
132 AlignedFreeUniquePtr<uint8_t[]> bytes_;
133 };
134
135 // Single channel, aligned rows separated by padding. T must be POD.
136 //
137 // 'Single channel' (one 2D array per channel) simplifies vectorization
138 // (repeating the same operation on multiple adjacent components) without the
139 // complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
140 // can easily iterate over all components in a row and Image requires no
141 // knowledge of the pixel format beyond the component type "T".
142 //
143 // 'Aligned' means each row is aligned to the L1 cache line size. This prevents
144 // false sharing between two threads operating on adjacent rows.
145 //
146 // 'Padding' is still relevant because vectors could potentially be larger than
147 // a cache line. By rounding up row sizes to the vector size, we allow
148 // reading/writing ALIGNED vectors whose first lane is a valid sample. This
149 // avoids needing a separate loop to handle remaining unaligned lanes.
150 //
151 // This image layout could also be achieved with a vector and a row accessor
152 // function, but a class wrapper with support for "deleter" allows wrapping
153 // existing memory allocated by clients without copying the pixels. It also
154 // provides convenient accessors for xsize/ysize, which shortens function
155 // argument lists. Supports move-construction so it can be stored in containers.
156 template <typename ComponentType>
157 class Image : public ImageBase {
158 public:
159 using T = ComponentType;
160
161 Image() = default;
Image(const size_t xsize,const size_t ysize)162 Image(const size_t xsize, const size_t ysize)
163 : ImageBase(xsize, ysize, sizeof(T)) {}
Image(const size_t xsize,const size_t ysize,size_t bytes_per_row,void * aligned)164 Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
165 void* aligned)
166 : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
167
InitializePaddingForUnalignedAccesses()168 void InitializePaddingForUnalignedAccesses() {
169 InitializePadding(sizeof(T), Padding::kUnaligned);
170 }
171
ConstRow(const size_t y)172 HWY_INLINE const T* ConstRow(const size_t y) const {
173 return static_cast<const T*>(VoidRow(y));
174 }
ConstRow(const size_t y)175 HWY_INLINE const T* ConstRow(const size_t y) {
176 return static_cast<const T*>(VoidRow(y));
177 }
178
179 // Returns pointer to non-const. This allows passing const Image* parameters
180 // when the callee is only supposed to fill the pixels, as opposed to
181 // allocating or resizing the image.
MutableRow(const size_t y)182 HWY_INLINE T* MutableRow(const size_t y) const {
183 return static_cast<T*>(VoidRow(y));
184 }
MutableRow(const size_t y)185 HWY_INLINE T* MutableRow(const size_t y) {
186 return static_cast<T*>(VoidRow(y));
187 }
188
189 // Returns number of pixels (some of which are padding) per row. Useful for
190 // computing other rows via pointer arithmetic. WARNING: this must
191 // NOT be used to determine xsize.
PixelsPerRow()192 HWY_INLINE intptr_t PixelsPerRow() const {
193 return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
194 }
195 };
196
197 using ImageF = Image<float>;
198
199 // A bundle of 3 same-sized images. To fill an existing Image3 using
200 // single-channel producers, we also need access to each const Image*. Const
201 // prevents breaking the same-size invariant, while still allowing pixels to be
202 // changed via MutableRow.
203 template <typename ComponentType>
204 class Image3 {
205 public:
206 using T = ComponentType;
207 using ImageT = Image<T>;
208 static constexpr size_t kNumPlanes = 3;
209
Image3()210 Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
211
Image3(const size_t xsize,const size_t ysize)212 Image3(const size_t xsize, const size_t ysize)
213 : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
214 ImageT(xsize, ysize)} {}
215
Image3(Image3 && other)216 Image3(Image3&& other) noexcept {
217 for (size_t i = 0; i < kNumPlanes; i++) {
218 planes_[i] = std::move(other.planes_[i]);
219 }
220 }
221
Image3(ImageT && plane0,ImageT && plane1,ImageT && plane2)222 Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
223 if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
224 HWY_ABORT("Not same size: %zu x %zu, %zu x %zu, %zu x %zu\n",
225 plane0.xsize(), plane0.ysize(), plane1.xsize(), plane1.ysize(),
226 plane2.xsize(), plane2.ysize());
227 }
228 planes_[0] = std::move(plane0);
229 planes_[1] = std::move(plane1);
230 planes_[2] = std::move(plane2);
231 }
232
233 // Copy construction/assignment is forbidden to avoid inadvertent copies,
234 // which can be very expensive. Use CopyImageTo instead.
235 Image3(const Image3& other) = delete;
236 Image3& operator=(const Image3& other) = delete;
237
238 Image3& operator=(Image3&& other) noexcept {
239 for (size_t i = 0; i < kNumPlanes; i++) {
240 planes_[i] = std::move(other.planes_[i]);
241 }
242 return *this;
243 }
244
ConstPlaneRow(const size_t c,const size_t y)245 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
246 return static_cast<const T*>(VoidPlaneRow(c, y));
247 }
ConstPlaneRow(const size_t c,const size_t y)248 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
249 return static_cast<const T*>(VoidPlaneRow(c, y));
250 }
251
MutablePlaneRow(const size_t c,const size_t y)252 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
253 return static_cast<T*>(VoidPlaneRow(c, y));
254 }
MutablePlaneRow(const size_t c,const size_t y)255 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
256 return static_cast<T*>(VoidPlaneRow(c, y));
257 }
258
Plane(size_t idx)259 HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
260
Swap(Image3 & other)261 void Swap(Image3& other) {
262 for (size_t c = 0; c < 3; ++c) {
263 other.planes_[c].Swap(planes_[c]);
264 }
265 }
266
ShrinkTo(const size_t xsize,const size_t ysize)267 void ShrinkTo(const size_t xsize, const size_t ysize) {
268 for (ImageT& plane : planes_) {
269 plane.ShrinkTo(xsize, ysize);
270 }
271 }
272
273 // Sizes of all three images are guaranteed to be equal.
xsize()274 HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
ysize()275 HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
276 // Returns offset [bytes] from one row to the next row of the same plane.
277 // WARNING: this must NOT be used to determine xsize, nor for copying rows -
278 // the valid xsize may be much less.
bytes_per_row()279 HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
280 // Returns number of pixels (some of which are padding) per row. Useful for
281 // computing other rows via pointer arithmetic. WARNING: this must NOT be used
282 // to determine xsize.
PixelsPerRow()283 HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
284
285 private:
286 // Returns pointer to the start of a row.
VoidPlaneRow(const size_t c,const size_t y)287 HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
288 #if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
289 defined(THREAD_SANITIZER)
290 if (c >= kNumPlanes || y >= ysize()) {
291 HWY_ABORT("PlaneRow(%zu, %zu) >= %zu\n", c, y, ysize());
292 }
293 #endif
294 // Use the first plane's stride because the compiler might not realize they
295 // are all equal. Thus we only need a single multiplication for all planes.
296 const size_t row_offset = y * planes_[0].bytes_per_row();
297 const void* row = planes_[c].bytes() + row_offset;
298 return static_cast<const T * HWY_RESTRICT>(
299 HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
300 }
301
302 private:
303 ImageT planes_[kNumPlanes];
304 };
305
306 using Image3F = Image3<float>;
307
308 // Rectangular region in image(s). Factoring this out of Image instead of
309 // shifting the pointer by x0/y0 allows this to apply to multiple images with
310 // different resolutions. Can compare size via SameSize(rect1, rect2).
311 class Rect {
312 public:
313 // Most windows are xsize_max * ysize_max, except those on the borders where
314 // begin + size_max > end.
Rect(size_t xbegin,size_t ybegin,size_t xsize_max,size_t ysize_max,size_t xend,size_t yend)315 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
316 size_t ysize_max, size_t xend, size_t yend)
317 : x0_(xbegin),
318 y0_(ybegin),
319 xsize_(ClampedSize(xbegin, xsize_max, xend)),
320 ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
321
322 // Construct with origin and known size (typically from another Rect).
Rect(size_t xbegin,size_t ybegin,size_t xsize,size_t ysize)323 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
324 : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
325
326 // Construct a rect that covers a whole image.
327 template <typename Image>
Rect(const Image & image)328 explicit Rect(const Image& image)
329 : Rect(0, 0, image.xsize(), image.ysize()) {}
330
Rect()331 Rect() : Rect(0, 0, 0, 0) {}
332
333 Rect(const Rect&) = default;
334 Rect& operator=(const Rect&) = default;
335
Subrect(size_t xbegin,size_t ybegin,size_t xsize_max,size_t ysize_max)336 Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
337 size_t ysize_max) {
338 return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
339 y0_ + ysize_);
340 }
341
342 template <typename T>
ConstRow(const Image<T> * image,size_t y)343 const T* ConstRow(const Image<T>* image, size_t y) const {
344 return image->ConstRow(y + y0_) + x0_;
345 }
346
347 template <typename T>
MutableRow(const Image<T> * image,size_t y)348 T* MutableRow(const Image<T>* image, size_t y) const {
349 return image->MutableRow(y + y0_) + x0_;
350 }
351
352 template <typename T>
ConstPlaneRow(const Image3<T> & image,size_t c,size_t y)353 const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
354 return image.ConstPlaneRow(c, y + y0_) + x0_;
355 }
356
357 template <typename T>
MutablePlaneRow(Image3<T> * image,const size_t c,size_t y)358 T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
359 return image->MutablePlaneRow(c, y + y0_) + x0_;
360 }
361
362 // Returns true if this Rect fully resides in the given image. ImageT could be
363 // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
364 template <class ImageT>
IsInside(const ImageT & image)365 bool IsInside(const ImageT& image) const {
366 return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
367 }
368
x0()369 size_t x0() const { return x0_; }
y0()370 size_t y0() const { return y0_; }
xsize()371 size_t xsize() const { return xsize_; }
ysize()372 size_t ysize() const { return ysize_; }
373
374 private:
375 // Returns size_max, or whatever is left in [begin, end).
ClampedSize(size_t begin,size_t size_max,size_t end)376 static constexpr size_t ClampedSize(size_t begin, size_t size_max,
377 size_t end) {
378 return (begin + size_max <= end) ? size_max
379 : (end > begin ? end - begin : 0);
380 }
381
382 size_t x0_;
383 size_t y0_;
384
385 size_t xsize_;
386 size_t ysize_;
387 };
388
389 // Works for any image-like input type(s).
390 template <class Image1, class Image2>
SameSize(const Image1 & image1,const Image2 & image2)391 HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
392 return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
393 }
394
395 // Mirrors out of bounds coordinates and returns valid coordinates unchanged.
396 // We assume the radius (distance outside the image) is small compared to the
397 // image size, otherwise this might not terminate.
398 // The mirror is outside the last column (border pixel is also replicated).
Mirror(int64_t x,const int64_t xsize)399 static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
400 const int64_t xsize) {
401 HWY_DASSERT(xsize != 0);
402
403 // TODO(janwas): replace with branchless version
404 while (x < 0 || x >= xsize) {
405 if (x < 0) {
406 x = -x - 1;
407 } else {
408 x = 2 * xsize - 1 - x;
409 }
410 }
411 return static_cast<size_t>(x);
412 }
413
414 // Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
415
416 // Mirrors (repeating the edge pixel once). Useful for convolutions.
417 struct WrapMirror {
operatorWrapMirror418 HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
419 return Mirror(coord, static_cast<int64_t>(size));
420 }
421 };
422
423 // Returns the same coordinate, for when we know "coord" is already valid (e.g.
424 // interior of an image).
425 struct WrapUnchanged {
operatorWrapUnchanged426 HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
427 return static_cast<size_t>(coord);
428 }
429 };
430
431 // Similar to Wrap* but for row pointers (reduces Row() multiplications).
432
433 class WrapRowMirror {
434 public:
435 template <class View>
WrapRowMirror(const View & image,size_t ysize)436 WrapRowMirror(const View& image, size_t ysize)
437 : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
438
operator()439 const float* operator()(const float* const HWY_RESTRICT row,
440 const int64_t stride) const {
441 if (row < first_row_) {
442 const int64_t num_before = first_row_ - row;
443 // Mirrored; one row before => row 0, two before = row 1, ...
444 return first_row_ + num_before - stride;
445 }
446 if (row > last_row_) {
447 const int64_t num_after = row - last_row_;
448 // Mirrored; one row after => last row, two after = last - 1, ...
449 return last_row_ - num_after + stride;
450 }
451 return row;
452 }
453
454 private:
455 const float* const HWY_RESTRICT first_row_;
456 const float* const HWY_RESTRICT last_row_;
457 };
458
459 struct WrapRowUnchanged {
operatorWrapRowUnchanged460 HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
461 int64_t /*stride*/) const {
462 return row;
463 }
464 };
465
466 } // namespace hwy
467
468 #endif // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
469