// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
#include "hwy/contrib/image/image.h"

#include <algorithm>  // swap
#include <cstddef>
#include <cstring>  // memset
19 
20 #undef HWY_TARGET_INCLUDE
21 #define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
22 #include "hwy/foreach_target.h"
23 #include "hwy/highway.h"
24 
25 HWY_BEFORE_NAMESPACE();
26 namespace hwy {
27 namespace HWY_NAMESPACE {
GetVectorSize()28 size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
29 // NOLINTNEXTLINE(google-readability-namespace-comments)
30 }  // namespace HWY_NAMESPACE
31 
32 }  // namespace hwy
33 HWY_AFTER_NAMESPACE();
34 
35 #if HWY_ONCE
36 namespace hwy {
37 namespace {
38 HWY_EXPORT(GetVectorSize);  // Local function.
39 }  // namespace
40 
VectorSize()41 size_t ImageBase::VectorSize() {
42   // Do not cache result - must return the current value, which may be greater
43   // than the first call if it was subject to DisableTargets!
44   return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
45 }
46 
BytesPerRow(const size_t xsize,const size_t sizeof_t)47 size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
48   const size_t vec_size = VectorSize();
49   size_t valid_bytes = xsize * sizeof_t;
50 
51   // Allow unaligned accesses starting at the last valid value - this may raise
52   // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
53   // Skip for the scalar case because no extra lanes will be loaded.
54   if (vec_size != 1) {
55     HWY_DASSERT(vec_size >= sizeof_t);
56     valid_bytes += vec_size - sizeof_t;
57   }
58 
59   // Round up to vector and cache line size.
60   const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
61   size_t bytes_per_row = RoundUpTo(valid_bytes, align);
62 
63   // During the lengthy window before writes are committed to memory, CPUs
64   // guard against read after write hazards by checking the address, but
65   // only the lower 11 bits. We avoid a false dependency between writes to
66   // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
67   // Avoid2K prevents the same problem for the planes of an Image3.
68   if (bytes_per_row % HWY_ALIGNMENT == 0) {
69     bytes_per_row += align;
70   }
71 
72   HWY_DASSERT(bytes_per_row % align == 0);
73   return bytes_per_row;
74 }
75 
ImageBase(const size_t xsize,const size_t ysize,const size_t sizeof_t)76 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
77                      const size_t sizeof_t)
78     : xsize_(static_cast<uint32_t>(xsize)),
79       ysize_(static_cast<uint32_t>(ysize)),
80       bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
81   HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
82 
83   bytes_per_row_ = 0;
84   // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
85   // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
86   if (xsize != 0 && ysize != 0) {
87     bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
88     bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
89     HWY_ASSERT(bytes_.get() != nullptr);
90     InitializePadding(sizeof_t, Padding::kRoundUp);
91   }
92 }
93 
ImageBase(const size_t xsize,const size_t ysize,const size_t bytes_per_row,void * const aligned)94 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
95                      const size_t bytes_per_row, void* const aligned)
96     : xsize_(static_cast<uint32_t>(xsize)),
97       ysize_(static_cast<uint32_t>(ysize)),
98       bytes_per_row_(bytes_per_row),
99       bytes_(static_cast<uint8_t*>(aligned),
100              AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
101   const size_t vec_size = VectorSize();
102   HWY_ASSERT(bytes_per_row % vec_size == 0);
103   HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
104 }
105 
// Zero-fills each row's padding bytes beyond the valid samples so that vector
// loads reaching past the last valid value do not trigger msan
// use-of-uninitialized-value reports. Compiled to a no-op unless building
// under msan (or for the IDE); also returns early for empty images and in
// scalar mode (vec_size == 1), where no extra lanes are ever loaded.
void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
#if defined(MEMORY_SANITIZER) || HWY_IDE
  if (xsize_ == 0 || ysize_ == 0) return;

  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
  if (vec_size == 1) return;             // Scalar mode: no padding needed

  // kRoundUp initializes up to the next vector boundary; otherwise only the
  // bytes an unaligned vector access at the last valid sample could read.
  const size_t valid_size = xsize_ * sizeof_t;
  const size_t initialize_size = padding == Padding::kRoundUp
                                     ? RoundUpTo(valid_size, vec_size)
                                     : valid_size + vec_size - sizeof_t;
  if (valid_size == initialize_size) return;

  for (size_t y = 0; y < ysize_; ++y) {
    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
#if defined(__clang__) && (__clang_major__ <= 6)
    // There's a bug in msan in clang-6 when handling AVX2 operations. This
    // workaround allows tests to pass on msan, although it is slower and
    // prevents msan warnings from uninitialized images.
    memset(row, 0, initialize_size);
#else
    // Only the padding tail needs initialization; valid bytes are untouched.
    memset(row + valid_size, 0, initialize_size - valid_size);
#endif  // clang6
  }
#else
  // Non-msan builds: padding may stay uninitialized; silence unused warnings.
  (void)sizeof_t;
  (void)padding;
#endif  // MEMORY_SANITIZER
}
135 
Swap(ImageBase & other)136 void ImageBase::Swap(ImageBase& other) {
137   std::swap(xsize_, other.xsize_);
138   std::swap(ysize_, other.ysize_);
139   std::swap(bytes_per_row_, other.bytes_per_row_);
140   std::swap(bytes_, other.bytes_);
141 }
142 
143 }  // namespace hwy
144 #endif  // HWY_ONCE
145