1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "hwy/contrib/image/image.h"
16
#include <algorithm>  // swap
#include <cstddef>
#include <cstring>  // memset (used by InitializePadding under msan)
19
20 #undef HWY_TARGET_INCLUDE
21 #define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
22 #include "hwy/foreach_target.h"
23 #include "hwy/highway.h"
24
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {

// Returns the vector width in bytes for the target this nested namespace is
// compiled for (foreach_target.h re-includes this file once per target, so
// there is one instantiation of this function per enabled target).
size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE

}  // namespace hwy
HWY_AFTER_NAMESPACE();
34
#if HWY_ONCE
namespace hwy {
namespace {
// Builds the per-target dispatch table consumed by HWY_DYNAMIC_DISPATCH below.
HWY_EXPORT(GetVectorSize);  // Local function.
}  // namespace

// Returns the vector size (in bytes) of the best available target.
size_t ImageBase::VectorSize() {
  // Do not cache result - must return the current value, which may be greater
  // than the first call if it was subject to DisableTargets!
  return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
}
46
BytesPerRow(const size_t xsize,const size_t sizeof_t)47 size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
48 const size_t vec_size = VectorSize();
49 size_t valid_bytes = xsize * sizeof_t;
50
51 // Allow unaligned accesses starting at the last valid value - this may raise
52 // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
53 // Skip for the scalar case because no extra lanes will be loaded.
54 if (vec_size != 1) {
55 HWY_DASSERT(vec_size >= sizeof_t);
56 valid_bytes += vec_size - sizeof_t;
57 }
58
59 // Round up to vector and cache line size.
60 const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
61 size_t bytes_per_row = RoundUpTo(valid_bytes, align);
62
63 // During the lengthy window before writes are committed to memory, CPUs
64 // guard against read after write hazards by checking the address, but
65 // only the lower 11 bits. We avoid a false dependency between writes to
66 // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
67 // Avoid2K prevents the same problem for the planes of an Image3.
68 if (bytes_per_row % HWY_ALIGNMENT == 0) {
69 bytes_per_row += align;
70 }
71
72 HWY_DASSERT(bytes_per_row % align == 0);
73 return bytes_per_row;
74 }
75
ImageBase(const size_t xsize,const size_t ysize,const size_t sizeof_t)76 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
77 const size_t sizeof_t)
78 : xsize_(static_cast<uint32_t>(xsize)),
79 ysize_(static_cast<uint32_t>(ysize)),
80 bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
81 HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
82
83 bytes_per_row_ = 0;
84 // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
85 // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
86 if (xsize != 0 && ysize != 0) {
87 bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
88 bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
89 HWY_ASSERT(bytes_.get() != nullptr);
90 InitializePadding(sizeof_t, Padding::kRoundUp);
91 }
92 }
93
ImageBase(const size_t xsize,const size_t ysize,const size_t bytes_per_row,void * const aligned)94 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
95 const size_t bytes_per_row, void* const aligned)
96 : xsize_(static_cast<uint32_t>(xsize)),
97 ysize_(static_cast<uint32_t>(ysize)),
98 bytes_per_row_(bytes_per_row),
99 bytes_(static_cast<uint8_t*>(aligned),
100 AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
101 const size_t vec_size = VectorSize();
102 HWY_ASSERT(bytes_per_row % vec_size == 0);
103 HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
104 }
105
// Zero-initializes the padding bytes at the end of each row so that memory
// sanitizer does not flag the intentional vector loads past the last valid
// sample. Compiles to a no-op unless built with msan (or for the IDE).
// `padding` selects how many trailing bytes to initialize: kRoundUp zeroes up
// to the next multiple of the vector size; otherwise only the bytes an
// unaligned load from the last valid sample could touch.
void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
#if defined(MEMORY_SANITIZER) || HWY_IDE
  // Empty image: no rows to pad.
  if (xsize_ == 0 || ysize_ == 0) return;

  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
  if (vec_size == 1) return;  // Scalar mode: no padding needed

  const size_t valid_size = xsize_ * sizeof_t;
  const size_t initialize_size = padding == Padding::kRoundUp
                                     ? RoundUpTo(valid_size, vec_size)
                                     : valid_size + vec_size - sizeof_t;
  // Row already ends exactly on the required boundary: nothing to do.
  if (valid_size == initialize_size) return;

  for (size_t y = 0; y < ysize_; ++y) {
    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
#if defined(__clang__) && (__clang_major__ <= 6)
    // There's a bug in msan in clang-6 when handling AVX2 operations. This
    // workaround allows tests to pass on msan, although it is slower and
    // prevents msan warnings from uninitialized images.
    memset(row, 0, initialize_size);
#else
    // Only the padding tail needs initialization; valid samples are left
    // untouched so msan still catches reads of uninitialized pixels.
    memset(row + valid_size, 0, initialize_size - valid_size);
#endif  // clang6
  }
#else
  // Not building with msan: silence unused-parameter warnings.
  (void)sizeof_t;
  (void)padding;
#endif  // MEMORY_SANITIZER
}
135
Swap(ImageBase & other)136 void ImageBase::Swap(ImageBase& other) {
137 std::swap(xsize_, other.xsize_);
138 std::swap(ysize_, other.ysize_);
139 std::swap(bytes_per_row_, other.bytes_per_row_);
140 std::swap(bytes_, other.bytes_);
141 }
142
143 } // namespace hwy
144 #endif // HWY_ONCE
145