1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "hwy/contrib/image/image.h"
16
#include <cstddef>
#include <cstring>  // memset (used by InitializePadding under MSan)
18
19 #undef HWY_TARGET_INCLUDE
20 #define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
21
22 #include <algorithm> // swap
23
24 #include "hwy/foreach_target.h"
25 #include "hwy/highway.h"
26 HWY_BEFORE_NAMESPACE();
27 namespace hwy {
28 namespace HWY_NAMESPACE {
GetVectorSize()29 size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
30 // NOLINTNEXTLINE(google-readability-namespace-comments)
31 } // namespace HWY_NAMESPACE
32
33 } // namespace hwy
34 HWY_AFTER_NAMESPACE();
35
36 #if HWY_ONCE
37 namespace hwy {
38 namespace {
39 HWY_EXPORT(GetVectorSize); // Local function.
40 } // namespace
41
// Returns the vector size (in bytes) of the best enabled target, resolved
// via dynamic dispatch to the per-target GetVectorSize above.
size_t ImageBase::VectorSize() {
  // Do not cache result - must return the current value, which may be greater
  // than the first call if it was subject to DisableTargets!
  return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
}
47
// Computes the stride (in bytes) between the starts of consecutive rows for
// an image of `xsize` samples of `sizeof_t` bytes each. Includes padding for
// unaligned vector loads, rounds up to vector/cache-line alignment, and
// perturbs the stride to avoid aliasing between consecutive rows.
size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
  const size_t vec_size = VectorSize();

  // Payload bytes of one row.
  size_t row_bytes = xsize * sizeof_t;

  // Allow unaligned accesses starting at the last valid value - this may raise
  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
  // Skip for the scalar case because no extra lanes will be loaded.
  if (vec_size != 1) {
    HWY_DASSERT(vec_size >= sizeof_t);
    row_bytes += vec_size - sizeof_t;
  }

  // Round up to vector and cache line size.
  const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
  size_t stride = RoundUpTo(row_bytes, align);

  // During the lengthy window before writes are committed to memory, CPUs
  // guard against read after write hazards by checking the address, but
  // only the lower 11 bits. We avoid a false dependency between writes to
  // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
  // Avoid2K prevents the same problem for the planes of an Image3.
  if (stride % HWY_ALIGNMENT == 0) {
    stride += align;
  }

  HWY_DASSERT(stride % align == 0);
  return stride;
}
76
// Allocating constructor: reserves padded, aligned storage for `ysize` rows
// of `xsize` samples, each `sizeof_t` bytes. Zero dimensions are allowed and
// allocate nothing (lazily-allocated images).
ImageBase::ImageBase(const size_t xsize, const size_t ysize,
                     const size_t sizeof_t)
    : xsize_(static_cast<uint32_t>(xsize)),
      ysize_(static_cast<uint32_t>(ysize)),
      // Start with a no-op deleter; replaced if we allocate below.
      bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
  // Only power-of-two sample sizes up to 8 bytes are supported.
  HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);

  bytes_per_row_ = 0;
  // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
  // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
  if (xsize != 0 && ysize != 0) {
    bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
    bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
    HWY_ASSERT(bytes_.get() != nullptr);
    // Zero the padding so (M)SAN does not flag reads past the last sample.
    InitializePadding(sizeof_t, Padding::kRoundUp);
  }
}
94
// Non-owning constructor: wraps caller-provided `aligned` memory with the
// given row stride. The deleter is a no-op, so the caller retains ownership
// and must keep the memory alive for the lifetime of this image.
ImageBase::ImageBase(const size_t xsize, const size_t ysize,
                     const size_t bytes_per_row, void* const aligned)
    : xsize_(static_cast<uint32_t>(xsize)),
      ysize_(static_cast<uint32_t>(ysize)),
      bytes_per_row_(bytes_per_row),
      bytes_(static_cast<uint8_t*>(aligned),
             AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
  const size_t vec_size = VectorSize();
  // Rows and the base pointer must be vector-aligned so that full-vector
  // loads/stores at row starts are safe.
  HWY_ASSERT(bytes_per_row % vec_size == 0);
  HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
}
106
// Zero-fills the per-row padding bytes so MemorySanitizer does not report
// uninitialized reads when vector loads extend beyond the last valid sample.
// Compiles to a no-op unless built under MSan (or for IDE indexing).
// `padding` selects how much to initialize: kRoundUp covers up to the next
// vector boundary; otherwise only the bytes an unaligned vector load
// starting at the last sample could touch.
void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
#if defined(MEMORY_SANITIZER) || HWY_IDE
  if (xsize_ == 0 || ysize_ == 0) return;

  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
  if (vec_size == 1) return;  // Scalar mode: no padding needed

  const size_t valid_size = xsize_ * sizeof_t;
  const size_t initialize_size = padding == Padding::kRoundUp
                                     ? RoundUpTo(valid_size, vec_size)
                                     : valid_size + vec_size - sizeof_t;
  // Nothing to do if the row payload already ends on the required boundary.
  if (valid_size == initialize_size) return;

  for (size_t y = 0; y < ysize_; ++y) {
    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
#if defined(__clang__) && (__clang_major__ <= 6)
    // There's a bug in msan in clang-6 when handling AVX2 operations. This
    // workaround allows tests to pass on msan, although it is slower and
    // prevents msan warnings from uninitialized images.
    memset(row, 0, initialize_size);
#else
    // Only the padding tail needs clearing; valid bytes are left untouched.
    memset(row + valid_size, 0, initialize_size - valid_size);
#endif  // clang6
  }
#else
  // Not building under MSan: silence unused-parameter warnings.
  (void)sizeof_t;
  (void)padding;
#endif  // MEMORY_SANITIZER
}
136
Swap(ImageBase & other)137 void ImageBase::Swap(ImageBase& other) {
138 std::swap(xsize_, other.xsize_);
139 std::swap(ysize_, other.ysize_);
140 std::swap(bytes_per_row_, other.bytes_per_row_);
141 std::swap(bytes_, other.bytes_);
142 }
143
144 } // namespace hwy
145 #endif // HWY_ONCE
146