// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hwy/contrib/image/image.h"

#include <cstddef>
#include <cstring>  // memset (used by InitializePadding under MSAN)

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"

#include <algorithm>  // swap

#include "hwy/foreach_target.h"
#include "hwy/highway.h"
26 HWY_BEFORE_NAMESPACE();
27 namespace hwy {
28 namespace HWY_NAMESPACE {
GetVectorSize()29 size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
30 // NOLINTNEXTLINE(google-readability-namespace-comments)
31 }  // namespace HWY_NAMESPACE
32 
33 }  // namespace hwy
34 HWY_AFTER_NAMESPACE();
35 
36 #if HWY_ONCE
37 namespace hwy {
38 namespace {
39 HWY_EXPORT(GetVectorSize);  // Local function.
40 }  // namespace
41 
VectorSize()42 size_t ImageBase::VectorSize() {
43   // Do not cache result - must return the current value, which may be greater
44   // than the first call if it was subject to DisableTargets!
45   return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
46 }
47 
BytesPerRow(const size_t xsize,const size_t sizeof_t)48 size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
49   const size_t vec_size = VectorSize();
50   size_t valid_bytes = xsize * sizeof_t;
51 
52   // Allow unaligned accesses starting at the last valid value - this may raise
53   // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
54   // Skip for the scalar case because no extra lanes will be loaded.
55   if (vec_size != 1) {
56     HWY_DASSERT(vec_size >= sizeof_t);
57     valid_bytes += vec_size - sizeof_t;
58   }
59 
60   // Round up to vector and cache line size.
61   const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
62   size_t bytes_per_row = RoundUpTo(valid_bytes, align);
63 
64   // During the lengthy window before writes are committed to memory, CPUs
65   // guard against read after write hazards by checking the address, but
66   // only the lower 11 bits. We avoid a false dependency between writes to
67   // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
68   // Avoid2K prevents the same problem for the planes of an Image3.
69   if (bytes_per_row % HWY_ALIGNMENT == 0) {
70     bytes_per_row += align;
71   }
72 
73   HWY_DASSERT(bytes_per_row % align == 0);
74   return bytes_per_row;
75 }
76 
ImageBase(const size_t xsize,const size_t ysize,const size_t sizeof_t)77 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
78                      const size_t sizeof_t)
79     : xsize_(static_cast<uint32_t>(xsize)),
80       ysize_(static_cast<uint32_t>(ysize)),
81       bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
82   HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
83 
84   bytes_per_row_ = 0;
85   // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
86   // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
87   if (xsize != 0 && ysize != 0) {
88     bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
89     bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
90     HWY_ASSERT(bytes_.get() != nullptr);
91     InitializePadding(sizeof_t, Padding::kRoundUp);
92   }
93 }
94 
ImageBase(const size_t xsize,const size_t ysize,const size_t bytes_per_row,void * const aligned)95 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
96                      const size_t bytes_per_row, void* const aligned)
97     : xsize_(static_cast<uint32_t>(xsize)),
98       ysize_(static_cast<uint32_t>(ysize)),
99       bytes_per_row_(bytes_per_row),
100       bytes_(static_cast<uint8_t*>(aligned),
101              AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
102   const size_t vec_size = VectorSize();
103   HWY_ASSERT(bytes_per_row % vec_size == 0);
104   HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
105 }
106 
// Zero-initializes the row padding bytes so MemorySanitizer does not flag
// vector loads that deliberately read past the last valid value. Compiled to
// a no-op (parameters ignored) unless building under msan or for the IDE.
void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
#if defined(MEMORY_SANITIZER) || HWY_IDE
  // Nothing was allocated for empty images (see the allocating constructor).
  if (xsize_ == 0 || ysize_ == 0) return;

  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
  if (vec_size == 1) return;             // Scalar mode: no padding needed

  const size_t valid_size = xsize_ * sizeof_t;
  // kRoundUp zeroes up to a whole-vector boundary; otherwise zero only the
  // bytes reachable by an unaligned vector load starting at the last value.
  const size_t initialize_size = padding == Padding::kRoundUp
                                     ? RoundUpTo(valid_size, vec_size)
                                     : valid_size + vec_size - sizeof_t;
  // Row already ends exactly at the boundary: nothing to initialize.
  if (valid_size == initialize_size) return;

  for (size_t y = 0; y < ysize_; ++y) {
    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
#if defined(__clang__) && (__clang_major__ <= 6)
    // There's a bug in msan in clang-6 when handling AVX2 operations. This
    // workaround allows tests to pass on msan, although it is slower and
    // prevents msan warnings from uninitialized images.
    memset(row, 0, initialize_size);
#else
    // Only the padding tail needs initialization; valid bytes are left as-is
    // so msan can still catch reads of genuinely uninitialized pixels.
    memset(row + valid_size, 0, initialize_size - valid_size);
#endif  // clang6
  }
#else
  (void)sizeof_t;
  (void)padding;
#endif  // MEMORY_SANITIZER
}
136 
Swap(ImageBase & other)137 void ImageBase::Swap(ImageBase& other) {
138   std::swap(xsize_, other.xsize_);
139   std::swap(ysize_, other.ysize_);
140   std::swap(bytes_per_row_, other.bytes_per_row_);
141   std::swap(bytes_, other.bytes_);
142 }
143 
144 }  // namespace hwy
145 #endif  // HWY_ONCE
146