//===- CRunnerUtils.h - Utils for debugging MLIR execution ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares basic classes and functions to manipulate structured MLIR
// types at runtime. Entities in this file must be compliant with C++11 and be
// retargetable, including on targets without a C++ runtime.
//
//===----------------------------------------------------------------------===//

#ifndef EXECUTIONENGINE_CRUNNERUTILS_H_
#define EXECUTIONENGINE_CRUNNERUTILS_H_

#ifdef _WIN32
#ifndef MLIR_CRUNNERUTILS_EXPORT
#ifdef mlir_c_runner_utils_EXPORTS
// We are building this library
#define MLIR_CRUNNERUTILS_EXPORT __declspec(dllexport)
#define MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
#else
// We are using this library
#define MLIR_CRUNNERUTILS_EXPORT __declspec(dllimport)
#endif // mlir_c_runner_utils_EXPORTS
#endif // MLIR_CRUNNERUTILS_EXPORT
#else  // _WIN32
#define MLIR_CRUNNERUTILS_EXPORT
#define MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
#endif // _WIN32

#include <array>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <utility> // for std::declval, used below

//===----------------------------------------------------------------------===//
// Codegen-compatible structures for Vector type.
//===----------------------------------------------------------------------===//
namespace mlir {
namespace detail {

constexpr bool isPowerOf2(int N) { return (!(N & (N - 1))); }

constexpr unsigned nextPowerOf2(int N) {
  return (N <= 1) ? 1 : (isPowerOf2(N) ? N : (2 * nextPowerOf2((N + 1) / 2)));
}
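
// Illustrative sanity checks of the helpers above (not part of the header
// itself; the values are easy to verify by hand):
//   static_assert(nextPowerOf2(1) == 1, "");
//   static_assert(nextPowerOf2(5) == 8, "");
//   static_assert(nextPowerOf2(12) == 16, "");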

template <typename T, int Dim, bool IsPowerOf2>
struct Vector1D;

template <typename T, int Dim>
struct Vector1D<T, Dim, /*IsPowerOf2=*/true> {
  Vector1D() {
    static_assert(detail::nextPowerOf2(sizeof(T[Dim])) == sizeof(T[Dim]),
                  "size error");
  }
  inline T &operator[](unsigned i) { return vector[i]; }
  inline const T &operator[](unsigned i) const { return vector[i]; }

private:
  T vector[Dim];
};

// 1-D vector, padded to the next power-of-2 allocation size.
// The two specializations exist to avoid zero-size padding arrays (which fail
// to compile under -Werror).
template <typename T, int Dim>
struct Vector1D<T, Dim, /*IsPowerOf2=*/false> {
  Vector1D() {
    static_assert(nextPowerOf2(sizeof(T[Dim])) > sizeof(T[Dim]), "size error");
    static_assert(nextPowerOf2(sizeof(T[Dim])) < 2 * sizeof(T[Dim]),
                  "size error");
  }
  inline T &operator[](unsigned i) { return vector[i]; }
  inline const T &operator[](unsigned i) const { return vector[i]; }

private:
  T vector[Dim];
  char padding[nextPowerOf2(sizeof(T[Dim])) - sizeof(T[Dim])];
};
} // end namespace detail
} // end namespace mlir

// N-D vectors recurse down to 1-D.
template <typename T, int Dim, int... Dims>
struct Vector {
  inline Vector<T, Dims...> &operator[](unsigned i) { return vector[i]; }
  inline const Vector<T, Dims...> &operator[](unsigned i) const {
    return vector[i];
  }

private:
  Vector<T, Dims...> vector[Dim];
};

// 1-D vectors in LLVM are automatically padded to the next power of 2.
// We insert explicit padding to account for this.
template <typename T, int Dim>
struct Vector<T, Dim>
    : public mlir::detail::Vector1D<T, Dim,
                                    mlir::detail::isPowerOf2(sizeof(T[Dim]))> {
};
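
// Illustrative example of the padding: sizeof(float[3]) is 12 bytes, so the
// storage is padded up to 16; a 4-element float vector needs no padding.
//   static_assert(sizeof(Vector<float, 3>) == 16, "padded to power of 2");
//   static_assert(sizeof(Vector<float, 4>) == 16, "already a power of 2");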

template <int D1, typename T>
using Vector1D = Vector<T, D1>;
template <int D1, int D2, typename T>
using Vector2D = Vector<T, D1, D2>;
template <int D1, int D2, int D3, typename T>
using Vector3D = Vector<T, D1, D2, D3>;
template <int D1, int D2, int D3, int D4, typename T>
using Vector4D = Vector<T, D1, D2, D3, D4>;
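
// Example usage (an illustrative sketch): indexing recurses one dimension at
// a time down to the padded 1-D case. Note that in the aliases the dimensions
// precede the element type.
//   Vector2D<3, 5, float> m;
//   m[2][4] = 42.0f;
//   Vector<float, 2, 3, 4> v; // same shape as Vector3D<2, 3, 4, float>
//   v[1][2][3] = 1.0f;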

// Copies the last N-1 elements of `arr` into `res`, dropping the leading
// element.
template <int N>
void dropFront(int64_t arr[N], int64_t *res) {
  for (unsigned i = 1; i < N; ++i)
    *(res + i - 1) = arr[i];
}
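
// For instance (illustrative): dropping the leading dimension of a shape.
//   int64_t sizes[3] = {2, 3, 4};
//   int64_t rest[2];
//   dropFront<3>(sizes, rest); // rest is now {3, 4}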

//===----------------------------------------------------------------------===//
// Codegen-compatible structures for StridedMemRef type.
//===----------------------------------------------------------------------===//
template <typename T, int Rank>
class StridedMemrefIterator;

/// StridedMemRef descriptor type with static rank.
template <typename T, int N>
struct StridedMemRefType {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[N];
  int64_t strides[N];

  template <typename Range,
            typename sfinae = decltype(std::declval<Range>().begin())>
  T &operator[](Range &&indices) {
    assert(indices.size() == N &&
           "indices should match rank in memref subscript");
    int64_t curOffset = offset;
    for (int dim = N - 1; dim >= 0; --dim) {
      int64_t currentIndex = *(indices.begin() + dim);
      assert(currentIndex < sizes[dim] && "Index overflow");
      curOffset += currentIndex * strides[dim];
    }
    return data[curOffset];
  }

  StridedMemrefIterator<T, N> begin() { return {*this}; }
  StridedMemrefIterator<T, N> end() { return {*this, -1}; }

  // This operator[] is extremely slow and only for sugaring purposes.
  StridedMemRefType<T, N - 1> operator[](int64_t idx) {
    StridedMemRefType<T, N - 1> res;
    res.basePtr = basePtr;
    res.data = data;
    res.offset = offset + idx * strides[0];
    dropFront<N>(sizes, res.sizes);
    dropFront<N>(strides, res.strides);
    return res;
  }
};
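
// Example (an illustrative sketch; the buffer and shape are hypothetical): a
// 2x3 row-major buffer viewed through a rank-2 descriptor.
//   float buffer[6] = {0, 1, 2, 3, 4, 5};
//   StridedMemRefType<float, 2> m;
//   m.basePtr = m.data = buffer;
//   m.offset = 0;
//   m.sizes[0] = 2; m.sizes[1] = 3;
//   m.strides[0] = 3; m.strides[1] = 1; // row-major strides
//   float fast = m[std::array<int64_t, 2>{1, 2}]; // == 5
//   float slow = m[1][2];                         // same element, via slicing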

/// StridedMemRef descriptor type specialized for rank 1.
template <typename T>
struct StridedMemRefType<T, 1> {
  T *basePtr;
  T *data;
  int64_t offset;
  int64_t sizes[1];
  int64_t strides[1];

  template <typename Range,
            typename sfinae = decltype(std::declval<Range>().begin())>
  T &operator[](Range indices) {
    assert(indices.size() == 1 &&
           "indices should match rank in memref subscript");
    return (*this)[*indices.begin()];
  }

  StridedMemrefIterator<T, 1> begin() { return {*this}; }
  StridedMemrefIterator<T, 1> end() { return {*this, -1}; }

  T &operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
};

/// StridedMemRef descriptor type specialized for rank 0.
template <typename T>
struct StridedMemRefType<T, 0> {
  T *basePtr;
  T *data;
  int64_t offset;

  template <typename Range,
            typename sfinae = decltype(std::declval<Range>().begin())>
  T &operator[](Range indices) {
    assert((indices.size() == 0) &&
           "Expect empty indices for 0-rank memref subscript");
    return data[offset];
  }

  StridedMemrefIterator<T, 0> begin() { return {*this}; }
  StridedMemrefIterator<T, 0> end() { return {*this, 1}; }
};

/// Iterate over all elements in a strided memref.
template <typename T, int Rank>
class StridedMemrefIterator {
public:
  StridedMemrefIterator(StridedMemRefType<T, Rank> &descriptor,
                        int64_t offset = 0)
      : offset(offset), descriptor(descriptor) {}
  StridedMemrefIterator<T, Rank> &operator++() {
    int dim = Rank - 1;
    while (dim >= 0 && indices[dim] == (descriptor.sizes[dim] - 1)) {
      offset -= indices[dim] * descriptor.strides[dim];
      indices[dim] = 0;
      --dim;
    }
    if (dim < 0) {
      offset = -1;
      return *this;
    }
    ++indices[dim];
    offset += descriptor.strides[dim];
    return *this;
  }

  T &operator*() { return descriptor.data[offset]; }
  T *operator->() { return &descriptor.data[offset]; }

  const std::array<int64_t, Rank> &getIndices() { return indices; }

  bool operator==(const StridedMemrefIterator &other) const {
    return other.offset == offset && &other.descriptor == &descriptor;
  }

  bool operator!=(const StridedMemrefIterator &other) const {
    return !(*this == other);
  }

private:
  /// Offset in the buffer. This can be derived from the indices and the
  /// descriptor.
  int64_t offset = 0;
  /// Array of indices in the multi-dimensional memref.
  std::array<int64_t, Rank> indices = {};
  /// Descriptor for the strided memref.
  StridedMemRefType<T, Rank> &descriptor;
};
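
// Example (illustrative): the begin()/end() pair makes a descriptor usable in
// a range-based for loop; `m` is the hypothetical rank-2 descriptor sketched
// earlier (note that iteration starts at buffer offset 0, so it assumes a
// zero-offset descriptor).
//   for (float &elt : m)
//     elt *= 2.0f;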

/// Iterate over all elements in a 0-ranked strided memref.
template <typename T>
class StridedMemrefIterator<T, 0> {
public:
  StridedMemrefIterator(StridedMemRefType<T, 0> &descriptor, int64_t offset = 0)
      : elt(descriptor.data + offset) {}

  StridedMemrefIterator<T, 0> &operator++() {
    ++elt;
    return *this;
  }

  T &operator*() { return *elt; }
  T *operator->() { return elt; }

  // There are no indices for a 0-ranked memref, but this API is provided for
  // consistency with the general case.
  const std::array<int64_t, 0> &getIndices() {
    // Since this is a 0-array of indices we can keep a single global const
    // copy.
    static const std::array<int64_t, 0> indices = {};
    return indices;
  }

  bool operator==(const StridedMemrefIterator &other) const {
    return other.elt == elt;
  }

  bool operator!=(const StridedMemrefIterator &other) const {
    return !(*this == other);
  }

private:
  /// Pointer to the single element in the zero-ranked memref.
  T *elt;
};

//===----------------------------------------------------------------------===//
// Codegen-compatible structure for UnrankedMemRef type.
//===----------------------------------------------------------------------===//
// Unranked MemRef: the rank is only known at runtime, and `descriptor` points
// to the StridedMemRefType descriptor of that rank.
template <typename T>
struct UnrankedMemRefType {
  int64_t rank;
  void *descriptor;
};

//===----------------------------------------------------------------------===//
// DynamicMemRefType type.
//===----------------------------------------------------------------------===//
// A reference to one of the StridedMemRef types.
template <typename T>
class DynamicMemRefType {
public:
  explicit DynamicMemRefType(const StridedMemRefType<T, 0> &mem_ref)
      : rank(0), basePtr(mem_ref.basePtr), data(mem_ref.data),
        offset(mem_ref.offset), sizes(nullptr), strides(nullptr) {}
  template <int N>
  explicit DynamicMemRefType(const StridedMemRefType<T, N> &mem_ref)
      : rank(N), basePtr(mem_ref.basePtr), data(mem_ref.data),
        offset(mem_ref.offset), sizes(mem_ref.sizes), strides(mem_ref.strides) {
  }
  explicit DynamicMemRefType(const UnrankedMemRefType<T> &mem_ref)
      : rank(mem_ref.rank) {
    auto *desc = static_cast<StridedMemRefType<T, 1> *>(mem_ref.descriptor);
    basePtr = desc->basePtr;
    data = desc->data;
    offset = desc->offset;
    sizes = rank == 0 ? nullptr : desc->sizes;
    strides = sizes + rank;
  }

  int64_t rank;
  T *basePtr;
  T *data;
  int64_t offset;
  const int64_t *sizes;
  const int64_t *strides;
};
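
// Example (illustrative): erasing the static rank so rank-generic code can
// inspect the descriptor; `m` is the hypothetical rank-2 sketch from earlier.
//   DynamicMemRefType<float> dyn(m);
//   int64_t numElements = 1;
//   for (int64_t d = 0; d < dyn.rank; ++d)
//     numElements *= dyn.sizes[d]; // 2 * 3 == 6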

//===----------------------------------------------------------------------===//
// Small runtime support library for memref.copy lowering during codegen.
//===----------------------------------------------------------------------===//
extern "C" MLIR_CRUNNERUTILS_EXPORT void
memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *src,
           UnrankedMemRefType<char> *dst);
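
// Example call (an illustrative sketch; the descriptors are hypothetical, and
// the element type is erased to char with the element size passed
// separately):
//   StridedMemRefType<char, 2> srcDesc; // filled in elsewhere
//   StridedMemRefType<char, 2> dstDesc; // filled in elsewhere
//   UnrankedMemRefType<char> src{/*rank=*/2, &srcDesc};
//   UnrankedMemRefType<char> dst{/*rank=*/2, &dstDesc};
//   memrefCopy(/*elemSize=*/sizeof(float), &src, &dst);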

//===----------------------------------------------------------------------===//
// Small runtime support library for vector.print lowering during codegen.
//===----------------------------------------------------------------------===//
extern "C" MLIR_CRUNNERUTILS_EXPORT void printI64(int64_t i);
extern "C" MLIR_CRUNNERUTILS_EXPORT void printU64(uint64_t u);
extern "C" MLIR_CRUNNERUTILS_EXPORT void printF32(float f);
extern "C" MLIR_CRUNNERUTILS_EXPORT void printF64(double d);
extern "C" MLIR_CRUNNERUTILS_EXPORT void printOpen();
extern "C" MLIR_CRUNNERUTILS_EXPORT void printClose();
extern "C" MLIR_CRUNNERUTILS_EXPORT void printComma();
extern "C" MLIR_CRUNNERUTILS_EXPORT void printNewline();
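
// Example (illustrative): the hooks compose to print an aggregate, e.g. a
// 2-element vector roughly as "( 1, 2 )" followed by a newline (the exact
// spacing is up to the implementation):
//   printOpen();
//   printI64(1);
//   printComma();
//   printI64(2);
//   printClose();
//   printNewline();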

//===----------------------------------------------------------------------===//
// Small runtime support library for timing execution and printing GFLOPS.
//===----------------------------------------------------------------------===//
extern "C" MLIR_CRUNNERUTILS_EXPORT void print_flops(double flops);
extern "C" MLIR_CRUNNERUTILS_EXPORT double rtclock();
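
// Example (an illustrative sketch; `runKernel` and `numOps` are hypothetical,
// and print_flops is assumed to take a flop/s rate, which it reports as
// GFLOPS):
//   double start = rtclock();
//   runKernel();
//   double stop = rtclock();
//   print_flops(numOps / (stop - start));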

#endif // EXECUTIONENGINE_CRUNNERUTILS_H_