1 #pragma once
2 
3 #include <cmath>
4 #include "views.h"
5 
6 #ifdef __GNUC__
7 #define ALWAYS_INLINE inline __attribute__((always_inline))
8 #define INLINE_LAMBDA __attribute__((always_inline))
9 #elif defined(_MSC_VER)
10 #define ALWAYS_INLINE __forceinline
11 #define INLINE_LAMBDA
12 #else
13 #define ALWAYS_INLINE inline
14 #define INLINE_LAMBDA
15 #endif
16 
17 struct Identity {
18     template <typename T>
operatorIdentity19     ALWAYS_INLINE T operator() (T && val) const {
20         return std::forward<T>(val);
21     }
22 };
23 
24 struct Plus {
25     template <typename T>
operatorPlus26     ALWAYS_INLINE T operator()(T a, T b) const {
27         return a + b;
28     }
29 };
30 
31 // Helper to force a fixed bound loop to be completely unrolled
32 template <int unroll>
33 struct ForceUnroll{
34     template <typename Func>
operatorForceUnroll35     ALWAYS_INLINE void operator()(const Func& f) const {
36         ForceUnroll<unroll - 1>{}(f);
37         f(unroll - 1);
38     }
39 };
40 
41 template <>
42 struct ForceUnroll<1> {
43     template <typename Func>
44     ALWAYS_INLINE void operator()(const Func& f) const {
45         f(0);
46     }
47 };
48 
49 template <int ilp_factor=4,
50           typename T,
51           typename TransformFunc,
52           typename ProjectFunc = Identity,
53           typename ReduceFunc = Plus>
54 void transform_reduce_2d_(
55     StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y,
56     const TransformFunc& map,
57     const ProjectFunc& project = Identity{},
58     const ReduceFunc& reduce = Plus{}) {
59     // Result type of calling map
60     using AccumulateType = typename std::decay<decltype(
61         map(std::declval<T>(), std::declval<T>()))>::type;
62     intptr_t xs = x.strides[1], ys = y.strides[1];
63 
64     intptr_t i = 0;
65     if (xs == 1 && ys == 1) {
66         for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) {
67             const T* x_rows[ilp_factor];
68             const T* y_rows[ilp_factor];
69             ForceUnroll<ilp_factor>{}([&](int k) {
70                 x_rows[k] = &x(i + k, 0);
71                 y_rows[k] = &y(i + k, 0);
72             });
73 
74             AccumulateType dist[ilp_factor] = {};
75             for (intptr_t j = 0; j < x.shape[1]; ++j) {
76                 ForceUnroll<ilp_factor>{}([&](int k) {
77                     auto val = map(x_rows[k][j], y_rows[k][j]);
78                     dist[k] = reduce(dist[k], val);
79                 });
80             }
81 
82             ForceUnroll<ilp_factor>{}([&](int k) {
83                 out(i + k, 0) = project(dist[k]);
84             });
85         }
86     } else {
87         for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) {
88             const T* x_rows[ilp_factor];
89             const T* y_rows[ilp_factor];
90             ForceUnroll<ilp_factor>{}([&](int k) {
91                 x_rows[k] = &x(i + k, 0);
92                 y_rows[k] = &y(i + k, 0);
93             });
94 
95             AccumulateType dist[ilp_factor] = {};
96             for (intptr_t j = 0; j < x.shape[1]; ++j) {
97                 auto x_offset = j * xs;
98                 auto y_offset = j * ys;
99                 ForceUnroll<ilp_factor>{}([&](int k) {
100                     auto val = map(x_rows[k][x_offset], y_rows[k][y_offset]);
101                     dist[k] = reduce(dist[k], val);
102                 });
103             }
104 
105             ForceUnroll<ilp_factor>{}([&](int k) {
106                 out(i + k, 0) = project(dist[k]);
107             });
108         }
109     }
110     for (; i < x.shape[0]; ++i) {
111         const T* x_row = &x(i, 0);
112         const T* y_row = &y(i, 0);
113         AccumulateType dist = {};
114         for (intptr_t j = 0; j < x.shape[1]; ++j) {
115             auto val = map(x_row[j * xs], y_row[j * ys]);
116             dist = reduce(dist, val);
117         }
118         out(i, 0) = project(dist);
119     }
120 }
121 
122 template <int ilp_factor=2, typename T,
123           typename TransformFunc,
124           typename ProjectFunc = Identity,
125           typename ReduceFunc = Plus>
126 void transform_reduce_2d_(
127     StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y,
128     StridedView2D<const T> w, const TransformFunc& map,
129     const ProjectFunc& project = Identity{},
130     const ReduceFunc& reduce = Plus{}) {
131     intptr_t i = 0;
132     intptr_t xs = x.strides[1], ys = y.strides[1], ws = w.strides[1];
133     // Result type of calling map
134     using AccumulateType = typename std::decay<decltype(
135         map(std::declval<T>(), std::declval<T>(), std::declval<T>()))>::type;
136 
137     for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) {
138         const T* x_rows[ilp_factor];
139         const T* y_rows[ilp_factor];
140         const T* w_rows[ilp_factor];
141         ForceUnroll<ilp_factor>{}([&](int k) {
142             x_rows[k] = &x(i + k, 0);
143             y_rows[k] = &y(i + k, 0);
144             w_rows[k] = &w(i + k, 0);
145         });
146 
147         AccumulateType dist[ilp_factor] = {};
148         for (intptr_t j = 0; j < x.shape[1]; ++j) {
149             ForceUnroll<ilp_factor>{}([&](int k) {
150                 auto val = map(x_rows[k][j * xs], y_rows[k][j * ys], w_rows[k][j * ws]);
151                 dist[k] = reduce(dist[k], val);
152             });
153         }
154 
155         ForceUnroll<ilp_factor>{}([&](int k) {
156             out(i + k, 0) = project(dist[k]);
157         });
158     }
159     for (; i < x.shape[0]; ++i) {
160         const T* x_row = &x(i, 0);
161         const T* y_row = &y(i, 0);
162         const T* w_row = &w(i, 0);
163         AccumulateType dist = {};
164         for (intptr_t j = 0; j < x.shape[1]; ++j) {
165             auto val = map(x_row[j * xs], y_row[j * ys], w_row[j * ws]);
166             dist = reduce(dist, val);
167         }
168         out(i, 0) = project(dist);
169     }
170 }
171 
172 struct MinkowskiDistance {
173     double p_;
174 
175     template <typename T>
176     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
177         const T p = static_cast<T>(p_);
178         const T invp = static_cast<T>(1.0 / p_);
179         transform_reduce_2d_(out, x, y, [p](T x, T y) INLINE_LAMBDA {
180             auto diff = std::abs(x - y);
181             return std::pow(diff, p);
182         },
183         [invp](T x) { return std::pow(x, invp); });
184     }
185 
186     template <typename T>
187     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
188         const T p = static_cast<T>(p_);
189         const T invp = static_cast<T>(1.0 / p_);
190         transform_reduce_2d_(out, x, y, w, [p](T x, T y, T w) INLINE_LAMBDA {
191             auto diff = std::abs(x - y);
192             return w * std::pow(diff, p);
193         },
194         [invp](T x) { return std::pow(x, invp); });
195     }
196 };
197 
198 struct EuclideanDistance {
199     template <typename T>
200     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
201         transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA {
202             auto diff = std::abs(x - y);
203             return diff * diff;
204         },
205         [](T x) { return std::sqrt(x); });
206     }
207 
208     template <typename T>
209     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
210         transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA {
211             auto diff = std::abs(x - y);
212             return w * (diff * diff);
213         },
214         [](T x) { return std::sqrt(x); });
215     }
216 };
217 
218 struct ChebyshevDistance {
219     template <typename T>
220     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
221         transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA {
222             return std::abs(x - y);
223         },
224         Identity{},
225         [](T x, T y) { return std::max(x, y); });
226     }
227 
228     template <typename T>
229     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
230         for (intptr_t i = 0; i < x.shape[0]; ++i) {
231             T dist = 0;
232             for (intptr_t j = 0; j < x.shape[1]; ++j) {
233                 auto diff = std::abs(x(i, j) - y(i, j));
234                 if (w(i, j) > 0 && diff > dist) {
235                     dist = diff;
236                 }
237             }
238             out(i, 0) = dist;
239         }
240     }
241 };
242 
243 struct CityBlockDistance {
244     template <typename T>
245     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
246         transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA {
247             return std::abs(x - y);
248         });
249     }
250 
251     template <typename T>
252     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
253         transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA {
254             return w * std::abs(x - y);
255         });
256     }
257 };
258 
259 struct SquareEuclideanDistance {
260     template <typename T>
261     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
262         transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA {
263             auto diff = x - y;
264             return diff * diff;
265         });
266     }
267 
268     template <typename T>
269     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
270         transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA {
271             auto diff = x - y;
272             return w * diff * diff;
273         });
274     }
275 };
276 
277 struct BraycurtisDistance {
278     template <typename T>
279     struct Acc {
280         Acc(): diff(0), sum(0) {}
281         T diff, sum;
282     };
283 
284     template <typename T>
285     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
286         // dist = abs(x - y).sum() / abs(x + y).sum()
287         transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA {
288             Acc<T> acc;
289             acc.diff = std::abs(x - y);
290             acc.sum = std::abs(x + y);
291             return acc;
292         },
293         [](const Acc<T>& acc) INLINE_LAMBDA {
294             return acc.diff / acc.sum;
295         },
296         [](const Acc<T>& a, const Acc<T>& b) INLINE_LAMBDA {
297             Acc<T> acc;
298             acc.diff = a.diff + b.diff;
299             acc.sum = a.sum + b.sum;
300             return acc;
301         });
302     }
303 
304     template <typename T>
305     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
306         // dist = (w * abs(x - y)).sum() / (w * abs(x + y)).sum()
307         transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA {
308             Acc<T> acc;
309             acc.diff = w * std::abs(x - y);
310             acc.sum = w * std::abs(x + y);
311             return acc;
312         },
313         [](const Acc<T>& acc) INLINE_LAMBDA {
314             return acc.diff / acc.sum;
315         },
316         [](const Acc<T>& a, const Acc<T>& b) INLINE_LAMBDA {
317             Acc<T> acc;
318             acc.diff = a.diff + b.diff;
319             acc.sum = a.sum + b.sum;
320             return acc;
321         });
322     }
323 };
324 
325 struct CanberraDistance {
326     template <typename T>
327     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const {
328         // dist = (abs(x - y) / (abs(x) + abs(y))).sum()
329         transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA {
330             auto num = std::abs(x - y);
331             auto denom = std::abs(x) + std::abs(y);
332             // branchless replacement for (denom == 0) ? 0 : num / denom;
333             return num / (denom + (denom == 0));
334         });
335     }
336 
337     template <typename T>
338     void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const {
339         // dist = (w * abs(x - y) / (abs(x) + abs(y))).sum()
340         transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA {
341             auto num = w * std::abs(x - y);
342             auto denom = std::abs(x) + std::abs(y);
343             // branchless replacement for (denom == 0) ? 0 : num / denom;
344             return num / (denom + (denom == 0));
345         });
346     }
347 };
348