1 #pragma once 2 3 #include <cmath> 4 #include "views.h" 5 6 #ifdef __GNUC__ 7 #define ALWAYS_INLINE inline __attribute__((always_inline)) 8 #define INLINE_LAMBDA __attribute__((always_inline)) 9 #elif defined(_MSC_VER) 10 #define ALWAYS_INLINE __forceinline 11 #define INLINE_LAMBDA 12 #else 13 #define ALWAYS_INLINE inline 14 #define INLINE_LAMBDA 15 #endif 16 17 struct Identity { 18 template <typename T> operatorIdentity19 ALWAYS_INLINE T operator() (T && val) const { 20 return std::forward<T>(val); 21 } 22 }; 23 24 struct Plus { 25 template <typename T> operatorPlus26 ALWAYS_INLINE T operator()(T a, T b) const { 27 return a + b; 28 } 29 }; 30 31 // Helper to force a fixed bound loop to be completely unrolled 32 template <int unroll> 33 struct ForceUnroll{ 34 template <typename Func> operatorForceUnroll35 ALWAYS_INLINE void operator()(const Func& f) const { 36 ForceUnroll<unroll - 1>{}(f); 37 f(unroll - 1); 38 } 39 }; 40 41 template <> 42 struct ForceUnroll<1> { 43 template <typename Func> 44 ALWAYS_INLINE void operator()(const Func& f) const { 45 f(0); 46 } 47 }; 48 49 template <int ilp_factor=4, 50 typename T, 51 typename TransformFunc, 52 typename ProjectFunc = Identity, 53 typename ReduceFunc = Plus> 54 void transform_reduce_2d_( 55 StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, 56 const TransformFunc& map, 57 const ProjectFunc& project = Identity{}, 58 const ReduceFunc& reduce = Plus{}) { 59 // Result type of calling map 60 using AccumulateType = typename std::decay<decltype( 61 map(std::declval<T>(), std::declval<T>()))>::type; 62 intptr_t xs = x.strides[1], ys = y.strides[1]; 63 64 intptr_t i = 0; 65 if (xs == 1 && ys == 1) { 66 for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) { 67 const T* x_rows[ilp_factor]; 68 const T* y_rows[ilp_factor]; 69 ForceUnroll<ilp_factor>{}([&](int k) { 70 x_rows[k] = &x(i + k, 0); 71 y_rows[k] = &y(i + k, 0); 72 }); 73 74 AccumulateType dist[ilp_factor] = {}; 75 for (intptr_t j = 0; j < x.shape[1]; ++j) { 76 ForceUnroll<ilp_factor>{}([&](int k) { 77 auto val = map(x_rows[k][j], y_rows[k][j]); 78 dist[k] = reduce(dist[k], val); 79 }); 80 } 81 82 ForceUnroll<ilp_factor>{}([&](int k) { 83 out(i + k, 0) = project(dist[k]); 84 }); 85 } 86 } else { 87 for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) { 88 const T* x_rows[ilp_factor]; 89 const T* y_rows[ilp_factor]; 90 ForceUnroll<ilp_factor>{}([&](int k) { 91 x_rows[k] = &x(i + k, 0); 92 y_rows[k] = &y(i + k, 0); 93 }); 94 95 AccumulateType dist[ilp_factor] = {}; 96 for (intptr_t j = 0; j < x.shape[1]; ++j) { 97 auto x_offset = j * xs; 98 auto y_offset = j * ys; 99 ForceUnroll<ilp_factor>{}([&](int k) { 100 auto val = map(x_rows[k][x_offset], y_rows[k][y_offset]); 101 dist[k] = reduce(dist[k], val); 102 }); 103 } 104 105 ForceUnroll<ilp_factor>{}([&](int k) { 106 out(i + k, 0) = project(dist[k]); 107 }); 108 } 109 } 110 for (; i < x.shape[0]; ++i) { 111 const T* x_row = &x(i, 0); 112 const T* y_row = &y(i, 0); 113 AccumulateType dist = {}; 114 for (intptr_t j = 0; j < x.shape[1]; ++j) { 115 auto val = map(x_row[j * xs], y_row[j * ys]); 116 dist = reduce(dist, val); 117 } 118 out(i, 0) = project(dist); 119 } 120 } 121 122 template <int ilp_factor=2, typename T, 123 typename TransformFunc, 124 typename ProjectFunc = Identity, 125 typename ReduceFunc = Plus> 126 void transform_reduce_2d_( 127 StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, 128 StridedView2D<const T> w, const TransformFunc& map, 129 const ProjectFunc& project = Identity{}, 130 const ReduceFunc& reduce = Plus{}) { 131 intptr_t i = 0; 132 intptr_t xs = x.strides[1], ys = y.strides[1], ws = w.strides[1]; 133 // Result type of calling map 134 using AccumulateType = typename std::decay<decltype( 135 map(std::declval<T>(), std::declval<T>(), std::declval<T>()))>::type; 136 137 for (; i + (ilp_factor - 1) < x.shape[0]; i += ilp_factor) { 138 const T* x_rows[ilp_factor]; 139 const T* y_rows[ilp_factor]; 140 const T* w_rows[ilp_factor]; 141 ForceUnroll<ilp_factor>{}([&](int k) { 142 x_rows[k] = &x(i + k, 0); 143 y_rows[k] = &y(i + k, 0); 144 w_rows[k] = &w(i + k, 0); 145 }); 146 147 AccumulateType dist[ilp_factor] = {}; 148 for (intptr_t j = 0; j < x.shape[1]; ++j) { 149 ForceUnroll<ilp_factor>{}([&](int k) { 150 auto val = map(x_rows[k][j * xs], y_rows[k][j * ys], w_rows[k][j * ws]); 151 dist[k] = reduce(dist[k], val); 152 }); 153 } 154 155 ForceUnroll<ilp_factor>{}([&](int k) { 156 out(i + k, 0) = project(dist[k]); 157 }); 158 } 159 for (; i < x.shape[0]; ++i) { 160 const T* x_row = &x(i, 0); 161 const T* y_row = &y(i, 0); 162 const T* w_row = &w(i, 0); 163 AccumulateType dist = {}; 164 for (intptr_t j = 0; j < x.shape[1]; ++j) { 165 auto val = map(x_row[j * xs], y_row[j * ys], w_row[j * ws]); 166 dist = reduce(dist, val); 167 } 168 out(i, 0) = project(dist); 169 } 170 } 171 172 struct MinkowskiDistance { 173 double p_; 174 175 template <typename T> 176 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 177 const T p = static_cast<T>(p_); 178 const T invp = static_cast<T>(1.0 / p_); 179 transform_reduce_2d_(out, x, y, [p](T x, T y) INLINE_LAMBDA { 180 auto diff = std::abs(x - y); 181 return std::pow(diff, p); 182 }, 183 [invp](T x) { return std::pow(x, invp); }); 184 } 185 186 template <typename T> 187 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 188 const T p = static_cast<T>(p_); 189 const T invp = static_cast<T>(1.0 / p_); 190 transform_reduce_2d_(out, x, y, w, [p](T x, T y, T w) INLINE_LAMBDA { 191 auto diff = std::abs(x - y); 192 return w * std::pow(diff, p); 193 }, 194 [invp](T x) { return std::pow(x, invp); }); 195 } 196 }; 197 198 struct EuclideanDistance { 199 template <typename T> 200 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 201 transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA { 202 auto diff = std::abs(x - y); 203 return diff * diff; 204 }, 205 [](T x) { return std::sqrt(x); }); 206 } 207 208 template <typename T> 209 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 210 transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA { 211 auto diff = std::abs(x - y); 212 return w * (diff * diff); 213 }, 214 [](T x) { return std::sqrt(x); }); 215 } 216 }; 217 218 struct ChebyshevDistance { 219 template <typename T> 220 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 221 transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA { 222 return std::abs(x - y); 223 }, 224 Identity{}, 225 [](T x, T y) { return std::max(x, y); }); 226 } 227 228 template <typename T> 229 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 230 for (intptr_t i = 0; i < x.shape[0]; ++i) { 231 T dist = 0; 232 for (intptr_t j = 0; j < x.shape[1]; ++j) { 233 auto diff = std::abs(x(i, j) - y(i, j)); 234 if (w(i, j) > 0 && diff > dist) { 235 dist = diff; 236 } 237 } 238 out(i, 0) = dist; 239 } 240 } 241 }; 242 243 struct CityBlockDistance { 244 template <typename T> 245 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 246 transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA { 247 return std::abs(x - y); 248 }); 249 } 250 251 template <typename T> 252 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 253 transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA { 254 return w * std::abs(x - y); 255 }); 256 } 257 }; 258 259 struct SquareEuclideanDistance { 260 template <typename T> 261 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 262 transform_reduce_2d_(out, x, y, [](T x, T y) INLINE_LAMBDA { 263 auto diff = x - y; 264 return diff * diff; 265 }); 266 } 267 268 template <typename T> 269 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 270 transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA { 271 auto diff = x - y; 272 return w * diff * diff; 273 }); 274 } 275 }; 276 277 struct BraycurtisDistance { 278 template <typename T> 279 struct Acc { 280 Acc(): diff(0), sum(0) {} 281 T diff, sum; 282 }; 283 284 template <typename T> 285 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 286 // dist = abs(x - y).sum() / abs(x + y).sum() 287 transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA { 288 Acc<T> acc; 289 acc.diff = std::abs(x - y); 290 acc.sum = std::abs(x + y); 291 return acc; 292 }, 293 [](const Acc<T>& acc) INLINE_LAMBDA { 294 return acc.diff / acc.sum; 295 }, 296 [](const Acc<T>& a, const Acc<T>& b) INLINE_LAMBDA { 297 Acc<T> acc; 298 acc.diff = a.diff + b.diff; 299 acc.sum = a.sum + b.sum; 300 return acc; 301 }); 302 } 303 304 template <typename T> 305 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 306 // dist = (w * abs(x - y)).sum() / (w * abs(x + y)).sum() 307 transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA { 308 Acc<T> acc; 309 acc.diff = w * std::abs(x - y); 310 acc.sum = w * std::abs(x + y); 311 return acc; 312 }, 313 [](const Acc<T>& acc) INLINE_LAMBDA { 314 return acc.diff / acc.sum; 315 }, 316 [](const Acc<T>& a, const Acc<T>& b) INLINE_LAMBDA { 317 Acc<T> acc; 318 acc.diff = a.diff + b.diff; 319 acc.sum = a.sum + b.sum; 320 return acc; 321 }); 322 } 323 }; 324 325 struct CanberraDistance { 326 template <typename T> 327 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y) const { 328 // dist = (abs(x - y) / (abs(x) + abs(y))).sum() 329 transform_reduce_2d_<2>(out, x, y, [](T x, T y) INLINE_LAMBDA { 330 auto num = std::abs(x - y); 331 auto denom = std::abs(x) + std::abs(y); 332 // branchless replacement for (denom == 0) ? 0 : num / denom; 333 return num / (denom + (denom == 0)); 334 }); 335 } 336 337 template <typename T> 338 void operator()(StridedView2D<T> out, StridedView2D<const T> x, StridedView2D<const T> y, StridedView2D<const T> w) const { 339 // dist = (w * abs(x - y) / (abs(x) + abs(y))).sum() 340 transform_reduce_2d_(out, x, y, w, [](T x, T y, T w) INLINE_LAMBDA { 341 auto num = w * std::abs(x - y); 342 auto denom = std::abs(x) + std::abs(y); 343 // branchless replacement for (denom == 0) ? 0 : num / denom; 344 return num / (denom + (denom == 0)); 345 }); 346 } 347 }; 348