1# distutils: language = c 2# cython: wraparound=False, cdivision=True, boundscheck=False 3 4import numpy as np 5 6from libc.math cimport sqrt, acos 7 8from dipy.segment.cythonutils cimport tuple2shape, shape2tuple, same_shape 9from dipy.segment.featurespeed cimport IdentityFeature, ResampleFeature 10 11DEF biggest_double = 1.7976931348623157e+308 # np.finfo('f8').max 12 13import math 14cdef double PI = math.pi 15 16 17cdef class Metric(object): 18 """ Computes a distance between two sequential data. 19 20 A sequence of N-dimensional points is represented as a 2D array with 21 shape (nb_points, nb_dimensions). A `feature` object can be specified 22 in order to calculate the distance between extracted features, rather 23 than directly between the sequential data. 24 25 Parameters 26 ---------- 27 feature : `Feature` object, optional 28 It is used to extract features before computing the distance. 29 30 Notes 31 ----- 32 When subclassing `Metric`, one only needs to override the `dist` and 33 `are_compatible` methods. 34 """ 35 def __init__(Metric self, Feature feature=IdentityFeature()): 36 self.feature = feature 37 self.is_order_invariant = self.feature.is_order_invariant 38 39 property feature: 40 """ `Feature` object used to extract features from sequential data """ 41 def __get__(Metric self): 42 return self.feature 43 44 property is_order_invariant: 45 """ Is this metric invariant to the sequence's ordering """ 46 def __get__(Metric self): 47 return bool(self.is_order_invariant) 48 49 cdef int c_are_compatible(Metric self, Shape shape1, Shape shape2) nogil except -1: 50 """ Cython version of `Metric.are_compatible`. """ 51 with gil: 52 return self.are_compatible(shape2tuple(shape1), shape2tuple(shape2)) 53 54 cdef double c_dist(Metric self, Data2D features1, Data2D features2) nogil except -1: 55 """ Cython version of `Metric.dist`. """ 56 with gil: 57 _features1 = np.asarray(<float[:features1.shape[0], :features1.shape[1]]> <float*> features1._data) 58 _features2 = np.asarray(<float[:features2.shape[0], :features2.shape[1]]> <float*> features2._data) 59 return self.dist(_features1, _features2) 60 61 cpdef are_compatible(Metric self, shape1, shape2): 62 """ Checks if features can be used by `metric.dist` based on their shape. 63 64 Basically this method exists so we don't have to do this check 65 inside the `metric.dist` function (speedup). 66 67 Parameters 68 ---------- 69 shape1 : int, 1-tuple or 2-tuple 70 shape of the first data point's features 71 shape2 : int, 1-tuple or 2-tuple 72 shape of the second data point's features 73 74 Returns 75 ------- 76 are_compatible : bool 77 whether or not shapes are compatible 78 """ 79 raise NotImplementedError("Metric's subclasses must implement method `are_compatible(self, shape1, shape2)`!") 80 81 cpdef double dist(Metric self, features1, features2) except -1: 82 """ Computes a distance between two data points based on their features. 83 84 Parameters 85 ---------- 86 features1 : 2D array 87 Features of the first data point. 88 features2 : 2D array 89 Features of the second data point. 90 91 Returns 92 ------- 93 double 94 Distance between two data points. 95 """ 96 raise NotImplementedError("Metric's subclasses must implement method `dist(self, features1, features2)`!") 97 98 99cdef class CythonMetric(Metric): 100 """ Computes a distance between two sequential data. 101 102 A sequence of N-dimensional points is represented as a 2D array with 103 shape (nb_points, nb_dimensions). A `feature` object can be specified 104 in order to calculate the distance between extracted features, rather 105 than directly between the sequential data. 106 107 Parameters 108 ---------- 109 feature : `Feature` object, optional 110 It is used to extract features before computing the distance. 111 112 Notes 113 ----- 114 When subclassing `CythonMetric`, one only needs to override the `c_dist` and 115 `c_are_compatible` methods. 116 """ 117 cpdef are_compatible(CythonMetric self, shape1, shape2): 118 """ Checks if features can be used by `metric.dist` based on their shape. 119 120 Basically this method exists so we don't have to do this check 121 inside method `dist` (speedup). 122 123 Parameters 124 ---------- 125 shape1 : int, 1-tuple or 2-tuple 126 Shape of the first data point's features. 127 shape2 : int, 1-tuple or 2-tuple 128 Shape of the second data point's features. 129 130 Returns 131 ------- 132 bool 133 Whether or not shapes are compatible. 134 135 Notes 136 ----- 137 This method calls its Cython version `self.c_are_compatible` accordingly. 138 """ 139 if np.asarray(shape1).ndim == 0: 140 shape1 = (1, shape1) 141 elif len(shape1) == 1: 142 shape1 = (1,) + shape1 143 144 if np.asarray(shape2).ndim == 0: 145 shape2 = (1, shape2) 146 elif len(shape2) == 1: 147 shape2 = (1,) + shape2 148 149 return self.c_are_compatible(tuple2shape(shape1), tuple2shape(shape2)) == 1 150 151 cpdef double dist(CythonMetric self, features1, features2) except -1: 152 """ Computes a distance between two data points based on their features. 153 154 Parameters 155 ---------- 156 features1 : 2D array 157 Features of the first data point. 158 features2 : 2D array 159 Features of the second data point. 160 161 Returns 162 ------- 163 double 164 Distance between two data points. 165 166 Notes 167 ----- 168 This method calls its Cython version `self.c_dist` accordingly. 169 """ 170 # If needed, we convert features to 2D arrays. 171 features1 = np.asarray(features1) 172 if features1.ndim == 0: 173 features1 = features1[np.newaxis, np.newaxis] 174 elif features1.ndim == 1: 175 features1 = features1[np.newaxis] 176 elif features1.ndim == 2: 177 pass 178 else: 179 raise TypeError("Only scalar, 1D or 2D array features are" 180 " supported for parameter 'features1'!") 181 182 features2 = np.asarray(features2) 183 if features2.ndim == 0: 184 features2 = features2[np.newaxis, np.newaxis] 185 elif features2.ndim == 1: 186 features2 = features2[np.newaxis] 187 elif features2.ndim == 2: 188 pass 189 else: 190 raise TypeError("Only scalar, 1D or 2D array features are" 191 " supported for parameter 'features2'!") 192 193 if not self.are_compatible(features1.shape, features2.shape): 194 raise ValueError("Features are not compatible according to this metric!") 195 196 return self.c_dist(features1, features2) 197 198 199cdef class SumPointwiseEuclideanMetric(CythonMetric): 200 r""" Computes the sum of pointwise Euclidean distances between two 201 sequential data. 202 203 A sequence of N-dimensional points is represented as a 2D array with 204 shape (nb_points, nb_dimensions). A `feature` object can be specified 205 in order to calculate the distance between the features, rather than 206 directly between the sequential data. 207 208 Parameters 209 ---------- 210 feature : `Feature` object, optional 211 It is used to extract features before computing the distance. 212 213 Notes 214 ----- 215 The distance between two 2D sequential data:: 216 217 s1 s2 218 219 0* a *0 220 \ | 221 \ | 222 1* | 223 | b *1 224 | \ 225 2* \ 226 c *2 227 228 is equal to $a+b+c$ where $a$ is the Euclidean distance between s1[0] and 229 s2[0], $b$ between s1[1] and s2[1] and $c$ between s1[2] and s2[2]. 230 """ 231 cdef double c_dist(SumPointwiseEuclideanMetric self, Data2D features1, Data2D features2) nogil except -1: 232 cdef : 233 int N = features1.shape[0], D = features1.shape[1] 234 int n, d 235 double dd, dist_n, dist = 0.0 236 237 for n in range(N): 238 dist_n = 0.0 239 for d in range(D): 240 dd = features1[n, d] - features2[n, d] 241 dist_n += dd*dd 242 243 dist += sqrt(dist_n) 244 245 return dist 246 247 cdef int c_are_compatible(SumPointwiseEuclideanMetric self, Shape shape1, Shape shape2) nogil except -1: 248 return same_shape(shape1, shape2) 249 250 251cdef class AveragePointwiseEuclideanMetric(SumPointwiseEuclideanMetric): 252 r""" Computes the average of pointwise Euclidean distances between two 253 sequential data. 254 255 A sequence of N-dimensional points is represented as a 2D array with 256 shape (nb_points, nb_dimensions). A `feature` object can be specified 257 in order to calculate the distance between the features, rather than 258 directly between the sequential data. 259 260 Parameters 261 ---------- 262 feature : `Feature` object, optional 263 It is used to extract features before computing the distance. 264 265 Notes 266 ----- 267 The distance between two 2D sequential data:: 268 269 s1 s2 270 271 0* a *0 272 \ | 273 \ | 274 1* | 275 | b *1 276 | \ 277 2* \ 278 c *2 279 280 is equal to $(a+b+c)/3$ where $a$ is the Euclidean distance between s1[0] and 281 s2[0], $b$ between s1[1] and s2[1] and $c$ between s1[2] and s2[2]. 282 """ 283 cdef double c_dist(AveragePointwiseEuclideanMetric self, Data2D features1, Data2D features2) nogil except -1: 284 cdef int N = features1.shape[0] 285 cdef double dist = SumPointwiseEuclideanMetric.c_dist(self, features1, features2) 286 return dist / N 287 288 289cdef class MinimumAverageDirectFlipMetric(AveragePointwiseEuclideanMetric): 290 r""" Computes the MDF distance (minimum average direct-flip) between two 291 sequential data. 292 293 A sequence of N-dimensional points is represented as a 2D array with 294 shape (nb_points, nb_dimensions). 295 296 Notes 297 ----- 298 The distance between two 2D sequential data:: 299 300 s1 s2 301 302 0* a *0 303 \ | 304 \ | 305 1* | 306 | b *1 307 | \ 308 2* \ 309 c *2 310 311 is equal to $\min((a+b+c)/3, (a'+b'+c')/3)$ where $a$ is the Euclidean distance 312 between s1[0] and s2[0], $b$ between s1[1] and s2[1], $c$ between s1[2] 313 and s2[2], $a'$ between s1[0] and s2[2], $b'$ between s1[1] and s2[1] 314 and $c'$ between s1[2] and s2[0]. 315 """ 316 property is_order_invariant: 317 """ Is this metric invariant to the sequence's ordering """ 318 def __get__(MinimumAverageDirectFlipMetric self): 319 return True # Ordering is handled in the distance computation 320 321 cdef double c_dist(MinimumAverageDirectFlipMetric self, Data2D features1, Data2D features2) nogil except -1: 322 cdef double dist_direct = AveragePointwiseEuclideanMetric.c_dist(self, features1, features2) 323 cdef double dist_flipped = AveragePointwiseEuclideanMetric.c_dist(self, features1, features2[::-1]) 324 return min(dist_direct, dist_flipped) 325 326 327cdef class CosineMetric(CythonMetric): 328 r""" Computes the cosine distance between two vectors. 329 330 A vector (i.e. a N-dimensional point) is represented as a 2D array with 331 shape (1, nb_dimensions). 332 333 Notes 334 ----- 335 The distance between two vectors $v_1$ and $v_2$ is equal to 336 $\frac{1}{\pi} \arccos\left(\frac{v_1 \cdot v_2}{\|v_1\| \|v_2\|}\right)$ 337 and is bounded within $[0,1]$. 338 """ 339 def __init__(CosineMetric self, Feature feature): 340 super(CosineMetric, self).__init__(feature=feature) 341 342 cdef int c_are_compatible(CosineMetric self, Shape shape1, Shape shape2) nogil except -1: 343 return same_shape(shape1, shape2) != 0 and shape1.dims[0] == 1 344 345 cdef double c_dist(CosineMetric self, Data2D features1, Data2D features2) nogil except -1: 346 cdef : 347 int d, D = features1.shape[1] 348 double sqr_norm_features1 = 0.0, sqr_norm_features2 = 0.0 349 double cos_theta = 0.0 350 351 for d in range(D): 352 cos_theta += features1[0, d] * features2[0, d] 353 sqr_norm_features1 += features1[0, d] * features1[0, d] 354 sqr_norm_features2 += features2[0, d] * features2[0, d] 355 356 if sqr_norm_features1 == 0.: 357 if sqr_norm_features2 == 0.: 358 return 0. 359 else: 360 return 1. 361 362 cos_theta /= sqrt(sqr_norm_features1) * sqrt(sqr_norm_features2) 363 364 # Make sure it's in [-1, 1], i.e. within domain of arccosine 365 cos_theta = min(cos_theta, 1.) 366 cos_theta = max(cos_theta, -1.) 367 return acos(cos_theta) / PI # Normalized cosine distance 368 369 370cpdef distance_matrix(Metric metric, data1, data2=None): 371 """ Computes the distance matrix between two lists of sequential data. 372 373 The distance matrix is obtained by computing the pairwise distance of all 374 tuples spawn by the Cartesian product of `data1` with `data2`. If `data2` 375 is not provided, the Cartesian product of `data1` with itself is used 376 instead. A sequence of N-dimensional points is represented as a 2D array with 377 shape (nb_points, nb_dimensions). 378 379 Parameters 380 ---------- 381 metric : `Metric` object 382 Tells how to compute the distance between two sequential data. 383 data1 : list of 2D arrays 384 List of sequences of N-dimensional points. 385 data2 : list of 2D arrays 386 Llist of sequences of N-dimensional points. 387 388 Returns 389 ------- 390 2D array (double) 391 Distance matrix. 392 """ 393 cdef int i, j 394 if data2 is None: 395 data2 = data1 396 397 shape = metric.feature.infer_shape(data1[0].astype(np.float32)) 398 distance_matrix = np.zeros((len(data1), len(data2)), dtype=np.float64) 399 cdef: 400 Data2D features1 = np.empty(shape, np.float32) 401 Data2D features2 = np.empty(shape, np.float32) 402 403 for i in range(len(data1)): 404 datum1 = data1[i] if data1[i].flags.writeable and data1[i].dtype is np.float32 else data1[i].astype(np.float32) 405 metric.feature.c_extract(datum1, features1) 406 for j in range(len(data2)): 407 datum2 = data2[j] if data2[j].flags.writeable and data2[j].dtype is np.float32 else data2[j].astype(np.float32) 408 metric.feature.c_extract(datum2, features2) 409 distance_matrix[i, j] = metric.c_dist(features1, features2) 410 411 return distance_matrix 412 413 414cpdef double dist(Metric metric, datum1, datum2) except -1: 415 """ Computes a distance between `datum1` and `datum2`. 416 417 A sequence of N-dimensional points is represented as a 2D array with 418 shape (nb_points, nb_dimensions). 419 420 Parameters 421 ---------- 422 metric : `Metric` object 423 Tells how to compute the distance between `datum1` and `datum2`. 424 datum1 : 2D array 425 Sequence of N-dimensional points. 426 datum2 : 2D array 427 Sequence of N-dimensional points. 428 429 Returns 430 ------- 431 double 432 Distance between two data points. 433 """ 434 datum1 = datum1 if datum1.flags.writeable and datum1.dtype is np.float32 else datum1.astype(np.float32) 435 datum2 = datum2 if datum2.flags.writeable and datum2.dtype is np.float32 else datum2.astype(np.float32) 436 437 cdef: 438 Shape shape1 = metric.feature.c_infer_shape(datum1) 439 Shape shape2 = metric.feature.c_infer_shape(datum2) 440 Data2D features1 = np.empty(shape2tuple(shape1), np.float32) 441 Data2D features2 = np.empty(shape2tuple(shape2), np.float32) 442 443 metric.feature.c_extract(datum1, features1) 444 metric.feature.c_extract(datum2, features2) 445 return metric.c_dist(features1, features2) 446