1# distutils: language = c
2# cython: wraparound=False, cdivision=True, boundscheck=False
3
4import numpy as np
5
6from libc.math cimport sqrt, acos
7
8from dipy.segment.cythonutils cimport tuple2shape, shape2tuple, same_shape
9from dipy.segment.featurespeed cimport IdentityFeature, ResampleFeature
10
11DEF biggest_double = 1.7976931348623157e+308  #  np.finfo('f8').max
12
13import math
14cdef double PI = math.pi
15
16
17cdef class Metric(object):
18    """ Computes a distance between two sequential data.
19
20    A sequence of N-dimensional points is represented as a 2D array with
21    shape (nb_points, nb_dimensions). A `feature` object can be specified
22    in order to calculate the distance between extracted features, rather
23    than directly between the sequential data.
24
25    Parameters
26    ----------
27    feature : `Feature` object, optional
28        It is used to extract features before computing the distance.
29
30    Notes
31    -----
32    When subclassing `Metric`, one only needs to override the `dist` and
33    `are_compatible` methods.
34    """
35    def __init__(Metric self, Feature feature=IdentityFeature()):
36        self.feature = feature
37        self.is_order_invariant = self.feature.is_order_invariant
38
39    property feature:
40        """ `Feature` object used to extract features from sequential data """
41        def __get__(Metric self):
42            return self.feature
43
44    property is_order_invariant:
45        """ Is this metric invariant to the sequence's ordering """
46        def __get__(Metric self):
47            return bool(self.is_order_invariant)
48
49    cdef int c_are_compatible(Metric self, Shape shape1, Shape shape2) nogil except -1:
50        """ Cython version of `Metric.are_compatible`. """
51        with gil:
52            return self.are_compatible(shape2tuple(shape1), shape2tuple(shape2))
53
54    cdef double c_dist(Metric self, Data2D features1, Data2D features2) nogil except -1:
55        """ Cython version of `Metric.dist`. """
56        with gil:
57            _features1 = np.asarray(<float[:features1.shape[0], :features1.shape[1]]> <float*> features1._data)
58            _features2 = np.asarray(<float[:features2.shape[0], :features2.shape[1]]> <float*> features2._data)
59            return self.dist(_features1, _features2)
60
61    cpdef are_compatible(Metric self, shape1, shape2):
62        """ Checks if features can be used by `metric.dist` based on their shape.
63
64        Basically this method exists so we don't have to do this check
65        inside the `metric.dist` function (speedup).
66
67        Parameters
68        ----------
69        shape1 : int, 1-tuple or 2-tuple
70            shape of the first data point's features
71        shape2 : int, 1-tuple or 2-tuple
72            shape of the second data point's features
73
74        Returns
75        -------
76        are_compatible : bool
77            whether or not shapes are compatible
78        """
79        raise NotImplementedError("Metric's subclasses must implement method `are_compatible(self, shape1, shape2)`!")
80
81    cpdef double dist(Metric self, features1, features2) except -1:
82        """ Computes a distance between two data points based on their features.
83
84        Parameters
85        ----------
86        features1 : 2D array
87            Features of the first data point.
88        features2 : 2D array
89            Features of the second data point.
90
91        Returns
92        -------
93        double
94            Distance between two data points.
95        """
96        raise NotImplementedError("Metric's subclasses must implement method `dist(self, features1, features2)`!")
97
98
99cdef class CythonMetric(Metric):
100    """ Computes a distance between two sequential data.
101
102    A sequence of N-dimensional points is represented as a 2D array with
103    shape (nb_points, nb_dimensions). A `feature` object can be specified
104    in order to calculate the distance between extracted features, rather
105    than directly between the sequential data.
106
107    Parameters
108    ----------
109    feature : `Feature` object, optional
110        It is used to extract features before computing the distance.
111
112    Notes
113    -----
114    When subclassing `CythonMetric`, one only needs to override the `c_dist` and
115    `c_are_compatible` methods.
116    """
117    cpdef are_compatible(CythonMetric self, shape1, shape2):
118        """ Checks if features can be used by `metric.dist` based on their shape.
119
120        Basically this method exists so we don't have to do this check
121        inside method `dist` (speedup).
122
123        Parameters
124        ----------
125        shape1 : int, 1-tuple or 2-tuple
126            Shape of the first data point's features.
127        shape2 : int, 1-tuple or 2-tuple
128            Shape of the second data point's features.
129
130        Returns
131        -------
132        bool
133            Whether or not shapes are compatible.
134
135        Notes
136        -----
137        This method calls its Cython version `self.c_are_compatible` accordingly.
138        """
139        if np.asarray(shape1).ndim == 0:
140            shape1 = (1, shape1)
141        elif len(shape1) == 1:
142            shape1 = (1,) + shape1
143
144        if np.asarray(shape2).ndim == 0:
145            shape2 = (1, shape2)
146        elif len(shape2) == 1:
147            shape2 = (1,) + shape2
148
149        return self.c_are_compatible(tuple2shape(shape1), tuple2shape(shape2)) == 1
150
151    cpdef double dist(CythonMetric self, features1, features2) except -1:
152        """ Computes a distance between two data points based on their features.
153
154        Parameters
155        ----------
156        features1 : 2D array
157            Features of the first data point.
158        features2 : 2D array
159            Features of the second data point.
160
161        Returns
162        -------
163        double
164            Distance between two data points.
165
166        Notes
167        -----
168        This method calls its Cython version `self.c_dist` accordingly.
169        """
170        # If needed, we convert features to 2D arrays.
171        features1 = np.asarray(features1)
172        if features1.ndim == 0:
173            features1 = features1[np.newaxis, np.newaxis]
174        elif features1.ndim == 1:
175            features1 = features1[np.newaxis]
176        elif features1.ndim == 2:
177            pass
178        else:
179            raise TypeError("Only scalar, 1D or 2D array features are"
180                            " supported for parameter 'features1'!")
181
182        features2 = np.asarray(features2)
183        if features2.ndim == 0:
184            features2 = features2[np.newaxis, np.newaxis]
185        elif features2.ndim == 1:
186            features2 = features2[np.newaxis]
187        elif features2.ndim == 2:
188            pass
189        else:
190            raise TypeError("Only scalar, 1D or 2D array features are"
191                            " supported for parameter 'features2'!")
192
193        if not self.are_compatible(features1.shape, features2.shape):
194            raise ValueError("Features are not compatible according to this metric!")
195
196        return self.c_dist(features1, features2)
197
198
199cdef class SumPointwiseEuclideanMetric(CythonMetric):
200    r""" Computes the sum of pointwise Euclidean distances between two
201    sequential data.
202
203    A sequence of N-dimensional points is represented as a 2D array with
204    shape (nb_points, nb_dimensions). A `feature` object can be specified
205    in order to calculate the distance between the features, rather than
206    directly between the sequential data.
207
208    Parameters
209    ----------
210    feature : `Feature` object, optional
211        It is used to extract features before computing the distance.
212
213    Notes
214    -----
215    The distance between two 2D sequential data::
216
217        s1       s2
218
219        0*   a    *0
220          \       |
221           \      |
222           1*     |
223            |  b  *1
224            |      \
225            2*      \
226                c    *2
227
228    is equal to $a+b+c$ where $a$ is the Euclidean distance between s1[0] and
229    s2[0], $b$ between s1[1] and s2[1] and $c$ between s1[2] and s2[2].
230    """
231    cdef double c_dist(SumPointwiseEuclideanMetric self, Data2D features1, Data2D features2) nogil except -1:
232        cdef :
233            int N = features1.shape[0], D = features1.shape[1]
234            int n, d
235            double dd, dist_n, dist = 0.0
236
237        for n in range(N):
238            dist_n = 0.0
239            for d in range(D):
240                dd = features1[n, d] - features2[n, d]
241                dist_n += dd*dd
242
243            dist += sqrt(dist_n)
244
245        return dist
246
247    cdef int c_are_compatible(SumPointwiseEuclideanMetric self, Shape shape1, Shape shape2) nogil except -1:
248        return same_shape(shape1, shape2)
249
250
251cdef class AveragePointwiseEuclideanMetric(SumPointwiseEuclideanMetric):
252    r""" Computes the average of pointwise Euclidean distances between two
253    sequential data.
254
255    A sequence of N-dimensional points is represented as a 2D array with
256    shape (nb_points, nb_dimensions). A `feature` object can be specified
257    in order to calculate the distance between the features, rather than
258    directly between the sequential data.
259
260    Parameters
261    ----------
262    feature : `Feature` object, optional
263        It is used to extract features before computing the distance.
264
265    Notes
266    -----
267    The distance between two 2D sequential data::
268
269        s1       s2
270
271        0*   a    *0
272          \       |
273           \      |
274           1*     |
275            |  b  *1
276            |      \
277            2*      \
278                c    *2
279
280    is equal to $(a+b+c)/3$ where $a$ is the Euclidean distance between s1[0] and
281    s2[0], $b$ between s1[1] and s2[1] and $c$ between s1[2] and s2[2].
282    """
283    cdef double c_dist(AveragePointwiseEuclideanMetric self, Data2D features1, Data2D features2) nogil except -1:
284        cdef int N = features1.shape[0]
285        cdef double dist = SumPointwiseEuclideanMetric.c_dist(self, features1, features2)
286        return dist / N
287
288
289cdef class MinimumAverageDirectFlipMetric(AveragePointwiseEuclideanMetric):
290    r""" Computes the MDF distance (minimum average direct-flip) between two
291    sequential data.
292
293    A sequence of N-dimensional points is represented as a 2D array with
294    shape (nb_points, nb_dimensions).
295
296    Notes
297    -----
298    The distance between two 2D sequential data::
299
300        s1       s2
301
302        0*   a    *0
303          \       |
304           \      |
305           1*     |
306            |  b  *1
307            |      \
308            2*      \
309                c    *2
310
311    is equal to $\min((a+b+c)/3, (a'+b'+c')/3)$ where $a$ is the Euclidean distance
312    between s1[0] and s2[0], $b$ between s1[1] and s2[1], $c$ between s1[2]
313    and s2[2], $a'$ between s1[0] and s2[2], $b'$ between s1[1] and s2[1]
314    and $c'$ between s1[2] and s2[0].
315    """
316    property is_order_invariant:
317        """ Is this metric invariant to the sequence's ordering """
318        def __get__(MinimumAverageDirectFlipMetric self):
319            return True  # Ordering is handled in the distance computation
320
321    cdef double c_dist(MinimumAverageDirectFlipMetric self, Data2D features1, Data2D features2) nogil except -1:
322        cdef double dist_direct = AveragePointwiseEuclideanMetric.c_dist(self, features1, features2)
323        cdef double dist_flipped = AveragePointwiseEuclideanMetric.c_dist(self, features1, features2[::-1])
324        return min(dist_direct, dist_flipped)
325
326
327cdef class CosineMetric(CythonMetric):
328    r""" Computes the cosine distance between two vectors.
329
330    A vector (i.e. a N-dimensional point) is represented as a 2D array with
331    shape (1, nb_dimensions).
332
333    Notes
334    -----
335    The distance between two vectors $v_1$ and $v_2$ is equal to
336    $\frac{1}{\pi} \arccos\left(\frac{v_1 \cdot v_2}{\|v_1\| \|v_2\|}\right)$
337    and is bounded within $[0,1]$.
338    """
339    def __init__(CosineMetric self, Feature feature):
340        super(CosineMetric, self).__init__(feature=feature)
341
342    cdef int c_are_compatible(CosineMetric self, Shape shape1, Shape shape2) nogil except -1:
343        return same_shape(shape1, shape2) != 0 and shape1.dims[0] == 1
344
345    cdef double c_dist(CosineMetric self, Data2D features1, Data2D features2) nogil except -1:
346        cdef :
347            int d, D = features1.shape[1]
348            double sqr_norm_features1 = 0.0, sqr_norm_features2 = 0.0
349            double cos_theta = 0.0
350
351        for d in range(D):
352            cos_theta += features1[0, d] * features2[0, d]
353            sqr_norm_features1 += features1[0, d] * features1[0, d]
354            sqr_norm_features2 += features2[0, d] * features2[0, d]
355
356        if sqr_norm_features1 == 0.:
357            if sqr_norm_features2 == 0.:
358                return 0.
359            else:
360                return 1.
361
362        cos_theta /= sqrt(sqr_norm_features1) * sqrt(sqr_norm_features2)
363
364        # Make sure it's in [-1, 1], i.e. within domain of arccosine
365        cos_theta = min(cos_theta, 1.)
366        cos_theta = max(cos_theta, -1.)
367        return acos(cos_theta) / PI  # Normalized cosine distance
368
369
370cpdef distance_matrix(Metric metric, data1, data2=None):
371    """ Computes the distance matrix between two lists of sequential data.
372
373    The distance matrix is obtained by computing the pairwise distance of all
374    tuples spawn by the Cartesian product of `data1` with `data2`. If `data2`
375    is not provided, the Cartesian product of `data1` with itself is used
376    instead. A sequence of N-dimensional points is represented as a 2D array with
377    shape (nb_points, nb_dimensions).
378
379    Parameters
380    ----------
381    metric : `Metric` object
382        Tells how to compute the distance between two sequential data.
383    data1 : list of 2D arrays
384        List of sequences of N-dimensional points.
385    data2 : list of 2D arrays
386        Llist of sequences of N-dimensional points.
387
388    Returns
389    -------
390    2D array (double)
391        Distance matrix.
392    """
393    cdef int i, j
394    if data2 is None:
395        data2 = data1
396
397    shape = metric.feature.infer_shape(data1[0].astype(np.float32))
398    distance_matrix = np.zeros((len(data1), len(data2)), dtype=np.float64)
399    cdef:
400        Data2D features1 = np.empty(shape, np.float32)
401        Data2D features2 = np.empty(shape, np.float32)
402
403    for i in range(len(data1)):
404        datum1 = data1[i] if data1[i].flags.writeable and data1[i].dtype is np.float32 else data1[i].astype(np.float32)
405        metric.feature.c_extract(datum1, features1)
406        for j in range(len(data2)):
407            datum2 = data2[j] if data2[j].flags.writeable and data2[j].dtype is np.float32 else data2[j].astype(np.float32)
408            metric.feature.c_extract(datum2, features2)
409            distance_matrix[i, j] = metric.c_dist(features1, features2)
410
411    return distance_matrix
412
413
414cpdef double dist(Metric metric, datum1, datum2) except -1:
415    """ Computes a distance between `datum1` and `datum2`.
416
417    A sequence of N-dimensional points is represented as a 2D array with
418    shape (nb_points, nb_dimensions).
419
420    Parameters
421    ----------
422    metric : `Metric` object
423        Tells how to compute the distance between `datum1` and `datum2`.
424    datum1 : 2D array
425        Sequence of N-dimensional points.
426    datum2 : 2D array
427        Sequence of N-dimensional points.
428
429    Returns
430    -------
431    double
432        Distance between two data points.
433    """
434    datum1 = datum1 if datum1.flags.writeable and datum1.dtype is np.float32 else datum1.astype(np.float32)
435    datum2 = datum2 if datum2.flags.writeable and datum2.dtype is np.float32 else datum2.astype(np.float32)
436
437    cdef:
438        Shape shape1 = metric.feature.c_infer_shape(datum1)
439        Shape shape2 = metric.feature.c_infer_shape(datum2)
440        Data2D features1 = np.empty(shape2tuple(shape1), np.float32)
441        Data2D features2 = np.empty(shape2tuple(shape2), np.float32)
442
443    metric.feature.c_extract(datum1, features1)
444    metric.feature.c_extract(datum2, features2)
445    return metric.c_dist(features1, features2)
446