1# cython: boundscheck=False
2# cython: nonecheck=False
3# cython: initializedcheck=False
4# mutual reachability distance compiutations
5# Authors: Leland McInnes
6# License: 3-clause BSD
7
8import numpy as np
9cimport numpy as np
10
11from scipy.spatial.distance import pdist, squareform
12from scipy.sparse import lil_matrix as sparse_matrix
13from sklearn.neighbors import KDTree, BallTree
14import gc
15
16
17def mutual_reachability(distance_matrix, min_points=5, alpha=1.0):
18    """Compute the weighted adjacency matrix of the mutual reachability
19    graph of a distance matrix.
20
21    Parameters
22    ----------
23    distance_matrix : ndarray, shape (n_samples, n_samples)
24        Array of distances between samples.
25
26    min_points : int, optional (default=5)
27        The number of points in a neighbourhood for a point to be considered
28        a core point.
29
30    Returns
31    -------
32    mututal_reachability: ndarray, shape (n_samples, n_samples)
33        Weighted adjacency matrix of the mutual reachability graph.
34
35    References
36    ----------
37    .. [1] Campello, R. J., Moulavi, D., & Sander, J. (2013, April).
38       Density-based clustering based on hierarchical density estimates.
39       In Pacific-Asia Conference on Knowledge Discovery and Data Mining
40       (pp. 160-172). Springer Berlin Heidelberg.
41    """
42    size = distance_matrix.shape[0]
43    min_points = min(size - 1, min_points)
44    try:
45        core_distances = np.partition(distance_matrix,
46                                      min_points,
47                                      axis=0)[min_points]
48    except AttributeError:
49        core_distances = np.sort(distance_matrix,
50                                 axis=0)[min_points]
51
52    if alpha != 1.0:
53        distance_matrix = distance_matrix / alpha
54
55    stage1 = np.where(core_distances > distance_matrix,
56                      core_distances, distance_matrix)
57    result = np.where(core_distances > stage1.T,
58                      core_distances.T, stage1.T).T
59    return result
60
61
62cpdef sparse_mutual_reachability(object lil_matrix, np.intp_t min_points=5,
63                                 float alpha=1.0, float max_dist=0.):
64
65    cdef np.intp_t i
66    cdef np.intp_t j
67    cdef np.intp_t n
68    cdef np.double_t mr_dist
69    cdef list sorted_row_data
70    cdef np.ndarray[dtype=np.double_t, ndim=1] core_distance
71    cdef np.ndarray[dtype=np.int32_t, ndim=1] nz_row_data
72    cdef np.ndarray[dtype=np.int32_t, ndim=1] nz_col_data
73
74    result = sparse_matrix(lil_matrix.shape)
75    core_distance = np.empty(lil_matrix.shape[0], dtype=np.double)
76
77    for i in range(lil_matrix.shape[0]):
78        sorted_row_data = sorted(lil_matrix.data[i])
79        if min_points < len(sorted_row_data):
80            core_distance[i] = sorted_row_data[min_points]
81        else:
82            core_distance[i] = np.infty
83
84    if alpha != 1.0:
85        lil_matrix = lil_matrix / alpha
86
87    nz_row_data, nz_col_data = lil_matrix.nonzero()
88
89    for n in range(nz_row_data.shape[0]):
90        i = nz_row_data[n]
91        j = nz_col_data[n]
92
93        mr_dist = max(core_distance[i], core_distance[j], lil_matrix[i, j])
94        if np.isfinite(mr_dist):
95            result[i, j] = mr_dist
96        elif max_dist > 0:
97            result[i, j] = max_dist
98
99    return result.tocsr()
100
101
102def kdtree_mutual_reachability(X, distance_matrix, metric, p=2, min_points=5,
103                               alpha=1.0, **kwargs):
104    dim = distance_matrix.shape[0]
105    min_points = min(dim - 1, min_points)
106
107    if metric == 'minkowski':
108        tree = KDTree(X, metric=metric, p=p)
109    else:
110        tree = KDTree(X, metric=metric, **kwargs)
111
112    core_distances = tree.query(X, k=min_points)[0][:, -1]
113
114    if alpha != 1.0:
115        distance_matrix = distance_matrix / alpha
116
117    stage1 = np.where(core_distances > distance_matrix,
118                      core_distances, distance_matrix)
119    result = np.where(core_distances > stage1.T,
120                      core_distances.T, stage1.T).T
121    return result
122
123
124def balltree_mutual_reachability(X, distance_matrix, metric, p=2, min_points=5,
125                                 alpha=1.0, **kwargs):
126    dim = distance_matrix.shape[0]
127    min_points = min(dim - 1, min_points)
128
129    tree = BallTree(X, metric=metric, **kwargs)
130
131    core_distances = tree.query(X, k=min_points)[0][:, -1]
132
133    if alpha != 1.0:
134        distance_matrix = distance_matrix / alpha
135
136    stage1 = np.where(core_distances > distance_matrix,
137                      core_distances, distance_matrix)
138    result = np.where(core_distances > stage1.T,
139                      core_distances.T, stage1.T).T
140    return result
141
142
143cdef np.ndarray[np.double_t, ndim=1] mutual_reachability_from_pdist(
144        np.ndarray[np.double_t, ndim=1] core_distances,
145        np.ndarray[np.double_t, ndim=1] dists, np.intp_t dim):
146
147    cdef np.intp_t i
148    cdef np.intp_t j
149    cdef np.intp_t result_pos
150
151    result_pos = 0
152    for i in range(dim):
153        for j in range(i + 1, dim):
154            if core_distances[i] > core_distances[j]:
155                if core_distances[i] > dists[result_pos]:
156                    dists[result_pos] = core_distances[i]
157
158            else:
159                if core_distances[j] > dists[result_pos]:
160                    dists[result_pos] = core_distances[j]
161
162            result_pos += 1
163
164    return dists
165
166
167def kdtree_pdist_mutual_reachability(X,  metric, p=2, min_points=5, alpha=1.0,
168                                     **kwargs):
169
170    dim = X.shape[0]
171    min_points = min(dim - 1, min_points)
172
173    if metric == 'minkowski':
174        tree = KDTree(X, metric=metric, p=p)
175    else:
176        tree = KDTree(X, metric=metric, **kwargs)
177
178    core_distances = tree.query(X, k=min_points)[0][:, -1]
179
180    del tree
181    gc.collect()
182
183    dists = pdist(X, metric=metric, p=p, **kwargs)
184
185    if alpha != 1.0:
186        dists /= alpha
187
188    dists = mutual_reachability_from_pdist(core_distances, dists, dim)
189
190    return dists
191
192
193def balltree_pdist_mutual_reachability(X, metric, p=2, min_points=5, alpha=1.0,
194                                       **kwargs):
195
196    dim = X.shape[0]
197    min_points = min(dim - 1, min_points)
198
199    tree = BallTree(X, metric=metric, **kwargs)
200
201    core_distances = tree.query(X, k=min_points)[0][:, -1]
202
203    del tree
204    gc.collect()
205
206    dists = pdist(X, metric=metric, p=p, **kwargs)
207
208    if alpha != 1.0:
209        dists /= alpha
210
211    dists = mutual_reachability_from_pdist(core_distances, dists, dim)
212
213    return dists
214