1 /**
2 * Author: Damian Eads
3 * Date: September 22, 2007 (moved to new file on June 8, 2008)
4 *
5 * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
6 * Adapted for incorporation into Scipy, April 9, 2008.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * - Redistributions of source code must retain the above
12 * copyright notice, this list of conditions and the
13 * following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer
16 * in the documentation and/or other materials provided with the
17 * distribution.
18 * - Neither the name of the author nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34 #include "_c99compat.h"
35
36
37 static NPY_INLINE void
_row_norms(const double * X,npy_intp num_rows,const npy_intp num_cols,double * norms_buff)38 _row_norms(const double *X, npy_intp num_rows, const npy_intp num_cols, double *norms_buff){
39 /* Compute the row norms. */
40 npy_intp i, j;
41 for (i = 0; i < num_rows; ++i) {
42 for (j = 0; j < num_cols; ++j, ++X) {
43 const double curr_val = *X;
44 norms_buff[i] += curr_val * curr_val;
45 }
46 norms_buff[i] = sqrt(norms_buff[i]);
47 }
48 }
49
50 static NPY_INLINE double
sqeuclidean_distance_double(const double * u,const double * v,const npy_intp n)51 sqeuclidean_distance_double(const double *u, const double *v, const npy_intp n)
52 {
53 double s = 0.0;
54 npy_intp i;
55
56 for (i = 0; i < n; ++i) {
57 const double d = u[i] - v[i];
58 s += d * d;
59 }
60 return s;
61 }
62
63 static NPY_INLINE double
euclidean_distance_double(const double * u,const double * v,const npy_intp n)64 euclidean_distance_double(const double *u, const double *v, const npy_intp n)
65 {
66 return sqrt(sqeuclidean_distance_double(u, v, n));
67 }
68
69 #if 0 /* XXX unused */
70 static NPY_INLINE double
71 ess_distance(const double *u, const double *v, const npy_intp n)
72 {
73 double s = 0.0, d;
74 npy_intp i;
75
76 for (i = 0; i < n; ++i) {
77 d = fabs(u[i] - v[i]);
78 s += d * d;
79 }
80 return s;
81 }
82 #endif
83
84 static NPY_INLINE double
chebyshev_distance_double(const double * u,const double * v,const npy_intp n)85 chebyshev_distance_double(const double *u, const double *v, const npy_intp n)
86 {
87 double maxv = 0.0;
88 npy_intp i;
89
90 for (i = 0; i < n; ++i) {
91 const double d = fabs(u[i] - v[i]);
92 if (d > maxv) {
93 maxv = d;
94 }
95 }
96 return maxv;
97 }
98
99 static NPY_INLINE double
weighted_chebyshev_distance_double(const double * u,const double * v,const npy_intp n,const double * w)100 weighted_chebyshev_distance_double(const double *u, const double *v,
101 const npy_intp n, const double *w)
102 {
103 npy_intp i;
104 double maxv = 0.0;
105 for (i = 0; i < n; ++i) {
106 if (w[i] == 0.0) continue;
107 const double d = fabs(u[i] - v[i]);
108 if (d > maxv) {
109 maxv = d;
110 }
111 }
112 return maxv;
113 }
114
115 static NPY_INLINE double
canberra_distance_double(const double * u,const double * v,const npy_intp n)116 canberra_distance_double(const double *u, const double *v, const npy_intp n)
117 {
118 double tot = 0.;
119 npy_intp i;
120
121 for (i = 0; i < n; ++i) {
122 const double x = u[i], y = v[i];
123 const double snum = fabs(x - y);
124 const double sdenom = fabs(x) + fabs(y);
125 if (sdenom > 0.) {
126 tot += snum / sdenom;
127 }
128 }
129 return tot;
130 }
131
132 static NPY_INLINE double
bray_curtis_distance_double(const double * u,const double * v,const npy_intp n)133 bray_curtis_distance_double(const double *u, const double *v, const npy_intp n)
134 {
135 double s1 = 0.0, s2 = 0.0;
136 npy_intp i;
137
138 for (i = 0; i < n; ++i) {
139 s1 += fabs(u[i] - v[i]);
140 s2 += fabs(u[i] + v[i]);
141 }
142 return s1 / s2;
143 }
144
145 /*
146 * Timings with various BLAS implementations (gh-5657) have shown that using
147 * OpenBLAS's cblas_ddot here can give some speedup, but only for high-d data
148 * and an untuned ATLAS is slower than rolling our own.
149 */
150 static NPY_INLINE double
dot_product(const double * u,const double * v,const npy_intp n)151 dot_product(const double *u, const double *v, const npy_intp n)
152 {
153 double s = 0.0;
154 npy_intp i;
155
156 for (i = 0; i < n; ++i) {
157 s += u[i] * v[i];
158 }
159 return s;
160 }
161
162 static NPY_INLINE double
mahalanobis_distance(const double * u,const double * v,const double * covinv,double * dimbuf1,double * dimbuf2,const npy_intp n)163 mahalanobis_distance(const double *u, const double *v, const double *covinv,
164 double *dimbuf1, double *dimbuf2, const npy_intp n)
165 {
166 npy_intp i;
167
168 for (i = 0; i < n; ++i) {
169 dimbuf1[i] = u[i] - v[i];
170 }
171 /*
172 * Note: matrix-vector multiplication (GEMV). Again, OpenBLAS can speed
173 * this up for high-d data.
174 */
175 for (i = 0; i < n; ++i) {
176 const double *covrow = covinv + (i * n);
177 dimbuf2[i] = dot_product(dimbuf1, covrow, n);
178 }
179 return sqrt(dot_product(dimbuf1, dimbuf2, n));
180 }
181
182 static NPY_INLINE double
hamming_distance_double(const double * u,const double * v,const npy_intp n,const double * w)183 hamming_distance_double(const double *u, const double *v, const npy_intp n, const double *w)
184 {
185 npy_intp i;
186 double s = 0;
187 double w_sum = 0;
188
189 for (i = 0; i < n; ++i) {
190 s += ((double) (u[i] != v[i])) * w[i];
191 w_sum += w[i];
192 }
193
194 return s / w_sum;
195 }
196
197 static NPY_INLINE double
hamming_distance_char(const char * u,const char * v,const npy_intp n,const double * w)198 hamming_distance_char(const char *u, const char *v, const npy_intp n, const double *w)
199 {
200 npy_intp i;
201 double s = 0;
202 double w_sum = 0;
203
204 for (i = 0; i < n; ++i) {
205 s += ((double) (u[i] != v[i])) * w[i];
206 w_sum += w[i];
207 }
208
209 return s / w_sum;
210 }
211
212 static NPY_INLINE double
yule_distance_char(const char * u,const char * v,const npy_intp n)213 yule_distance_char(const char *u, const char *v, const npy_intp n)
214 {
215 npy_intp i;
216 npy_intp ntt = 0, nff = 0, nft = 0, ntf = 0;
217
218 for (i = 0; i < n; ++i) {
219 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
220 ntt += x & y;
221 ntf += x & (!y);
222 nft += (!x) & y;
223 }
224 nff = n - ntt - ntf - nft;
225 double half_R = (double)ntf * nft;
226 if (half_R == 0.0) {
227 return 0.0;
228 }
229 return (2. * half_R) / ((double)ntt * nff + half_R);
230 }
231
232 static NPY_INLINE double
dice_distance_char(const char * u,const char * v,const npy_intp n)233 dice_distance_char(const char *u, const char *v, const npy_intp n)
234 {
235 npy_intp i;
236 npy_intp ntt = 0, ndiff = 0;
237
238 for (i = 0; i < n; ++i) {
239 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
240 ntt += x & y;
241 ndiff += (x != y);
242 }
243 return ndiff / (2. * ntt + ndiff);
244 }
245
246 static NPY_INLINE double
rogerstanimoto_distance_char(const char * u,const char * v,const npy_intp n)247 rogerstanimoto_distance_char(const char *u, const char *v, const npy_intp n)
248 {
249 npy_intp i;
250 npy_intp ntt = 0, ndiff = 0;
251
252 for (i = 0; i < n; ++i) {
253 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
254 ntt += x & y;
255 ndiff += (x != y);
256 }
257 return (2. * ndiff) / ((double)n + ndiff);
258 }
259
260 static NPY_INLINE double
russellrao_distance_char(const char * u,const char * v,const npy_intp n)261 russellrao_distance_char(const char *u, const char *v, const npy_intp n)
262 {
263 npy_intp i;
264 npy_intp ntt = 0;
265
266 for (i = 0; i < n; ++i) {
267 ntt += (u[i] != 0) & (v[i] != 0);
268 }
269 return (double)(n - ntt) / n;
270 }
271
272 static NPY_INLINE double
kulsinski_distance_char(const char * u,const char * v,const npy_intp n)273 kulsinski_distance_char(const char *u, const char *v, const npy_intp n)
274 {
275 npy_intp i;
276 npy_intp ntt = 0, ndiff = 0;
277
278 for (i = 0; i < n; ++i) {
279 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
280 ntt += x & y;
281 ndiff += (x != y);
282 }
283 return ((double)ndiff - ntt + n) / ((double)ndiff + n);
284 }
285
286 static NPY_INLINE double
sokalsneath_distance_char(const char * u,const char * v,const npy_intp n)287 sokalsneath_distance_char(const char *u, const char *v, const npy_intp n)
288 {
289 npy_intp i;
290 npy_intp ntt = 0, ndiff = 0;
291
292 for (i = 0; i < n; ++i) {
293 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
294 ntt += x & y;
295 ndiff += (x != y);
296 }
297 return (2. * ndiff) / (2. * ndiff + ntt);
298 }
299
300 static NPY_INLINE double
sokalmichener_distance_char(const char * u,const char * v,const npy_intp n)301 sokalmichener_distance_char(const char *u, const char *v, const npy_intp n)
302 {
303 npy_intp i;
304 npy_intp ntt = 0, ndiff = 0;
305
306 for (i = 0; i < n; ++i) {
307 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
308 ntt += x & y;
309 ndiff += (x != y);
310 }
311 return (2. * ndiff) / ((double)ndiff + n);
312 }
313
314 static NPY_INLINE double
jaccard_distance_double(const double * u,const double * v,const npy_intp n)315 jaccard_distance_double(const double *u, const double *v, const npy_intp n)
316 {
317 npy_intp denom = 0, num = 0;
318 npy_intp i;
319
320 for (i = 0; i < n; ++i) {
321 const double x = u[i], y = v[i];
322 num += (x != y) & ((x != 0.0) | (y != 0.0));
323 denom += (x != 0.0) | (y != 0.0);
324 }
325 return denom == 0.0 ? 0.0 : (double)num / denom;
326 }
327
328 static NPY_INLINE double
jaccard_distance_char(const char * u,const char * v,const npy_intp n)329 jaccard_distance_char(const char *u, const char *v, const npy_intp n)
330 {
331 npy_intp num = 0, denom = 0;
332 npy_intp i;
333
334 for (i = 0; i < n; ++i) {
335 const npy_bool x = (u[i] != 0), y = (v[i] != 0);
336 num += (x != y);
337 denom += x | y;
338 }
339 return denom == 0.0 ? 0.0 : (double)num / denom;
340 }
341
342
343 static NPY_INLINE double
jensenshannon_distance_double(const double * p,const double * q,const npy_intp n)344 jensenshannon_distance_double(const double *p, const double *q, const npy_intp n)
345 {
346 npy_intp i;
347 double s = 0.0;
348 double p_sum = 0.0;
349 double q_sum = 0.0;
350
351 for (i = 0; i < n; ++i) {
352 if (p[i] < 0.0 || q[i] < 0.0)
353 return HUGE_VAL;
354 p_sum += p[i];
355 q_sum += q[i];
356 }
357
358 if (p_sum == 0.0 || q_sum == 0.0)
359 return HUGE_VAL;
360
361 for (i = 0; i < n; ++i) {
362 const double p_i = p[i] / p_sum;
363 const double q_i = q[i] / q_sum;
364 const double m_i = (p_i + q_i) / 2.0;
365 if (p_i > 0.0)
366 s += p_i * log(p_i / m_i);
367 if (q_i > 0.0)
368 s += q_i * log(q_i / m_i);
369 }
370
371 return sqrt(s / 2.0);
372 }
373
374
375 static NPY_INLINE double
seuclidean_distance(const double * var,const double * u,const double * v,const npy_intp n)376 seuclidean_distance(const double *var, const double *u, const double *v,
377 const npy_intp n)
378 {
379 double s = 0.0;
380 npy_intp i;
381
382 for (i = 0; i < n; ++i) {
383 const double d = u[i] - v[i];
384 s += (d * d) / var[i];
385 }
386 return sqrt(s);
387 }
388
389 static NPY_INLINE double
city_block_distance_double(const double * u,const double * v,const npy_intp n)390 city_block_distance_double(const double *u, const double *v, const npy_intp n)
391 {
392 double s = 0.0;
393 npy_intp i;
394
395 for (i = 0; i < n; ++i) {
396 s += fabs(u[i] - v[i]);
397 }
398 return s;
399 }
400
401 static NPY_INLINE double
minkowski_distance(const double * u,const double * v,const npy_intp n,const double p)402 minkowski_distance(const double *u, const double *v, const npy_intp n, const double p)
403 {
404 double s = 0.0;
405 npy_intp i;
406
407 for (i = 0; i < n; ++i) {
408 const double d = fabs(u[i] - v[i]);
409 s += pow(d, p);
410 }
411 return pow(s, 1.0 / p);
412 }
413
414 static NPY_INLINE double
weighted_minkowski_distance(const double * u,const double * v,const npy_intp n,const double p,const double * w)415 weighted_minkowski_distance(const double *u, const double *v, const npy_intp n,
416 const double p, const double *w)
417 {
418 npy_intp i = 0;
419 double s = 0.0;
420 for (i = 0; i < n; ++i) {
421 const double d = fabs(u[i] - v[i]);
422 s += pow(d, p) * w[i];
423 }
424 return pow(s, 1.0 / p);
425 }
426
427 #if 0 /* XXX unused */
428 static void
429 compute_mean_vector(double *res, const double *X, npy_intp num_rows, const npy_intp n)
430 {
431 npy_intp i, j;
432 const double *v;
433 for (i = 0; i < n; ++i) {
434 res[i] = 0.0;
435 }
436 for (j = 0; j < num_rows; ++j) {
437
438 v = X + (j * n);
439 for (i = 0; i < n; ++i) {
440 res[i] += v[i];
441 }
442 }
443 for (i = 0; i < n; ++i) {
444 res[i] /= (double)num_rows;
445 }
446 }
447 #endif
448
449 #define DEFINE_CDIST(name, type) \
450 static int cdist_ ## name ## _ ## type(const type *XA, const type *XB, \
451 double *dm, \
452 const npy_intp num_rowsA, \
453 const npy_intp num_rowsB, \
454 const npy_intp num_cols) \
455 { \
456 Py_ssize_t i, j; \
457 for (i = 0; i < num_rowsA; ++i) { \
458 const type *u = XA + num_cols * i; \
459 for (j = 0; j < num_rowsB; ++j, ++dm) { \
460 const type *v = XB + num_cols * j; \
461 *dm = name ## _distance_ ## type(u, v, num_cols); \
462 } \
463 } \
464 return 0;\
465 }
466
DEFINE_CDIST(bray_curtis,double)467 DEFINE_CDIST(bray_curtis, double)
468 DEFINE_CDIST(canberra, double)
469 DEFINE_CDIST(chebyshev, double)
470 DEFINE_CDIST(city_block, double)
471 DEFINE_CDIST(euclidean, double)
472 DEFINE_CDIST(jaccard, double)
473 DEFINE_CDIST(jensenshannon, double)
474 DEFINE_CDIST(sqeuclidean, double)
475
476 DEFINE_CDIST(dice, char)
477 DEFINE_CDIST(jaccard, char)
478 DEFINE_CDIST(kulsinski, char)
479 DEFINE_CDIST(rogerstanimoto, char)
480 DEFINE_CDIST(russellrao, char)
481 DEFINE_CDIST(sokalmichener, char)
482 DEFINE_CDIST(sokalsneath, char)
483 DEFINE_CDIST(yule, char)
484
485
486 #define DEFINE_PDIST(name, type) \
487 static int pdist_ ## name ## _ ## type(const type *X, double *dm, \
488 const npy_intp num_rows, \
489 const npy_intp num_cols) \
490 { \
491 Py_ssize_t i, j; \
492 double *it = dm; \
493 for (i = 0; i < num_rows; ++i) { \
494 const type *u = X + num_cols * i; \
495 for (j = i + 1; j < num_rows; ++j, it++) { \
496 const type *v = X + num_cols * j; \
497 *it = name ## _distance_ ## type(u, v, num_cols); \
498 } \
499 } \
500 return 0; \
501 }
502
503 DEFINE_PDIST(bray_curtis, double)
504 DEFINE_PDIST(canberra, double)
505 DEFINE_PDIST(chebyshev, double)
506 DEFINE_PDIST(city_block, double)
507 DEFINE_PDIST(euclidean, double)
508 DEFINE_PDIST(jaccard, double)
509 DEFINE_PDIST(jensenshannon, double)
510 DEFINE_PDIST(sqeuclidean, double)
511
512 DEFINE_PDIST(dice, char)
513 DEFINE_PDIST(jaccard, char)
514 DEFINE_PDIST(kulsinski, char)
515 DEFINE_PDIST(rogerstanimoto, char)
516 DEFINE_PDIST(russellrao, char)
517 DEFINE_PDIST(sokalmichener, char)
518 DEFINE_PDIST(sokalsneath, char)
519 DEFINE_PDIST(yule, char)
520
521 static NPY_INLINE int
522 pdist_mahalanobis(const double *X, double *dm, const npy_intp num_rows,
523 const npy_intp num_cols, const double *covinv)
524 {
525 npy_intp i, j;
526 double *dimbuf1 = calloc(2 * num_cols, sizeof(double));
527 double *dimbuf2;
528 if (!dimbuf1) {
529 return -1;
530 }
531
532 dimbuf2 = dimbuf1 + num_cols;
533
534 for (i = 0; i < num_rows; ++i) {
535 const double *u = X + (num_cols * i);
536 for (j = i + 1; j < num_rows; ++j, ++dm) {
537 const double *v = X + (num_cols * j);
538 *dm = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, num_cols);
539 }
540 }
541 free(dimbuf1);
542 return 0;
543 }
544
545 static NPY_INLINE int
pdist_weighted_chebyshev(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)546 pdist_weighted_chebyshev(const double *X, double *dm, npy_intp num_rows,
547 const npy_intp num_cols, const double *w)
548 {
549 npy_intp i, j;
550
551 for (i = 0; i < num_rows; ++i) {
552 const double *u = X + (num_cols * i);
553 for (j = i + 1; j < num_rows; ++j, ++dm) {
554 const double *v = X + (num_cols * j);
555 *dm = weighted_chebyshev_distance_double(u, v, num_cols, w);
556 }
557 }
558 return 0;
559 }
560
561 static NPY_INLINE int
pdist_cosine(const double * X,double * dm,const npy_intp num_rows,const npy_intp num_cols)562 pdist_cosine(const double *X, double *dm, const npy_intp num_rows,
563 const npy_intp num_cols)
564 {
565 double cosine;
566 npy_intp i, j;
567
568 double * norms_buff = calloc(num_rows, sizeof(double));
569 if (!norms_buff)
570 return -1;
571
572 _row_norms(X, num_rows, num_cols, norms_buff);
573
574 for (i = 0; i < num_rows; ++i) {
575 const double *u = X + (num_cols * i);
576 for (j = i + 1; j < num_rows; ++j, ++dm) {
577 const double *v = X + (num_cols * j);
578 cosine = dot_product(u, v, num_cols) / (norms_buff[i] * norms_buff[j]);
579 if (fabs(cosine) > 1.) {
580 /* Clip to correct rounding error. */
581 cosine = npy_copysign(1, cosine);
582 }
583 *dm = 1. - cosine;
584 }
585 }
586 free(norms_buff);
587 return 0;
588 }
589
590 static NPY_INLINE int
pdist_seuclidean(const double * X,const double * var,double * dm,const npy_intp num_rows,const npy_intp num_cols)591 pdist_seuclidean(const double *X, const double *var, double *dm,
592 const npy_intp num_rows, const npy_intp num_cols)
593 {
594 npy_intp i, j;
595
596 for (i = 0; i < num_rows; ++i) {
597 const double *u = X + (num_cols * i);
598 for (j = i + 1; j < num_rows; ++j, ++dm) {
599 const double *v = X + (num_cols * j);
600 *dm = seuclidean_distance(var, u, v, num_cols);
601 }
602 }
603 return 0;
604 }
605
606 static NPY_INLINE int
pdist_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p)607 pdist_minkowski(const double *X, double *dm, npy_intp num_rows,
608 const npy_intp num_cols, const double p)
609 {
610 npy_intp i, j;
611 if (p == 1.0) {
612 return pdist_city_block_double(X, dm, num_rows, num_cols);
613 }
614 if (p == 2.0) {
615 return pdist_euclidean_double(X, dm, num_rows, num_cols);
616 }
617 if (sc_isinf(p)) {
618 return pdist_chebyshev_double(X, dm, num_rows, num_cols);
619 }
620
621 for (i = 0; i < num_rows; ++i) {
622 const double *u = X + (num_cols * i);
623 for (j = i + 1; j < num_rows; ++j, ++dm) {
624 const double *v = X + (num_cols * j);
625 *dm = minkowski_distance(u, v, num_cols, p);
626 }
627 }
628 return 0;
629 }
630
631 /* Old weighting type which is inconsistent with other distance metrics.
632 Remove in SciPy 1.8 along with wminkowski. */
633 static NPY_INLINE int
pdist_old_weighted_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p,const double * w)634 pdist_old_weighted_minkowski(const double *X, double *dm, npy_intp num_rows,
635 const npy_intp num_cols, const double p, const double *w)
636 {
637 npy_intp i, j;
638
639 // Covert from old style weights to new weights
640 double * new_weights = malloc(num_cols * sizeof(double));
641 if (!new_weights) {
642 return 1;
643 }
644 for (i = 0; i < num_cols; ++i) {
645 new_weights[i] = pow(w[i], p);
646 }
647
648 for (i = 0; i < num_rows; ++i) {
649 const double *u = X + (num_cols * i);
650 for (j = i + 1; j < num_rows; ++j, ++dm) {
651 const double *v = X + (num_cols * j);
652 *dm = weighted_minkowski_distance(u, v, num_cols, p, new_weights);
653 }
654 }
655 free(new_weights);
656 return 0;
657 }
658
659 static NPY_INLINE int
pdist_weighted_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p,const double * w)660 pdist_weighted_minkowski(const double *X, double *dm, npy_intp num_rows,
661 const npy_intp num_cols, const double p, const double *w)
662 {
663 npy_intp i, j;
664
665 if (sc_isinf(p)) {
666 return pdist_weighted_chebyshev(X, dm, num_rows, num_cols, w);
667 }
668
669 for (i = 0; i < num_rows; ++i) {
670 const double *u = X + (num_cols * i);
671 for (j = i + 1; j < num_rows; ++j, ++dm) {
672 const double *v = X + (num_cols * j);
673 *dm = weighted_minkowski_distance(u, v, num_cols, p, w);
674 }
675 }
676 return 0;
677 }
678
679 static NPY_INLINE int
pdist_hamming_double(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)680 pdist_hamming_double(const double *X, double *dm, npy_intp num_rows,
681 const npy_intp num_cols, const double *w)
682 {
683 npy_intp i, j;
684
685 for (i = 0; i < num_rows; ++i) {
686 const double *u = X + (num_cols * i);
687 for (j = i + 1; j < num_rows; ++j, ++dm) {
688 const double *v = X + (num_cols * j);
689 *dm = hamming_distance_double(u, v, num_cols, w);
690 }
691 }
692 return 0;
693 }
694
695 static NPY_INLINE int
pdist_hamming_char(const char * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)696 pdist_hamming_char(const char *X, double *dm, npy_intp num_rows,
697 const npy_intp num_cols, const double *w)
698 {
699 npy_intp i, j;
700
701 for (i = 0; i < num_rows; ++i) {
702 const char *u = X + (num_cols * i);
703 for (j = i + 1; j < num_rows; ++j, ++dm) {
704 const char *v = X + (num_cols * j);
705 *dm = hamming_distance_char(u, v, num_cols, w);
706 }
707 }
708 return 0;
709 }
710
711 static NPY_INLINE void
dist_to_squareform_from_vector_generic(char * M,const char * v,const npy_intp n,npy_intp s)712 dist_to_squareform_from_vector_generic(char *M, const char *v, const npy_intp n,
713 npy_intp s)
714 {
715 char *it1 = M + s;
716 char *it2;
717 npy_intp i, j;
718
719 for (i = 1; i < n; ++i) {
720 memcpy(it1, v, (n - i) * s);
721 it1 += (n + 1) * s;
722
723 it2 = M + i * (n + 1) * s - s;
724 for (j = i; j < n; ++j) {
725 memcpy(it2, v, s);
726 v += s;
727 it2 += n*s;
728 }
729 }
730 }
731
732 static NPY_INLINE void
dist_to_squareform_from_vector_double(double * M,const double * v,const npy_intp n)733 dist_to_squareform_from_vector_double(double *M, const double *v, const npy_intp n)
734 {
735 double *it1 = M + 1;
736 double *it2;
737 npy_intp i, j;
738
739 for (i = 1; i < n; ++i, it1 += n+1) {
740 memcpy(it1, v, (n - i) * sizeof(double));
741
742 it2 = M + i * (n + 1) - 1;
743 for (j = i; j < n; ++j, ++v, it2 += n) {
744 *it2 = *v;
745 }
746 }
747 }
748
749 static NPY_INLINE void
dist_to_vector_from_squareform(const char * M,char * v,const npy_intp n,npy_intp s)750 dist_to_vector_from_squareform(const char *M, char *v, const npy_intp n, npy_intp s)
751 {
752 const char *cit = M + s;
753 npy_intp i;
754
755 for (i = 1; i < n; ++i) {
756 const npy_intp len = (n - i) * s;
757 memcpy(v, cit, len);
758 v += len;
759 cit += (n + 1) * s;
760 }
761 }
762
763 static NPY_INLINE int
cdist_weighted_chebyshev(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)764 cdist_weighted_chebyshev(const double *XA, const double *XB, double *dm,
765 const npy_intp num_rowsA, const npy_intp num_rowsB,
766 const npy_intp num_cols, const double *w)
767 {
768 npy_intp i, j;
769
770 for (i = 0; i < num_rowsA; ++i) {
771 const double *u = XA + (num_cols * i);
772 for (j = 0; j < num_rowsB; ++j, ++dm) {
773 const double *v = XB + (num_cols * j);
774 *dm = weighted_chebyshev_distance_double(u, v, num_cols, w);
775 }
776 }
777 return 0;
778 }
779
780
781 /** cdist */
782 static NPY_INLINE int
cdist_cosine(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols)783 cdist_cosine(const double *XA, const double *XB, double *dm, const npy_intp num_rowsA,
784 const npy_intp num_rowsB, const npy_intp num_cols)
785 {
786 double cosine;
787 npy_intp i, j;
788
789 double * norms_buffA = calloc(num_rowsA + num_rowsB, sizeof(double));
790 double * norms_buffB;
791 if (!norms_buffA)
792 return -1;
793
794 norms_buffB = norms_buffA + num_rowsA;
795
796 _row_norms(XA, num_rowsA, num_cols, norms_buffA);
797 _row_norms(XB, num_rowsB, num_cols, norms_buffB);
798
799 for (i = 0; i < num_rowsA; ++i) {
800 const double *u = XA + (num_cols * i);
801 for (j = 0; j < num_rowsB; ++j, ++dm) {
802 const double *v = XB + (num_cols * j);
803 cosine = dot_product(u, v, num_cols) / (norms_buffA[i] * norms_buffB[j]);
804 if (fabs(cosine) > 1.) {
805 /* Clip to correct rounding error. */
806 cosine = npy_copysign(1, cosine);
807 }
808 *dm = 1. - cosine;
809 }
810 }
811 free(norms_buffA);
812 return 0;
813 }
814
815 static NPY_INLINE int
cdist_mahalanobis(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * covinv)816 cdist_mahalanobis(const double *XA, const double *XB, double *dm,
817 const npy_intp num_rowsA, const npy_intp num_rowsB,
818 const npy_intp num_cols, const double *covinv)
819 {
820 npy_intp i, j;
821
822 double *dimbuf1 = calloc(2 * num_cols, sizeof(double));
823 double *dimbuf2;
824 if (!dimbuf1) {
825 return -1;
826 }
827 dimbuf2 = dimbuf1 + num_cols;
828
829 for (i = 0; i < num_rowsA; ++i) {
830 const double *u = XA + (num_cols * i);
831 for (j = 0; j < num_rowsB; ++j, ++dm) {
832 const double *v = XB + (num_cols * j);
833 *dm = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, num_cols);
834 }
835 }
836 free(dimbuf1);
837 return 0;
838 }
839
840 static NPY_INLINE int
cdist_seuclidean(const double * XA,const double * XB,const double * var,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols)841 cdist_seuclidean(const double *XA, const double *XB, const double *var,
842 double *dm, const npy_intp num_rowsA, const npy_intp num_rowsB,
843 const npy_intp num_cols)
844 {
845 npy_intp i, j;
846
847 for (i = 0; i < num_rowsA; ++i) {
848 const double *u = XA + (num_cols * i);
849 for (j = 0; j < num_rowsB; ++j, ++dm) {
850 const double *v = XB + (num_cols * j);
851 *dm = seuclidean_distance(var, u, v, num_cols);
852 }
853 }
854 return 0;
855 }
856
857 static NPY_INLINE int
cdist_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p)858 cdist_minkowski(const double *XA, const double *XB, double *dm,
859 const npy_intp num_rowsA, const npy_intp num_rowsB,
860 const npy_intp num_cols, const double p)
861 {
862 npy_intp i, j;
863 if (p == 1.0) {
864 return cdist_city_block_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
865 }
866 if (p == 2.0) {
867 return cdist_euclidean_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
868 }
869 if (sc_isinf(p)) {
870 return cdist_chebyshev_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
871 }
872
873 for (i = 0; i < num_rowsA; ++i) {
874 const double *u = XA + (num_cols * i);
875 for (j = 0; j < num_rowsB; ++j, ++dm) {
876 const double *v = XB + (num_cols * j);
877 *dm = minkowski_distance(u, v, num_cols, p);
878 }
879 }
880 return 0;
881 }
882
883 /* Old weighting type which is inconsistent with other distance metrics.
884 Remove in SciPy 1.8 along with wminkowski. */
885 static NPY_INLINE int
cdist_old_weighted_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p,const double * w)886 cdist_old_weighted_minkowski(const double *XA, const double *XB, double *dm,
887 const npy_intp num_rowsA, const npy_intp num_rowsB,
888 const npy_intp num_cols, const double p,
889 const double *w)
890 {
891 npy_intp i, j;
892
893 // Covert from old style weights to new weights
894 double * new_weights = malloc(num_cols * sizeof(double));
895 if (!new_weights) {
896 return 1;
897 }
898
899 for (i = 0; i < num_cols; ++i) {
900 new_weights[i] = pow(w[i], p);
901 }
902
903 for (i = 0; i < num_rowsA; ++i) {
904 const double *u = XA + (num_cols * i);
905 for (j = 0; j < num_rowsB; ++j, ++dm) {
906 const double *v = XB + (num_cols * j);
907 *dm = weighted_minkowski_distance(u, v, num_cols, p, new_weights);
908 }
909 }
910 free(new_weights);
911 return 0;
912 }
913
914 static NPY_INLINE int
cdist_weighted_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p,const double * w)915 cdist_weighted_minkowski(const double *XA, const double *XB, double *dm,
916 const npy_intp num_rowsA, const npy_intp num_rowsB,
917 const npy_intp num_cols, const double p,
918 const double *w)
919 {
920 npy_intp i, j;
921
922 if (sc_isinf(p)) {
923 return cdist_weighted_chebyshev(XA, XB, dm, num_rowsA, num_rowsB, num_cols, w);
924 }
925
926 for (i = 0; i < num_rowsA; ++i) {
927 const double *u = XA + (num_cols * i);
928 for (j = 0; j < num_rowsB; ++j, ++dm) {
929 const double *v = XB + (num_cols * j);
930 *dm = weighted_minkowski_distance(u, v, num_cols, p, w);
931 }
932 }
933 return 0;
934 }
935
936 static NPY_INLINE int
cdist_hamming_double(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)937 cdist_hamming_double(const double *XA, const double *XB, double *dm,
938 const npy_intp num_rowsA, const npy_intp num_rowsB,
939 const npy_intp num_cols,
940 const double *w)
941 {
942 npy_intp i, j;
943
944 for (i = 0; i < num_rowsA; ++i) {
945 const double *u = XA + (num_cols * i);
946 for (j = 0; j < num_rowsB; ++j, ++dm) {
947 const double *v = XB + (num_cols * j);
948 *dm = hamming_distance_double(u, v, num_cols, w);
949 }
950 }
951 return 0;
952 }
953
954 static NPY_INLINE int
cdist_hamming_char(const char * XA,const char * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)955 cdist_hamming_char(const char *XA, const char *XB, double *dm,
956 const npy_intp num_rowsA, const npy_intp num_rowsB,
957 const npy_intp num_cols,
958 const double *w)
959 {
960 npy_intp i, j;
961
962 for (i = 0; i < num_rowsA; ++i) {
963 const char *u = XA + (num_cols * i);
964 for (j = 0; j < num_rowsB; ++j, ++dm) {
965 const char *v = XB + (num_cols * j);
966 *dm = hamming_distance_char(u, v, num_cols, w);
967 }
968 }
969 return 0;
970 }
971