1 /**
2  * Author: Damian Eads
3  * Date:   September 22, 2007 (moved to new file on June 8, 2008)
4  *
5  * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
6  * Adapted for incorporation into Scipy, April 9, 2008.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *   - Redistributions of source code must retain the above
12  *     copyright notice, this list of conditions and the
13  *     following disclaimer.
14  *   - Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer
16  *     in the documentation and/or other materials provided with the
17  *     distribution.
18  *   - Neither the name of the author nor the names of its
19  *     contributors may be used to endorse or promote products derived
20  *     from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 #include "_c99compat.h"
35 
36 
37 static NPY_INLINE void
_row_norms(const double * X,npy_intp num_rows,const npy_intp num_cols,double * norms_buff)38 _row_norms(const double *X, npy_intp num_rows, const npy_intp num_cols, double *norms_buff){
39     /* Compute the row norms. */
40     npy_intp i, j;
41     for (i = 0; i < num_rows; ++i) {
42         for (j = 0; j < num_cols; ++j, ++X) {
43             const double curr_val = *X;
44             norms_buff[i] += curr_val * curr_val;
45         }
46         norms_buff[i] = sqrt(norms_buff[i]);
47     }
48 }
49 
50 static NPY_INLINE double
sqeuclidean_distance_double(const double * u,const double * v,const npy_intp n)51 sqeuclidean_distance_double(const double *u, const double *v, const npy_intp n)
52 {
53     double s = 0.0;
54     npy_intp i;
55 
56     for (i = 0; i < n; ++i) {
57         const double d = u[i] - v[i];
58         s += d * d;
59     }
60     return s;
61 }
62 
63 static NPY_INLINE double
euclidean_distance_double(const double * u,const double * v,const npy_intp n)64 euclidean_distance_double(const double *u, const double *v, const npy_intp n)
65 {
66     return sqrt(sqeuclidean_distance_double(u, v, n));
67 }
68 
69 #if 0   /* XXX unused */
70 static NPY_INLINE double
71 ess_distance(const double *u, const double *v, const npy_intp n)
72 {
73     double s = 0.0, d;
74     npy_intp i;
75 
76     for (i = 0; i < n; ++i) {
77         d = fabs(u[i] - v[i]);
78         s += d * d;
79     }
80     return s;
81 }
82 #endif
83 
84 static NPY_INLINE double
chebyshev_distance_double(const double * u,const double * v,const npy_intp n)85 chebyshev_distance_double(const double *u, const double *v, const npy_intp n)
86 {
87     double maxv = 0.0;
88     npy_intp i;
89 
90     for (i = 0; i < n; ++i) {
91         const double d = fabs(u[i] - v[i]);
92         if (d > maxv) {
93             maxv = d;
94         }
95     }
96     return maxv;
97 }
98 
99 static NPY_INLINE double
weighted_chebyshev_distance_double(const double * u,const double * v,const npy_intp n,const double * w)100 weighted_chebyshev_distance_double(const double *u, const double *v,
101                                    const npy_intp n, const double *w)
102 {
103     npy_intp i;
104     double maxv = 0.0;
105     for (i = 0; i < n; ++i) {
106         if (w[i] == 0.0) continue;
107         const double d = fabs(u[i] - v[i]);
108         if (d > maxv) {
109             maxv = d;
110         }
111     }
112     return maxv;
113 }
114 
115 static NPY_INLINE double
canberra_distance_double(const double * u,const double * v,const npy_intp n)116 canberra_distance_double(const double *u, const double *v, const npy_intp n)
117 {
118     double tot = 0.;
119     npy_intp i;
120 
121     for (i = 0; i < n; ++i) {
122         const double x = u[i], y = v[i];
123         const double snum = fabs(x - y);
124         const double sdenom = fabs(x) + fabs(y);
125         if (sdenom > 0.) {
126             tot += snum / sdenom;
127         }
128     }
129     return tot;
130 }
131 
132 static NPY_INLINE double
bray_curtis_distance_double(const double * u,const double * v,const npy_intp n)133 bray_curtis_distance_double(const double *u, const double *v, const npy_intp n)
134 {
135     double s1 = 0.0, s2 = 0.0;
136     npy_intp i;
137 
138     for (i = 0; i < n; ++i) {
139         s1 += fabs(u[i] - v[i]);
140         s2 += fabs(u[i] + v[i]);
141     }
142     return s1 / s2;
143 }
144 
145 /*
146  * Timings with various BLAS implementations (gh-5657) have shown that using
147  * OpenBLAS's cblas_ddot here can give some speedup, but only for high-d data
148  * and an untuned ATLAS is slower than rolling our own.
149  */
150 static NPY_INLINE double
dot_product(const double * u,const double * v,const npy_intp n)151 dot_product(const double *u, const double *v, const npy_intp n)
152 {
153     double s = 0.0;
154     npy_intp i;
155 
156     for (i = 0; i < n; ++i) {
157         s += u[i] * v[i];
158     }
159     return s;
160 }
161 
162 static NPY_INLINE double
mahalanobis_distance(const double * u,const double * v,const double * covinv,double * dimbuf1,double * dimbuf2,const npy_intp n)163 mahalanobis_distance(const double *u, const double *v, const double *covinv,
164                      double *dimbuf1, double *dimbuf2, const npy_intp n)
165 {
166     npy_intp i;
167 
168     for (i = 0; i < n; ++i) {
169         dimbuf1[i] = u[i] - v[i];
170     }
171     /*
172      * Note: matrix-vector multiplication (GEMV). Again, OpenBLAS can speed
173      * this up for high-d data.
174      */
175     for (i = 0; i < n; ++i) {
176         const double *covrow = covinv + (i * n);
177         dimbuf2[i] = dot_product(dimbuf1, covrow, n);
178     }
179     return sqrt(dot_product(dimbuf1, dimbuf2, n));
180 }
181 
182 static NPY_INLINE double
hamming_distance_double(const double * u,const double * v,const npy_intp n,const double * w)183 hamming_distance_double(const double *u, const double *v, const npy_intp n, const double *w)
184 {
185     npy_intp i;
186     double s = 0;
187     double w_sum = 0;
188 
189     for (i = 0; i < n; ++i) {
190         s += ((double) (u[i] != v[i])) * w[i];
191         w_sum += w[i];
192     }
193 
194     return s / w_sum;
195 }
196 
197 static NPY_INLINE double
hamming_distance_char(const char * u,const char * v,const npy_intp n,const double * w)198 hamming_distance_char(const char *u, const char *v, const npy_intp n, const double *w)
199 {
200     npy_intp i;
201     double s = 0;
202     double w_sum = 0;
203 
204     for (i = 0; i < n; ++i) {
205         s += ((double) (u[i] != v[i])) * w[i];
206         w_sum += w[i];
207     }
208 
209     return s / w_sum;
210 }
211 
212 static NPY_INLINE double
yule_distance_char(const char * u,const char * v,const npy_intp n)213 yule_distance_char(const char *u, const char *v, const npy_intp n)
214 {
215     npy_intp i;
216     npy_intp ntt = 0, nff = 0, nft = 0, ntf = 0;
217 
218     for (i = 0; i < n; ++i) {
219         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
220         ntt += x & y;
221         ntf += x & (!y);
222         nft += (!x) & y;
223     }
224     nff = n - ntt - ntf - nft;
225     double half_R = (double)ntf * nft;
226     if (half_R == 0.0) {
227         return 0.0;
228     }
229     return (2. * half_R) / ((double)ntt * nff + half_R);
230 }
231 
232 static NPY_INLINE double
dice_distance_char(const char * u,const char * v,const npy_intp n)233 dice_distance_char(const char *u, const char *v, const npy_intp n)
234 {
235     npy_intp i;
236     npy_intp ntt = 0, ndiff = 0;
237 
238     for (i = 0; i < n; ++i) {
239         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
240         ntt += x & y;
241         ndiff += (x != y);
242     }
243     return ndiff / (2. * ntt + ndiff);
244 }
245 
246 static NPY_INLINE double
rogerstanimoto_distance_char(const char * u,const char * v,const npy_intp n)247 rogerstanimoto_distance_char(const char *u, const char *v, const npy_intp n)
248 {
249     npy_intp i;
250     npy_intp ntt = 0, ndiff = 0;
251 
252     for (i = 0; i < n; ++i) {
253         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
254         ntt += x & y;
255         ndiff += (x != y);
256     }
257     return (2. * ndiff) / ((double)n + ndiff);
258 }
259 
260 static NPY_INLINE double
russellrao_distance_char(const char * u,const char * v,const npy_intp n)261 russellrao_distance_char(const char *u, const char *v, const npy_intp n)
262 {
263     npy_intp i;
264     npy_intp ntt = 0;
265 
266     for (i = 0; i < n; ++i) {
267         ntt += (u[i] != 0) & (v[i] != 0);
268     }
269     return (double)(n - ntt) / n;
270 }
271 
272 static NPY_INLINE double
kulsinski_distance_char(const char * u,const char * v,const npy_intp n)273 kulsinski_distance_char(const char *u, const char *v, const npy_intp n)
274 {
275     npy_intp i;
276     npy_intp ntt = 0, ndiff = 0;
277 
278     for (i = 0; i < n; ++i) {
279         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
280         ntt += x & y;
281         ndiff += (x != y);
282     }
283     return ((double)ndiff - ntt + n) / ((double)ndiff + n);
284 }
285 
286 static NPY_INLINE double
sokalsneath_distance_char(const char * u,const char * v,const npy_intp n)287 sokalsneath_distance_char(const char *u, const char *v, const npy_intp n)
288 {
289     npy_intp i;
290     npy_intp ntt = 0, ndiff = 0;
291 
292     for (i = 0; i < n; ++i) {
293         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
294         ntt += x & y;
295         ndiff += (x != y);
296     }
297     return (2. * ndiff) / (2. * ndiff + ntt);
298 }
299 
300 static NPY_INLINE double
sokalmichener_distance_char(const char * u,const char * v,const npy_intp n)301 sokalmichener_distance_char(const char *u, const char *v, const npy_intp n)
302 {
303     npy_intp i;
304     npy_intp ntt = 0, ndiff = 0;
305 
306     for (i = 0; i < n; ++i) {
307         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
308         ntt += x & y;
309         ndiff += (x != y);
310     }
311     return (2. * ndiff) / ((double)ndiff + n);
312 }
313 
314 static NPY_INLINE double
jaccard_distance_double(const double * u,const double * v,const npy_intp n)315 jaccard_distance_double(const double *u, const double *v, const npy_intp n)
316 {
317     npy_intp denom = 0, num = 0;
318     npy_intp i;
319 
320     for (i = 0; i < n; ++i) {
321         const double x = u[i], y = v[i];
322         num += (x != y) & ((x != 0.0) | (y != 0.0));
323         denom += (x != 0.0) | (y != 0.0);
324     }
325     return denom == 0.0 ? 0.0 : (double)num / denom;
326 }
327 
328 static NPY_INLINE double
jaccard_distance_char(const char * u,const char * v,const npy_intp n)329 jaccard_distance_char(const char *u, const char *v, const npy_intp n)
330 {
331     npy_intp num = 0, denom = 0;
332     npy_intp i;
333 
334     for (i = 0; i < n; ++i) {
335         const npy_bool x = (u[i] != 0), y = (v[i] != 0);
336         num += (x != y);
337         denom += x | y;
338     }
339     return denom == 0.0 ? 0.0 : (double)num / denom;
340 }
341 
342 
343 static NPY_INLINE double
jensenshannon_distance_double(const double * p,const double * q,const npy_intp n)344 jensenshannon_distance_double(const double *p, const double *q, const npy_intp n)
345 {
346     npy_intp i;
347     double s = 0.0;
348     double p_sum = 0.0;
349     double q_sum = 0.0;
350 
351     for (i = 0; i < n; ++i) {
352         if (p[i] < 0.0 || q[i] < 0.0)
353             return HUGE_VAL;
354         p_sum += p[i];
355         q_sum += q[i];
356     }
357 
358     if (p_sum == 0.0 || q_sum == 0.0)
359         return HUGE_VAL;
360 
361     for (i = 0; i < n; ++i) {
362         const double p_i = p[i] / p_sum;
363         const double q_i = q[i] / q_sum;
364         const double m_i = (p_i + q_i) / 2.0;
365         if (p_i > 0.0)
366             s += p_i * log(p_i / m_i);
367         if (q_i > 0.0)
368             s += q_i * log(q_i / m_i);
369     }
370 
371     return sqrt(s / 2.0);
372 }
373 
374 
375 static NPY_INLINE double
seuclidean_distance(const double * var,const double * u,const double * v,const npy_intp n)376 seuclidean_distance(const double *var, const double *u, const double *v,
377                     const npy_intp n)
378 {
379     double s = 0.0;
380     npy_intp i;
381 
382     for (i = 0; i < n; ++i) {
383         const double d = u[i] - v[i];
384         s += (d * d) / var[i];
385     }
386     return sqrt(s);
387 }
388 
389 static NPY_INLINE double
city_block_distance_double(const double * u,const double * v,const npy_intp n)390 city_block_distance_double(const double *u, const double *v, const npy_intp n)
391 {
392     double s = 0.0;
393     npy_intp i;
394 
395     for (i = 0; i < n; ++i) {
396         s += fabs(u[i] - v[i]);
397     }
398     return s;
399 }
400 
401 static NPY_INLINE double
minkowski_distance(const double * u,const double * v,const npy_intp n,const double p)402 minkowski_distance(const double *u, const double *v, const npy_intp n, const double p)
403 {
404     double s = 0.0;
405     npy_intp i;
406 
407     for (i = 0; i < n; ++i) {
408         const double d = fabs(u[i] - v[i]);
409         s += pow(d, p);
410     }
411     return pow(s, 1.0 / p);
412 }
413 
414 static NPY_INLINE double
weighted_minkowski_distance(const double * u,const double * v,const npy_intp n,const double p,const double * w)415 weighted_minkowski_distance(const double *u, const double *v, const npy_intp n,
416                             const double p, const double *w)
417 {
418     npy_intp i = 0;
419     double s = 0.0;
420     for (i = 0; i < n; ++i) {
421         const double d = fabs(u[i] - v[i]);
422         s += pow(d, p) * w[i];
423     }
424     return pow(s, 1.0 / p);
425 }
426 
427 #if 0   /* XXX unused */
428 static void
429 compute_mean_vector(double *res, const double *X, npy_intp num_rows, const npy_intp n)
430 {
431     npy_intp i, j;
432     const double *v;
433     for (i = 0; i < n; ++i) {
434         res[i] = 0.0;
435     }
436     for (j = 0; j < num_rows; ++j) {
437 
438         v = X + (j * n);
439         for (i = 0; i < n; ++i) {
440             res[i] += v[i];
441         }
442     }
443     for (i = 0; i < n; ++i) {
444         res[i] /= (double)num_rows;
445     }
446 }
447 #endif
448 
449 #define DEFINE_CDIST(name, type) \
450     static int cdist_ ## name ## _ ## type(const type *XA, const type *XB, \
451                                            double *dm,                     \
452                                            const npy_intp num_rowsA,       \
453                                            const npy_intp num_rowsB,       \
454                                            const npy_intp num_cols)        \
455     {                                                                      \
456         Py_ssize_t i, j;                                                   \
457         for (i = 0; i < num_rowsA; ++i) {                                  \
458             const type *u = XA + num_cols * i;                             \
459             for (j = 0; j < num_rowsB; ++j, ++dm) {                        \
460                 const type *v = XB + num_cols * j;                         \
461                 *dm = name ## _distance_ ## type(u, v, num_cols);          \
462             }                                                              \
463         }                                                                  \
464         return 0;\
465     }
466 
DEFINE_CDIST(bray_curtis,double)467 DEFINE_CDIST(bray_curtis, double)
468 DEFINE_CDIST(canberra, double)
469 DEFINE_CDIST(chebyshev, double)
470 DEFINE_CDIST(city_block, double)
471 DEFINE_CDIST(euclidean, double)
472 DEFINE_CDIST(jaccard, double)
473 DEFINE_CDIST(jensenshannon, double)
474 DEFINE_CDIST(sqeuclidean, double)
475 
476 DEFINE_CDIST(dice, char)
477 DEFINE_CDIST(jaccard, char)
478 DEFINE_CDIST(kulsinski, char)
479 DEFINE_CDIST(rogerstanimoto, char)
480 DEFINE_CDIST(russellrao, char)
481 DEFINE_CDIST(sokalmichener, char)
482 DEFINE_CDIST(sokalsneath, char)
483 DEFINE_CDIST(yule, char)
484 
485 
486 #define DEFINE_PDIST(name, type) \
487     static int pdist_ ## name ## _ ## type(const type *X, double *dm,       \
488                                            const npy_intp num_rows,         \
489                                            const npy_intp num_cols)         \
490     {                                                                       \
491         Py_ssize_t i, j;                                                    \
492         double *it = dm;                                                    \
493         for (i = 0; i < num_rows; ++i) {                                    \
494             const type *u = X + num_cols * i;                               \
495             for (j = i + 1; j < num_rows; ++j, it++) {                      \
496                 const type *v = X + num_cols * j;                           \
497                 *it = name ## _distance_ ## type(u, v, num_cols);           \
498             }                                                               \
499         }                                                                   \
500         return 0; \
501     }
502 
503 DEFINE_PDIST(bray_curtis, double)
504 DEFINE_PDIST(canberra, double)
505 DEFINE_PDIST(chebyshev, double)
506 DEFINE_PDIST(city_block, double)
507 DEFINE_PDIST(euclidean, double)
508 DEFINE_PDIST(jaccard, double)
509 DEFINE_PDIST(jensenshannon, double)
510 DEFINE_PDIST(sqeuclidean, double)
511 
512 DEFINE_PDIST(dice, char)
513 DEFINE_PDIST(jaccard, char)
514 DEFINE_PDIST(kulsinski, char)
515 DEFINE_PDIST(rogerstanimoto, char)
516 DEFINE_PDIST(russellrao, char)
517 DEFINE_PDIST(sokalmichener, char)
518 DEFINE_PDIST(sokalsneath, char)
519 DEFINE_PDIST(yule, char)
520 
521 static NPY_INLINE int
522 pdist_mahalanobis(const double *X, double *dm, const npy_intp num_rows,
523                   const npy_intp num_cols, const double *covinv)
524 {
525     npy_intp i, j;
526     double *dimbuf1 = calloc(2 * num_cols, sizeof(double));
527     double *dimbuf2;
528     if (!dimbuf1) {
529         return -1;
530     }
531 
532     dimbuf2 = dimbuf1 + num_cols;
533 
534     for (i = 0; i < num_rows; ++i) {
535         const double *u = X + (num_cols * i);
536         for (j = i + 1; j < num_rows; ++j, ++dm) {
537             const double *v = X + (num_cols * j);
538             *dm = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, num_cols);
539         }
540     }
541     free(dimbuf1);
542     return 0;
543 }
544 
545 static NPY_INLINE int
pdist_weighted_chebyshev(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)546 pdist_weighted_chebyshev(const double *X, double *dm, npy_intp num_rows,
547                          const npy_intp num_cols, const double *w)
548 {
549     npy_intp i, j;
550 
551     for (i = 0; i < num_rows; ++i) {
552         const double *u = X + (num_cols * i);
553         for (j = i + 1; j < num_rows; ++j, ++dm) {
554             const double *v = X + (num_cols * j);
555             *dm = weighted_chebyshev_distance_double(u, v, num_cols, w);
556         }
557     }
558     return 0;
559 }
560 
561 static NPY_INLINE int
pdist_cosine(const double * X,double * dm,const npy_intp num_rows,const npy_intp num_cols)562 pdist_cosine(const double *X, double *dm, const npy_intp num_rows,
563              const npy_intp num_cols)
564 {
565     double cosine;
566     npy_intp i, j;
567 
568     double * norms_buff = calloc(num_rows, sizeof(double));
569     if (!norms_buff)
570         return -1;
571 
572     _row_norms(X, num_rows, num_cols, norms_buff);
573 
574     for (i = 0; i < num_rows; ++i) {
575         const double *u = X + (num_cols * i);
576         for (j = i + 1; j < num_rows; ++j, ++dm) {
577             const double *v = X + (num_cols * j);
578             cosine = dot_product(u, v, num_cols) / (norms_buff[i] * norms_buff[j]);
579             if (fabs(cosine) > 1.) {
580                 /* Clip to correct rounding error. */
581                 cosine = npy_copysign(1, cosine);
582             }
583             *dm = 1. - cosine;
584         }
585     }
586     free(norms_buff);
587     return 0;
588 }
589 
590 static NPY_INLINE int
pdist_seuclidean(const double * X,const double * var,double * dm,const npy_intp num_rows,const npy_intp num_cols)591 pdist_seuclidean(const double *X, const double *var, double *dm,
592                  const npy_intp num_rows, const npy_intp num_cols)
593 {
594     npy_intp i, j;
595 
596     for (i = 0; i < num_rows; ++i) {
597         const double *u = X + (num_cols * i);
598         for (j = i + 1; j < num_rows; ++j, ++dm) {
599             const double *v = X + (num_cols * j);
600             *dm = seuclidean_distance(var, u, v, num_cols);
601         }
602     }
603     return 0;
604 }
605 
606 static NPY_INLINE int
pdist_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p)607 pdist_minkowski(const double *X, double *dm, npy_intp num_rows,
608                 const npy_intp num_cols, const double p)
609 {
610     npy_intp i, j;
611     if (p == 1.0) {
612         return pdist_city_block_double(X, dm, num_rows, num_cols);
613     }
614     if (p == 2.0) {
615         return pdist_euclidean_double(X, dm, num_rows, num_cols);
616     }
617     if (sc_isinf(p)) {
618         return pdist_chebyshev_double(X, dm, num_rows, num_cols);
619     }
620 
621     for (i = 0; i < num_rows; ++i) {
622         const double *u = X + (num_cols * i);
623         for (j = i + 1; j < num_rows; ++j, ++dm) {
624             const double *v = X + (num_cols * j);
625             *dm = minkowski_distance(u, v, num_cols, p);
626         }
627     }
628     return 0;
629 }
630 
631 /* Old weighting type which is inconsistent with other distance metrics.
632    Remove in SciPy 1.8 along with wminkowski. */
633 static NPY_INLINE int
pdist_old_weighted_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p,const double * w)634 pdist_old_weighted_minkowski(const double *X, double *dm, npy_intp num_rows,
635                              const npy_intp num_cols, const double p, const double *w)
636 {
637     npy_intp i, j;
638 
639     // Covert from old style weights to new weights
640     double * new_weights = malloc(num_cols * sizeof(double));
641     if (!new_weights) {
642         return 1;
643     }
644     for (i = 0; i < num_cols; ++i) {
645         new_weights[i] = pow(w[i], p);
646     }
647 
648     for (i = 0; i < num_rows; ++i) {
649         const double *u = X + (num_cols * i);
650         for (j = i + 1; j < num_rows; ++j, ++dm) {
651             const double *v = X + (num_cols * j);
652             *dm = weighted_minkowski_distance(u, v, num_cols, p, new_weights);
653         }
654     }
655     free(new_weights);
656     return 0;
657 }
658 
659 static NPY_INLINE int
pdist_weighted_minkowski(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double p,const double * w)660 pdist_weighted_minkowski(const double *X, double *dm, npy_intp num_rows,
661                          const npy_intp num_cols, const double p, const double *w)
662 {
663     npy_intp i, j;
664 
665     if (sc_isinf(p)) {
666         return pdist_weighted_chebyshev(X, dm, num_rows, num_cols, w);
667     }
668 
669     for (i = 0; i < num_rows; ++i) {
670         const double *u = X + (num_cols * i);
671         for (j = i + 1; j < num_rows; ++j, ++dm) {
672             const double *v = X + (num_cols * j);
673             *dm = weighted_minkowski_distance(u, v, num_cols, p, w);
674         }
675     }
676     return 0;
677 }
678 
679 static NPY_INLINE int
pdist_hamming_double(const double * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)680 pdist_hamming_double(const double *X, double *dm, npy_intp num_rows,
681                          const npy_intp num_cols, const double *w)
682 {
683     npy_intp i, j;
684 
685     for (i = 0; i < num_rows; ++i) {
686         const double *u = X + (num_cols * i);
687         for (j = i + 1; j < num_rows; ++j, ++dm) {
688             const double *v = X + (num_cols * j);
689             *dm = hamming_distance_double(u, v, num_cols, w);
690         }
691     }
692     return 0;
693 }
694 
695 static NPY_INLINE int
pdist_hamming_char(const char * X,double * dm,npy_intp num_rows,const npy_intp num_cols,const double * w)696 pdist_hamming_char(const char *X, double *dm, npy_intp num_rows,
697                          const npy_intp num_cols, const double *w)
698 {
699     npy_intp i, j;
700 
701     for (i = 0; i < num_rows; ++i) {
702         const char *u = X + (num_cols * i);
703         for (j = i + 1; j < num_rows; ++j, ++dm) {
704             const char *v = X + (num_cols * j);
705             *dm = hamming_distance_char(u, v, num_cols, w);
706         }
707     }
708     return 0;
709 }
710 
711 static NPY_INLINE void
dist_to_squareform_from_vector_generic(char * M,const char * v,const npy_intp n,npy_intp s)712 dist_to_squareform_from_vector_generic(char *M, const char *v, const npy_intp n,
713                                        npy_intp s)
714 {
715     char *it1 = M + s;
716     char *it2;
717     npy_intp i, j;
718 
719     for (i = 1; i < n; ++i) {
720         memcpy(it1, v, (n - i) * s);
721         it1 += (n + 1) * s;
722 
723         it2 = M + i * (n + 1) * s - s;
724         for (j = i; j < n; ++j) {
725             memcpy(it2, v, s);
726             v += s;
727             it2 += n*s;
728         }
729     }
730 }
731 
732 static NPY_INLINE void
dist_to_squareform_from_vector_double(double * M,const double * v,const npy_intp n)733 dist_to_squareform_from_vector_double(double *M, const double *v, const npy_intp n)
734 {
735     double *it1 = M + 1;
736     double *it2;
737     npy_intp i, j;
738 
739     for (i = 1; i < n; ++i, it1 += n+1) {
740         memcpy(it1, v, (n - i) * sizeof(double));
741 
742         it2 = M + i * (n + 1) - 1;
743         for (j = i; j < n; ++j, ++v, it2 += n) {
744             *it2 = *v;
745         }
746     }
747 }
748 
749 static NPY_INLINE void
dist_to_vector_from_squareform(const char * M,char * v,const npy_intp n,npy_intp s)750 dist_to_vector_from_squareform(const char *M, char *v, const npy_intp n, npy_intp s)
751 {
752     const char *cit = M + s;
753     npy_intp i;
754 
755     for (i = 1; i < n; ++i) {
756         const npy_intp len = (n - i) * s;
757         memcpy(v, cit, len);
758         v += len;
759         cit += (n + 1) * s;
760     }
761 }
762 
763 static NPY_INLINE int
cdist_weighted_chebyshev(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)764 cdist_weighted_chebyshev(const double *XA, const double *XB, double *dm,
765                          const npy_intp num_rowsA, const npy_intp num_rowsB,
766                          const npy_intp num_cols, const double *w)
767 {
768     npy_intp i, j;
769 
770     for (i = 0; i < num_rowsA; ++i) {
771         const double *u = XA + (num_cols * i);
772         for (j = 0; j < num_rowsB; ++j, ++dm) {
773             const double *v = XB + (num_cols * j);
774             *dm = weighted_chebyshev_distance_double(u, v, num_cols, w);
775         }
776     }
777     return 0;
778 }
779 
780 
781 /** cdist */
782 static NPY_INLINE int
cdist_cosine(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols)783 cdist_cosine(const double *XA, const double *XB, double *dm, const npy_intp num_rowsA,
784              const npy_intp num_rowsB, const npy_intp num_cols)
785 {
786     double cosine;
787     npy_intp i, j;
788 
789     double * norms_buffA = calloc(num_rowsA + num_rowsB, sizeof(double));
790     double * norms_buffB;
791     if (!norms_buffA)
792         return -1;
793 
794     norms_buffB = norms_buffA + num_rowsA;
795 
796     _row_norms(XA, num_rowsA, num_cols, norms_buffA);
797     _row_norms(XB, num_rowsB, num_cols, norms_buffB);
798 
799     for (i = 0; i < num_rowsA; ++i) {
800         const double *u = XA + (num_cols * i);
801         for (j = 0; j < num_rowsB; ++j, ++dm) {
802             const double *v = XB + (num_cols * j);
803             cosine = dot_product(u, v, num_cols) / (norms_buffA[i] * norms_buffB[j]);
804             if (fabs(cosine) > 1.) {
805                 /* Clip to correct rounding error. */
806                 cosine = npy_copysign(1, cosine);
807             }
808             *dm = 1. - cosine;
809         }
810     }
811     free(norms_buffA);
812     return 0;
813 }
814 
815 static NPY_INLINE int
cdist_mahalanobis(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * covinv)816 cdist_mahalanobis(const double *XA, const double *XB, double *dm,
817                   const npy_intp num_rowsA, const npy_intp num_rowsB,
818                   const npy_intp num_cols, const double *covinv)
819 {
820     npy_intp i, j;
821 
822     double *dimbuf1 = calloc(2 * num_cols, sizeof(double));
823     double *dimbuf2;
824     if (!dimbuf1) {
825         return -1;
826     }
827     dimbuf2 = dimbuf1 + num_cols;
828 
829     for (i = 0; i < num_rowsA; ++i) {
830         const double *u = XA + (num_cols * i);
831         for (j = 0; j < num_rowsB; ++j, ++dm) {
832             const double *v = XB + (num_cols * j);
833             *dm = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, num_cols);
834         }
835     }
836     free(dimbuf1);
837     return 0;
838 }
839 
840 static NPY_INLINE int
cdist_seuclidean(const double * XA,const double * XB,const double * var,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols)841 cdist_seuclidean(const double *XA, const double *XB, const double *var,
842                  double *dm, const npy_intp num_rowsA, const npy_intp num_rowsB,
843                  const npy_intp num_cols)
844 {
845     npy_intp i, j;
846 
847     for (i = 0; i < num_rowsA; ++i) {
848         const double *u = XA + (num_cols * i);
849         for (j = 0; j < num_rowsB; ++j, ++dm) {
850             const double *v = XB + (num_cols * j);
851             *dm = seuclidean_distance(var, u, v, num_cols);
852         }
853     }
854     return 0;
855 }
856 
857 static NPY_INLINE int
cdist_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p)858 cdist_minkowski(const double *XA, const double *XB, double *dm,
859                 const npy_intp num_rowsA, const npy_intp num_rowsB,
860                 const npy_intp num_cols, const double p)
861 {
862     npy_intp i, j;
863     if (p == 1.0) {
864         return cdist_city_block_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
865     }
866     if (p == 2.0) {
867         return cdist_euclidean_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
868     }
869     if (sc_isinf(p)) {
870         return cdist_chebyshev_double(XA, XB, dm, num_rowsA, num_rowsB, num_cols);
871     }
872 
873     for (i = 0; i < num_rowsA; ++i) {
874         const double *u = XA + (num_cols * i);
875         for (j = 0; j < num_rowsB; ++j, ++dm) {
876             const double *v = XB + (num_cols * j);
877             *dm = minkowski_distance(u, v, num_cols, p);
878         }
879     }
880     return 0;
881 }
882 
883 /* Old weighting type which is inconsistent with other distance metrics.
884    Remove in SciPy 1.8 along with wminkowski. */
885 static NPY_INLINE int
cdist_old_weighted_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p,const double * w)886 cdist_old_weighted_minkowski(const double *XA, const double *XB, double *dm,
887                              const npy_intp num_rowsA, const npy_intp num_rowsB,
888                              const npy_intp num_cols, const double p,
889                              const double *w)
890 {
891     npy_intp i, j;
892 
893     // Covert from old style weights to new weights
894     double * new_weights = malloc(num_cols * sizeof(double));
895     if (!new_weights) {
896       return 1;
897     }
898 
899     for (i = 0; i < num_cols; ++i) {
900       new_weights[i] = pow(w[i], p);
901     }
902 
903     for (i = 0; i < num_rowsA; ++i) {
904         const double *u = XA + (num_cols * i);
905         for (j = 0; j < num_rowsB; ++j, ++dm) {
906             const double *v = XB + (num_cols * j);
907             *dm = weighted_minkowski_distance(u, v, num_cols, p, new_weights);
908         }
909     }
910     free(new_weights);
911     return 0;
912 }
913 
914 static NPY_INLINE int
cdist_weighted_minkowski(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double p,const double * w)915 cdist_weighted_minkowski(const double *XA, const double *XB, double *dm,
916                          const npy_intp num_rowsA, const npy_intp num_rowsB,
917                          const npy_intp num_cols, const double p,
918                          const double *w)
919 {
920     npy_intp i, j;
921 
922     if (sc_isinf(p)) {
923         return cdist_weighted_chebyshev(XA, XB, dm, num_rowsA, num_rowsB, num_cols, w);
924     }
925 
926     for (i = 0; i < num_rowsA; ++i) {
927         const double *u = XA + (num_cols * i);
928         for (j = 0; j < num_rowsB; ++j, ++dm) {
929             const double *v = XB + (num_cols * j);
930             *dm = weighted_minkowski_distance(u, v, num_cols, p, w);
931         }
932     }
933     return 0;
934 }
935 
936 static NPY_INLINE int
cdist_hamming_double(const double * XA,const double * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)937 cdist_hamming_double(const double *XA, const double *XB, double *dm,
938                          const npy_intp num_rowsA, const npy_intp num_rowsB,
939                          const npy_intp num_cols,
940                          const double *w)
941 {
942     npy_intp i, j;
943 
944     for (i = 0; i < num_rowsA; ++i) {
945         const double *u = XA + (num_cols * i);
946         for (j = 0; j < num_rowsB; ++j, ++dm) {
947             const double *v = XB + (num_cols * j);
948             *dm = hamming_distance_double(u, v, num_cols, w);
949         }
950     }
951     return 0;
952 }
953 
954 static NPY_INLINE int
cdist_hamming_char(const char * XA,const char * XB,double * dm,const npy_intp num_rowsA,const npy_intp num_rowsB,const npy_intp num_cols,const double * w)955 cdist_hamming_char(const char *XA, const char *XB, double *dm,
956                          const npy_intp num_rowsA, const npy_intp num_rowsB,
957                          const npy_intp num_cols,
958                          const double *w)
959 {
960     npy_intp i, j;
961 
962     for (i = 0; i < num_rowsA; ++i) {
963         const char *u = XA + (num_cols * i);
964         for (j = 0; j < num_rowsB; ++j, ++dm) {
965             const char *v = XB + (num_cols * j);
966             *dm = hamming_distance_char(u, v, num_cols, w);
967         }
968     }
969     return 0;
970 }
971