1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.distance.distancefunction;
22 
23 import de.lmu.ifi.dbs.elki.data.NumberVector;
24 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
25 import de.lmu.ifi.dbs.elki.utilities.Alias;
26 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
27 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
28 
29 /**
30  * Bray-Curtis distance function / Sørensen–Dice coefficient for continuous
31  * vector spaces (not only binary data).
32  * <p>
33  * Reference:
34  * <p>
35  * J. R. Bray, J. T. Curtis<br>
36  * An ordination of the upland forest communities of southern Wisconsin<br>
37  * Ecological monographs 27.4
38  * <p>
39  * Also:
40  * <p>
41  * T. Sørensen<br>
42  * A method of establishing groups of equal amplitude in plant sociology based
43  * on similarity of species and its application to analyses of the vegetation on
44  * Danish commons<br>
45  * Kongelige Danske Videnskabernes Selskab 5 (4)
46  * <p>
47  * and:
48  * <p>
49  * L. R. Dice<br>
50  * Measures of the Amount of Ecologic Association Between Species<br>
51  * Ecology 26 (3)
52  * <p>
53  * Note: we modified the usual definition of Bray-Curtis for use with negative
54  * values. In essence, this function is defined as:
55  * <p>
56  * ManhattanDistance(v1, v2) / (ManhattanNorm(v1) + ManhattanNorm(v2))
57  * <p>
58  * This obviously limits the usefulness of this distance function for cases
59  * where this kind of normalization is desired. In particular in low dimensional
60  * data it should be used with care.
61  * <p>
62  * TODO: add a version <i>optimized</i> for sparse vectors / binary data.
63  *
64  * @author Erich Schubert
65  * @since 0.6.0
66  */
67 @Reference(authors = "J. R. Bray, J. T. Curtis", //
68     title = "An ordination of the upland forest communities of southern Wisconsin", //
69     booktitle = "Ecological monographs 27.4", //
70     url = "https://doi.org/10.2307/1942268", //
71     bibkey = "doi:10.2307/1942268")
72 @Reference(authors = "T. Sørensen", //
73     title = "A method of establishing groups of equal amplitude in plant sociology based on similarity of species and its application to analyses of the vegetation on Danish commons", //
74     booktitle = "Kongelige Danske Videnskabernes Selskab 5 (4)", //
75     bibkey = "journals/misc/Sorensen48")
76 @Reference(authors = "L. R. Dice", //
77     title = "Measures of the Amount of Ecologic Association Between Species", //
78     booktitle = "Ecology 26 (3)", //
79     url = "https://doi.org/10.2307/1932409", //
80     bibkey = "doi:10.2307/1932409")
81 @Alias({ "bray-curtis", "braycurtis", "sorensen", "dice", "sorensen-dice" })
82 public class BrayCurtisDistanceFunction extends AbstractNumberVectorDistanceFunction implements SpatialPrimitiveDistanceFunction<NumberVector> {
83   /**
84    * Static instance.
85    */
86   public static final BrayCurtisDistanceFunction STATIC_CONTINUOUS = new BrayCurtisDistanceFunction();
87 
88   /**
89    * Constructor.
90    *
91    * @deprecated Use {@link #STATIC_CONTINUOUS} instance instead.
92    */
93   @Deprecated
BrayCurtisDistanceFunction()94   public BrayCurtisDistanceFunction() {
95     super();
96   }
97 
98   @Override
distance(NumberVector v1, NumberVector v2)99   public double distance(NumberVector v1, NumberVector v2) {
100     final int dim = dimensionality(v1, v2);
101     double sumdiff = 0., sumsum = 0.;
102     for(int d = 0; d < dim; d++) {
103       final double xd = v1.doubleValue(d), yd = v2.doubleValue(d);
104       sumdiff += Math.abs(xd - yd);
105       sumsum += Math.abs(xd) + Math.abs(yd);
106     }
107     return sumsum > 0 ? sumdiff / sumsum : 0;
108   }
109 
110   @Override
minDist(SpatialComparable mbr1, SpatialComparable mbr2)111   public double minDist(SpatialComparable mbr1, SpatialComparable mbr2) {
112     final int dim = dimensionality(mbr1, mbr2);
113     double sumdiff = 0., sumsum = 0.;
114     for(int d = 0; d < dim; d++) {
115       final double min1 = mbr1.getMin(d), max1 = mbr1.getMax(d);
116       final double min2 = mbr2.getMin(d), max2 = mbr2.getMax(d);
117       sumdiff += (max1 < min2) ? min2 - max1 : (min1 > max2) ? min1 - max2 : 0;
118       sumsum += Math.max(-min1, max1) + Math.max(-min2, max2);
119     }
120     return sumsum > 0 ? sumdiff / sumsum : 0;
121   }
122 
123   @Override
equals(Object obj)124   public boolean equals(Object obj) {
125     return obj == this || (obj != null && this.getClass().equals(obj.getClass()));
126   }
127 
128   @Override
hashCode()129   public int hashCode() {
130     return getClass().hashCode();
131   }
132 
133   /**
134    * Parameterization class.
135    *
136    * @author Erich Schubert
137    */
138   public static class Parameterizer extends AbstractParameterizer {
139     @Override
makeInstance()140     protected BrayCurtisDistanceFunction makeInstance() {
141       return BrayCurtisDistanceFunction.STATIC_CONTINUOUS;
142     }
143   }
144 }
145