1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.distance.distancefunction; 22 23 import de.lmu.ifi.dbs.elki.data.NumberVector; 24 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; 25 import de.lmu.ifi.dbs.elki.utilities.Alias; 26 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 27 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 28 29 /** 30 * Bray-Curtis distance function / Sørensen–Dice coefficient for continuous 31 * vector spaces (not only binary data). 32 * <p> 33 * Reference: 34 * <p> 35 * J. R. Bray, J. T. Curtis<br> 36 * An ordination of the upland forest communities of southern Wisconsin<br> 37 * Ecological monographs 27.4 38 * <p> 39 * Also: 40 * <p> 41 * T. Sørensen<br> 42 * A method of establishing groups of equal amplitude in plant sociology based 43 * on similarity of species and its application to analyses of the vegetation on 44 * Danish commons<br> 45 * Kongelige Danske Videnskabernes Selskab 5 (4) 46 * <p> 47 * and: 48 * <p> 49 * L. R. Dice<br> 50 * Measures of the Amount of Ecologic Association Between Species<br> 51 * Ecology 26 (3) 52 * <p> 53 * Note: we modified the usual definition of Bray-Curtis for use with negative 54 * values. In essence, this function is defined as: 55 * <p> 56 * ManhattanDistance(v1, v2) / (ManhattanNorm(v1) + ManhattanNorm(v2)) 57 * <p> 58 * This obviously limits the usefulness of this distance function for cases 59 * where this kind of normalization is desired. In particular in low dimensional 60 * data it should be used with care. 61 * <p> 62 * TODO: add a version <i>optimized</i> for sparse vectors / binary data. 63 * 64 * @author Erich Schubert 65 * @since 0.6.0 66 */ 67 @Reference(authors = "J. R. Bray, J. T. Curtis", // 68 title = "An ordination of the upland forest communities of southern Wisconsin", // 69 booktitle = "Ecological monographs 27.4", // 70 url = "https://doi.org/10.2307/1942268", // 71 bibkey = "doi:10.2307/1942268") 72 @Reference(authors = "T. Sørensen", // 73 title = "A method of establishing groups of equal amplitude in plant sociology based on similarity of species and its application to analyses of the vegetation on Danish commons", // 74 booktitle = "Kongelige Danske Videnskabernes Selskab 5 (4)", // 75 bibkey = "journals/misc/Sorensen48") 76 @Reference(authors = "L. R. Dice", // 77 title = "Measures of the Amount of Ecologic Association Between Species", // 78 booktitle = "Ecology 26 (3)", // 79 url = "https://doi.org/10.2307/1932409", // 80 bibkey = "doi:10.2307/1932409") 81 @Alias({ "bray-curtis", "braycurtis", "sorensen", "dice", "sorensen-dice" }) 82 public class BrayCurtisDistanceFunction extends AbstractNumberVectorDistanceFunction implements SpatialPrimitiveDistanceFunction<NumberVector> { 83 /** 84 * Static instance. 85 */ 86 public static final BrayCurtisDistanceFunction STATIC_CONTINUOUS = new BrayCurtisDistanceFunction(); 87 88 /** 89 * Constructor. 90 * 91 * @deprecated Use {@link #STATIC_CONTINUOUS} instance instead. 92 */ 93 @Deprecated BrayCurtisDistanceFunction()94 public BrayCurtisDistanceFunction() { 95 super(); 96 } 97 98 @Override distance(NumberVector v1, NumberVector v2)99 public double distance(NumberVector v1, NumberVector v2) { 100 final int dim = dimensionality(v1, v2); 101 double sumdiff = 0., sumsum = 0.; 102 for(int d = 0; d < dim; d++) { 103 final double xd = v1.doubleValue(d), yd = v2.doubleValue(d); 104 sumdiff += Math.abs(xd - yd); 105 sumsum += Math.abs(xd) + Math.abs(yd); 106 } 107 return sumsum > 0 ? sumdiff / sumsum : 0; 108 } 109 110 @Override minDist(SpatialComparable mbr1, SpatialComparable mbr2)111 public double minDist(SpatialComparable mbr1, SpatialComparable mbr2) { 112 final int dim = dimensionality(mbr1, mbr2); 113 double sumdiff = 0., sumsum = 0.; 114 for(int d = 0; d < dim; d++) { 115 final double min1 = mbr1.getMin(d), max1 = mbr1.getMax(d); 116 final double min2 = mbr2.getMin(d), max2 = mbr2.getMax(d); 117 sumdiff += (max1 < min2) ? min2 - max1 : (min1 > max2) ? min1 - max2 : 0; 118 sumsum += Math.max(-min1, max1) + Math.max(-min2, max2); 119 } 120 return sumsum > 0 ? sumdiff / sumsum : 0; 121 } 122 123 @Override equals(Object obj)124 public boolean equals(Object obj) { 125 return obj == this || (obj != null && this.getClass().equals(obj.getClass())); 126 } 127 128 @Override hashCode()129 public int hashCode() { 130 return getClass().hashCode(); 131 } 132 133 /** 134 * Parameterization class. 135 * 136 * @author Erich Schubert 137 */ 138 public static class Parameterizer extends AbstractParameterizer { 139 @Override makeInstance()140 protected BrayCurtisDistanceFunction makeInstance() { 141 return BrayCurtisDistanceFunction.STATIC_CONTINUOUS; 142 } 143 } 144 } 145