1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.distance.distancefunction.histogram; 22 23 import de.lmu.ifi.dbs.elki.data.NumberVector; 24 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; 25 import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction; 26 import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction; 27 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 28 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 29 30 /** 31 * Distance function based on histogram matching, i.e. Manhattan distance on the 32 * cumulative density function. 33 * <p> 34 * This distance function assumes there exist a natural order in the vectors, 35 * i.e. they should be some 1-dimensional histogram. 36 * <p> 37 * This is also known as Earth Movers Distance (EMD), 1st Mallows distance or 38 * 1st Wasserstein metric (also Vasershtein metric), for the special case of a 39 * one-dimensional histogram, where the cost is linear in the number of bins to 40 * transport. 41 * <p> 42 * Reference: 43 * <p> 44 * L. N. Vaserstein<br> 45 * Markov processes over denumerable products of spaces describing large systems 46 * of automata<br> 47 * Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission 5:3 48 * 49 * @author Erich Schubert 50 * @since 0.6.0 51 */ 52 @Reference(authors = "L. N. Vaserstein", // 53 title = "Markov processes over denumerable products of spaces describing large systems of automata", // 54 booktitle = "Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission, 5:3", // 55 url = "http://mi.mathnet.ru/eng/ppi1811", // 56 bibkey = "journals/misc/Vaserstein69") 57 public class HistogramMatchDistanceFunction extends AbstractNumberVectorDistanceFunction implements SpatialPrimitiveDistanceFunction<NumberVector> { 58 /** 59 * Static instance. Use this! 60 */ 61 public static final HistogramMatchDistanceFunction STATIC = new HistogramMatchDistanceFunction(); 62 63 /** 64 * Constructor for the Histogram match distance function. 65 * 66 * @deprecated Use static instance! 67 */ 68 @Deprecated HistogramMatchDistanceFunction()69 public HistogramMatchDistanceFunction() { 70 super(); 71 } 72 73 @Override distance(NumberVector v1, NumberVector v2)74 public double distance(NumberVector v1, NumberVector v2) { 75 final int dim = dimensionality(v1, v2); 76 double xs = 0., ys = 0., agg = 0.; 77 for(int i = 0; i < dim; i++) { 78 xs += v1.doubleValue(i); 79 ys += v2.doubleValue(i); 80 agg += Math.abs(xs - ys); 81 } 82 return agg; 83 } 84 85 @Override minDist(SpatialComparable mbr1, SpatialComparable mbr2)86 public double minDist(SpatialComparable mbr1, SpatialComparable mbr2) { 87 final int dim = dimensionality(mbr1, mbr2); 88 double xmin = 0., xmax = 0., ymin = 0., ymax = 0., agg = 0.; 89 for(int i = 0; i < dim; i++) { 90 xmin += mbr1.getMin(i); 91 xmax += mbr1.getMax(i); 92 ymin += mbr2.getMin(i); 93 ymax += mbr2.getMax(i); 94 agg += (ymin > xmax) ? (ymin - xmax) : (xmin > ymax) ? (xmin - ymax) : 0.; 95 } 96 return agg; 97 } 98 99 @Override isMetric()100 public boolean isMetric() { 101 return true; 102 } 103 104 @Override toString()105 public String toString() { 106 return "HistogramMatchDistanceFunction"; 107 } 108 109 @Override equals(Object obj)110 public boolean equals(Object obj) { 111 return obj == this || (obj != null && this.getClass().equals(obj.getClass())); 112 } 113 114 @Override hashCode()115 public int hashCode() { 116 return getClass().hashCode(); 117 } 118 119 /** 120 * Parameterization class, using the static instance. 121 * 122 * @author Erich Schubert 123 */ 124 public static class Parameterizer extends AbstractParameterizer { 125 @Override makeInstance()126 protected HistogramMatchDistanceFunction makeInstance() { 127 return STATIC; 128 } 129 } 130 } 131