1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.distance.distancefunction.histogram;
22 
23 import de.lmu.ifi.dbs.elki.data.NumberVector;
24 import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable;
25 import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractNumberVectorDistanceFunction;
26 import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
27 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
28 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
29 
30 /**
31  * Distance function based on histogram matching, i.e. Manhattan distance on the
32  * cumulative density function.
33  * <p>
34  * This distance function assumes there exist a natural order in the vectors,
35  * i.e. they should be some 1-dimensional histogram.
36  * <p>
37  * This is also known as Earth Movers Distance (EMD), 1st Mallows distance or
38  * 1st Wasserstein metric (also Vasershtein metric), for the special case of a
39  * one-dimensional histogram, where the cost is linear in the number of bins to
40  * transport.
41  * <p>
42  * Reference:
43  * <p>
44  * L. N. Vaserstein<br>
45  * Markov processes over denumerable products of spaces describing large systems
46  * of automata<br>
47  * Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission 5:3
48  *
49  * @author Erich Schubert
50  * @since 0.6.0
51  */
52 @Reference(authors = "L. N. Vaserstein", //
53     title = "Markov processes over denumerable products of spaces describing large systems of automata", //
54     booktitle = "Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission, 5:3", //
55     url = "http://mi.mathnet.ru/eng/ppi1811", //
56     bibkey = "journals/misc/Vaserstein69")
57 public class HistogramMatchDistanceFunction extends AbstractNumberVectorDistanceFunction implements SpatialPrimitiveDistanceFunction<NumberVector> {
58   /**
59    * Static instance. Use this!
60    */
61   public static final HistogramMatchDistanceFunction STATIC = new HistogramMatchDistanceFunction();
62 
63   /**
64    * Constructor for the Histogram match distance function.
65    *
66    * @deprecated Use static instance!
67    */
68   @Deprecated
HistogramMatchDistanceFunction()69   public HistogramMatchDistanceFunction() {
70     super();
71   }
72 
73   @Override
distance(NumberVector v1, NumberVector v2)74   public double distance(NumberVector v1, NumberVector v2) {
75     final int dim = dimensionality(v1, v2);
76     double xs = 0., ys = 0., agg = 0.;
77     for(int i = 0; i < dim; i++) {
78       xs += v1.doubleValue(i);
79       ys += v2.doubleValue(i);
80       agg += Math.abs(xs - ys);
81     }
82     return agg;
83   }
84 
85   @Override
minDist(SpatialComparable mbr1, SpatialComparable mbr2)86   public double minDist(SpatialComparable mbr1, SpatialComparable mbr2) {
87     final int dim = dimensionality(mbr1, mbr2);
88     double xmin = 0., xmax = 0., ymin = 0., ymax = 0., agg = 0.;
89     for(int i = 0; i < dim; i++) {
90       xmin += mbr1.getMin(i);
91       xmax += mbr1.getMax(i);
92       ymin += mbr2.getMin(i);
93       ymax += mbr2.getMax(i);
94       agg += (ymin > xmax) ? (ymin - xmax) : (xmin > ymax) ? (xmin - ymax) : 0.;
95     }
96     return agg;
97   }
98 
99   @Override
isMetric()100   public boolean isMetric() {
101     return true;
102   }
103 
104   @Override
toString()105   public String toString() {
106     return "HistogramMatchDistanceFunction";
107   }
108 
109   @Override
equals(Object obj)110   public boolean equals(Object obj) {
111     return obj == this || (obj != null && this.getClass().equals(obj.getClass()));
112   }
113 
114   @Override
hashCode()115   public int hashCode() {
116     return getClass().hashCode();
117   }
118 
119   /**
120    * Parameterization class, using the static instance.
121    *
122    * @author Erich Schubert
123    */
124   public static class Parameterizer extends AbstractParameterizer {
125     @Override
makeInstance()126     protected HistogramMatchDistanceFunction makeInstance() {
127       return STATIC;
128     }
129   }
130 }
131