1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.distance.distancefunction.set;
22 
23 import de.lmu.ifi.dbs.elki.data.BitVector;
24 import de.lmu.ifi.dbs.elki.data.FeatureVector;
25 import de.lmu.ifi.dbs.elki.data.NumberVector;
26 import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
27 import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
28 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
30 
31 /**
32  * Computes the Hamming distance of arbitrary vectors - i.e. counting, on how
33  * many places they differ.
34  * <p>
35  * Reference:
36  * <p>
37  * R. W. Hamming<br>
38  * Error detecting and error correcting codes<br>
39  * Bell System technical journal, 29(2)
40  * <p>
41  * TODO: add a sparse (but not binary) optimized version?
42  *
43  * @author Erich Schubert
44  * @since 0.7.0
45  */
46 @Reference(authors = "R. W. Hamming", //
47     title = "Error detecting and error correcting codes", //
48     booktitle = "Bell System technical journal, 29(2)", //
49     url = "https://doi.org/10.1002/j.1538-7305.1950.tb00463.x", //
50     bibkey = "doi:10.1002/j.1538-7305.1950.tb00463.x")
51 public class HammingDistanceFunction extends AbstractSetDistanceFunction<FeatureVector<?>> implements NumberVectorDistanceFunction<FeatureVector<?>> {
52   /**
53    * Static instance.
54    */
55   public static final HammingDistanceFunction STATIC = new HammingDistanceFunction();
56 
57   @Override
isMetric()58   public boolean isMetric() {
59     return true;
60   }
61 
62   @Override
distance(FeatureVector<?> o1, FeatureVector<?> o2)63   public double distance(FeatureVector<?> o1, FeatureVector<?> o2) {
64     if(o1 instanceof BitVector && o2 instanceof BitVector) {
65       return ((BitVector) o1).hammingDistance((BitVector) o2);
66     }
67     if(o1 instanceof NumberVector && o2 instanceof NumberVector) {
68       return hammingDistanceNumberVector((NumberVector) o1, (NumberVector) o2);
69     }
70     final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality();
71     int differences = 0;
72     int d = 0;
73     for(; d < d1 && d < d2; d++) {
74       Object v1 = o1.getValue(d), v2 = o2.getValue(d);
75       final boolean n1 = isNull(v1), n2 = isNull(v2);
76       if(n1 && n2) {
77         continue;
78       }
79       if(v1 instanceof Double && Double.isNaN((Double) v1)) {
80         continue;
81       }
82       if(v2 instanceof Double && Double.isNaN((Double) v2)) {
83         continue;
84       }
85       // One must be set.
86       if(n1 || n2 || !v1.equals(v2)) {
87         ++differences;
88       }
89     }
90     for(; d < d1; d++) {
91       Object v1 = o1.getValue(d);
92       if(!isNull(v1)) {
93         if(v1 instanceof Double && Double.isNaN((Double) v1)) {
94           continue;
95         }
96         ++differences;
97       }
98     }
99     for(; d < d2; d++) {
100       Object v2 = o2.getValue(d);
101       if(!isNull(v2)) {
102         if(v2 instanceof Double && Double.isNaN((Double) v2)) {
103           continue;
104         }
105         ++differences;
106       }
107     }
108     return differences;
109   }
110 
111   @Override
distance(NumberVector o1, NumberVector o2)112   public double distance(NumberVector o1, NumberVector o2) {
113     if(o1 instanceof BitVector && o2 instanceof BitVector) {
114       return ((BitVector) o1).hammingDistance((BitVector) o2);
115     }
116     return hammingDistanceNumberVector(o1, o2);
117   }
118 
119   /**
120    * Version for number vectors.
121    *
122    * @param o1 First vector
123    * @param o2 Second vector
124    * @return hamming distance
125    */
hammingDistanceNumberVector(NumberVector o1, NumberVector o2)126   private double hammingDistanceNumberVector(NumberVector o1, NumberVector o2) {
127     final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality();
128     int differences = 0;
129     int d = 0;
130     for(; d < d1 && d < d2; d++) {
131       double v1 = o1.doubleValue(d), v2 = o2.doubleValue(d);
132       if(v1 != v1 || v2 != v2) { /* NaN */
133         continue;
134       }
135       if(v1 != v2) {
136         ++differences;
137       }
138     }
139     for(; d < d1; d++) {
140       double v1 = o1.doubleValue(d);
141       if(v1 != 0. && v1 == v1 /* not NaN */) {
142         ++differences;
143       }
144     }
145     for(; d < d2; d++) {
146       double v2 = o2.doubleValue(d);
147       if(v2 != 0. && v2 == v2 /* not NaN */) {
148         ++differences;
149       }
150     }
151     return differences;
152   }
153 
154   @Override
getInputTypeRestriction()155   public SimpleTypeInformation<? super FeatureVector<?>> getInputTypeRestriction() {
156     return FeatureVector.TYPE;
157   }
158 
159   @Override
equals(Object obj)160   public boolean equals(Object obj) {
161     return obj == this || (obj != null && this.getClass().equals(obj.getClass()));
162   }
163 
164   @Override
hashCode()165   public int hashCode() {
166     return getClass().hashCode();
167   }
168 
169   /**
170    * Parameterization class.
171    *
172    * @author Erich Schubert
173    */
174   public static class Parameterizer extends AbstractParameterizer {
175     @Override
makeInstance()176     protected HammingDistanceFunction makeInstance() {
177       return STATIC;
178     }
179   }
180 }
181