1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.distance.distancefunction.set; 22 23 import de.lmu.ifi.dbs.elki.data.BitVector; 24 import de.lmu.ifi.dbs.elki.data.FeatureVector; 25 import de.lmu.ifi.dbs.elki.data.NumberVector; 26 import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; 27 import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; 28 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 30 31 /** 32 * Computes the Hamming distance of arbitrary vectors - i.e. counting, on how 33 * many places they differ. 34 * <p> 35 * Reference: 36 * <p> 37 * R. W. Hamming<br> 38 * Error detecting and error correcting codes<br> 39 * Bell System technical journal, 29(2) 40 * <p> 41 * TODO: add a sparse (but not binary) optimized version? 42 * 43 * @author Erich Schubert 44 * @since 0.7.0 45 */ 46 @Reference(authors = "R. W. Hamming", // 47 title = "Error detecting and error correcting codes", // 48 booktitle = "Bell System technical journal, 29(2)", // 49 url = "https://doi.org/10.1002/j.1538-7305.1950.tb00463.x", // 50 bibkey = "doi:10.1002/j.1538-7305.1950.tb00463.x") 51 public class HammingDistanceFunction extends AbstractSetDistanceFunction<FeatureVector<?>> implements NumberVectorDistanceFunction<FeatureVector<?>> { 52 /** 53 * Static instance. 54 */ 55 public static final HammingDistanceFunction STATIC = new HammingDistanceFunction(); 56 57 @Override isMetric()58 public boolean isMetric() { 59 return true; 60 } 61 62 @Override distance(FeatureVector<?> o1, FeatureVector<?> o2)63 public double distance(FeatureVector<?> o1, FeatureVector<?> o2) { 64 if(o1 instanceof BitVector && o2 instanceof BitVector) { 65 return ((BitVector) o1).hammingDistance((BitVector) o2); 66 } 67 if(o1 instanceof NumberVector && o2 instanceof NumberVector) { 68 return hammingDistanceNumberVector((NumberVector) o1, (NumberVector) o2); 69 } 70 final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality(); 71 int differences = 0; 72 int d = 0; 73 for(; d < d1 && d < d2; d++) { 74 Object v1 = o1.getValue(d), v2 = o2.getValue(d); 75 final boolean n1 = isNull(v1), n2 = isNull(v2); 76 if(n1 && n2) { 77 continue; 78 } 79 if(v1 instanceof Double && Double.isNaN((Double) v1)) { 80 continue; 81 } 82 if(v2 instanceof Double && Double.isNaN((Double) v2)) { 83 continue; 84 } 85 // One must be set. 86 if(n1 || n2 || !v1.equals(v2)) { 87 ++differences; 88 } 89 } 90 for(; d < d1; d++) { 91 Object v1 = o1.getValue(d); 92 if(!isNull(v1)) { 93 if(v1 instanceof Double && Double.isNaN((Double) v1)) { 94 continue; 95 } 96 ++differences; 97 } 98 } 99 for(; d < d2; d++) { 100 Object v2 = o2.getValue(d); 101 if(!isNull(v2)) { 102 if(v2 instanceof Double && Double.isNaN((Double) v2)) { 103 continue; 104 } 105 ++differences; 106 } 107 } 108 return differences; 109 } 110 111 @Override distance(NumberVector o1, NumberVector o2)112 public double distance(NumberVector o1, NumberVector o2) { 113 if(o1 instanceof BitVector && o2 instanceof BitVector) { 114 return ((BitVector) o1).hammingDistance((BitVector) o2); 115 } 116 return hammingDistanceNumberVector(o1, o2); 117 } 118 119 /** 120 * Version for number vectors. 121 * 122 * @param o1 First vector 123 * @param o2 Second vector 124 * @return hamming distance 125 */ hammingDistanceNumberVector(NumberVector o1, NumberVector o2)126 private double hammingDistanceNumberVector(NumberVector o1, NumberVector o2) { 127 final int d1 = o1.getDimensionality(), d2 = o2.getDimensionality(); 128 int differences = 0; 129 int d = 0; 130 for(; d < d1 && d < d2; d++) { 131 double v1 = o1.doubleValue(d), v2 = o2.doubleValue(d); 132 if(v1 != v1 || v2 != v2) { /* NaN */ 133 continue; 134 } 135 if(v1 != v2) { 136 ++differences; 137 } 138 } 139 for(; d < d1; d++) { 140 double v1 = o1.doubleValue(d); 141 if(v1 != 0. && v1 == v1 /* not NaN */) { 142 ++differences; 143 } 144 } 145 for(; d < d2; d++) { 146 double v2 = o2.doubleValue(d); 147 if(v2 != 0. && v2 == v2 /* not NaN */) { 148 ++differences; 149 } 150 } 151 return differences; 152 } 153 154 @Override getInputTypeRestriction()155 public SimpleTypeInformation<? super FeatureVector<?>> getInputTypeRestriction() { 156 return FeatureVector.TYPE; 157 } 158 159 @Override equals(Object obj)160 public boolean equals(Object obj) { 161 return obj == this || (obj != null && this.getClass().equals(obj.getClass())); 162 } 163 164 @Override hashCode()165 public int hashCode() { 166 return getClass().hashCode(); 167 } 168 169 /** 170 * Parameterization class. 171 * 172 * @author Erich Schubert 173 */ 174 public static class Parameterizer extends AbstractParameterizer { 175 @Override makeInstance()176 protected HammingDistanceFunction makeInstance() { 177 return STATIC; 178 } 179 } 180 } 181