1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.distance.distancefunction.external; 22 23 import java.io.*; 24 25 import de.lmu.ifi.dbs.elki.database.ids.DBID; 26 import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; 27 import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; 28 import de.lmu.ifi.dbs.elki.database.relation.Relation; 29 import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractDBIDRangeDistanceFunction; 30 import de.lmu.ifi.dbs.elki.logging.Logging; 31 import de.lmu.ifi.dbs.elki.utilities.Alias; 32 import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; 33 import de.lmu.ifi.dbs.elki.utilities.io.FileUtil; 34 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 35 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; 36 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; 37 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; 38 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; 39 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; 40 41 import it.unimi.dsi.fastutil.longs.Long2FloatOpenHashMap; 42 43 /** 44 * Distance function that is based on float distances given by a distance matrix 45 * of an external ASCII file. 46 * <p> 47 * Note: parsing an ASCII file is rather expensive. 48 * <p> 49 * See {@link AsciiDistanceParser} for the default input format. 50 * <p> 51 * TODO: use a {@code float[]} instead of the hash map? 52 * 53 * @author Elke Achtert 54 * @author Erich Schubert 55 * @since 0.1 56 * 57 * @composed - - - DistanceCacheWriter 58 */ 59 @Alias("de.lmu.ifi.dbs.elki.distance.distancefunction.external.FileBasedFloatDistanceFunction") 60 public class FileBasedSparseFloatDistanceFunction extends AbstractDBIDRangeDistanceFunction { 61 /** 62 * Class logger. 63 */ 64 private static final Logging LOG = Logging.getLogger(FileBasedSparseFloatDistanceFunction.class); 65 66 /** 67 * The distance cache 68 */ 69 private Long2FloatOpenHashMap cache; 70 71 /** 72 * Distance parser 73 */ 74 private DistanceParser parser; 75 76 /** 77 * Input file of distance matrix 78 */ 79 private File matrixfile; 80 81 /** 82 * Minimum and maximum IDs seen. 83 */ 84 private int min, max; 85 86 /** 87 * Distance to return when not defined otherwise. 88 */ 89 protected float defaultDistance = Float.POSITIVE_INFINITY; 90 91 /** 92 * Constructor. 93 * 94 * @param parser Parser 95 * @param matrixfile input file 96 * @param defaultDistance Default distance (when undefined) 97 */ FileBasedSparseFloatDistanceFunction(DistanceParser parser, File matrixfile, float defaultDistance)98 public FileBasedSparseFloatDistanceFunction(DistanceParser parser, File matrixfile, float defaultDistance) { 99 super(); 100 this.parser = parser; 101 this.matrixfile = matrixfile; 102 this.defaultDistance = defaultDistance; 103 } 104 105 @Override instantiate(Relation<O> relation)106 public <O extends DBID> DistanceQuery<O> instantiate(Relation<O> relation) { 107 if(cache == null) { 108 try { 109 loadCache(relation.size(), new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile)))); 110 } 111 catch(IOException e) { 112 throw new AbortException("Could not load external distance file: " + matrixfile.toString(), e); 113 } 114 } 115 return super.instantiate(relation); 116 } 117 118 @Override distance(int i1, int i2)119 public double distance(int i1, int i2) { 120 return (i1 == i2) ? 0. : cache.get(makeKey(i1 + min, i2 + min)); 121 } 122 123 /** 124 * Fill cache from an input stream. 125 * 126 * @param size Expected size 127 * @param in Input stream 128 * @throws IOException 129 */ loadCache(int size, InputStream in)130 protected void loadCache(int size, InputStream in) throws IOException { 131 // Expect a sparse matrix here 132 cache = new Long2FloatOpenHashMap(size * 20); 133 cache.defaultReturnValue(Float.POSITIVE_INFINITY); 134 min = Integer.MAX_VALUE; 135 max = Integer.MIN_VALUE; 136 parser.parse(in, new DistanceCacheWriter() { 137 @Override 138 public void put(int id1, int id2, double distance) { 139 if(id1 < id2) { 140 min = id1 < min ? id1 : min; 141 max = id2 > max ? id2 : max; 142 } 143 else { 144 min = id2 < min ? id2 : min; 145 max = id1 > max ? id1 : max; 146 } 147 cache.put(makeKey(id1, id2), (float) distance); 148 } 149 }); 150 if(min != 0) { 151 LOG.verbose("Distance matrix is supposed to be 0-indexed. Choosing offset " + min + " to compensate."); 152 } 153 if(max + 1 - min != size) { 154 LOG.warning("ID range is not consistent with relation size."); 155 } 156 } 157 158 /** 159 * Combine two integer ids into a long value. 160 * 161 * @param i1 First id 162 * @param i2 Second id 163 * @return Combined value 164 */ makeKey(int i1, int i2)165 protected static final long makeKey(int i1, int i2) { 166 return (i1 < i2) // 167 ? ((((long) i1) << 32) | i2)// 168 : ((((long) i2) << 32) | i1); 169 } 170 171 @Override checkRange(DBIDRange range)172 public void checkRange(DBIDRange range) { 173 final int size = max + 1 - min; 174 if(size < range.size()) { 175 LOG.warning("Distance matrix has size " + size + " but range has size: " + range.size()); 176 } 177 } 178 179 @Override equals(Object obj)180 public boolean equals(Object obj) { 181 if(obj == null) { 182 return false; 183 } 184 if(getClass() != obj.getClass()) { 185 return false; 186 } 187 FileBasedSparseFloatDistanceFunction other = (FileBasedSparseFloatDistanceFunction) obj; 188 return this.cache.equals(other.cache); 189 } 190 191 /** 192 * Parameterization class. 193 * 194 * @author Erich Schubert 195 */ 196 public static class Parameterizer extends AbstractParameterizer { 197 /** 198 * Parameter that specifies the name of the distance matrix file. 199 */ 200 public static final OptionID MATRIX_ID = FileBasedSparseDoubleDistanceFunction.Parameterizer.MATRIX_ID; 201 202 /** 203 * Optional parameter to specify the parsers to provide a database, must 204 * extend {@link DistanceParser}. If this parameter is not set, 205 * {@link AsciiDistanceParser} is used as parser for all input files. 206 */ 207 public static final OptionID PARSER_ID = FileBasedSparseDoubleDistanceFunction.Parameterizer.PARSER_ID; 208 209 /** 210 * Optional parameter to specify the distance to return when no distance was 211 * given in the file. Defaults to infinity. 212 */ 213 public static final OptionID DEFAULTDIST_ID = FileBasedSparseDoubleDistanceFunction.Parameterizer.DEFAULTDIST_ID; 214 215 /** 216 * Input file. 217 */ 218 protected File matrixfile = null; 219 220 /** 221 * Parser for input file. 222 */ 223 protected DistanceParser parser = null; 224 225 /** 226 * Distance to return when not defined otherwise. 227 */ 228 protected float defaultDistance = Float.POSITIVE_INFINITY; 229 230 @Override makeOptions(Parameterization config)231 protected void makeOptions(Parameterization config) { 232 super.makeOptions(config); 233 final FileParameter MATRIX_PARAM = new FileParameter(MATRIX_ID, FileParameter.FileType.INPUT_FILE); 234 if(config.grab(MATRIX_PARAM)) { 235 matrixfile = MATRIX_PARAM.getValue(); 236 } 237 238 final ObjectParameter<DistanceParser> PARSER_PARAM = new ObjectParameter<>(PARSER_ID, DistanceParser.class, AsciiDistanceParser.class); 239 if(config.grab(PARSER_PARAM)) { 240 parser = PARSER_PARAM.instantiateClass(config); 241 } 242 243 DoubleParameter distanceP = new DoubleParameter(DEFAULTDIST_ID, Double.POSITIVE_INFINITY); 244 if(config.grab(distanceP)) { 245 defaultDistance = (float) distanceP.doubleValue(); 246 } 247 } 248 249 @Override makeInstance()250 protected FileBasedSparseFloatDistanceFunction makeInstance() { 251 return new FileBasedSparseFloatDistanceFunction(parser, matrixfile, defaultDistance); 252 } 253 } 254 } 255