1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan; 22 23 import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm; 24 import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN; 25 import de.lmu.ifi.dbs.elki.data.type.TypeInformation; 26 import de.lmu.ifi.dbs.elki.database.datastore.DataStore; 27 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; 28 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; 29 import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; 30 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; 31 import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; 32 import de.lmu.ifi.dbs.elki.database.ids.DBIDs; 33 import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; 34 import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; 35 import de.lmu.ifi.dbs.elki.database.relation.Relation; 36 import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; 37 import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; 38 import de.lmu.ifi.dbs.elki.logging.Logging; 39 import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; 40 import de.lmu.ifi.dbs.elki.logging.statistics.Duration; 41 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 42 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; 43 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; 44 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; 45 46 /** 47 * Abstract local model neighborhood predicate. 48 * 49 * @author Erich Schubert 50 * @since 0.7.0 51 * 52 * @has - - - Instance 53 * 54 * @param <O> object type 55 * @param <M> model type 56 * @param <N> neighborhood type 57 */ 58 public abstract class AbstractRangeQueryNeighborPredicate<O, M, N> implements NeighborPredicate<N> { 59 /** 60 * Range to query with. 61 */ 62 protected double epsilon; 63 64 /** 65 * Distance function to use. 66 */ 67 protected DistanceFunction<? super O> distFunc; 68 69 /** 70 * Full constructor. 71 * 72 * @param epsilon Epsilon value 73 * @param distFunc Distance function to use 74 */ AbstractRangeQueryNeighborPredicate(double epsilon, DistanceFunction<? super O> distFunc)75 public AbstractRangeQueryNeighborPredicate(double epsilon, DistanceFunction<? super O> distFunc) { 76 super(); 77 this.epsilon = epsilon; 78 this.distFunc = distFunc; 79 } 80 81 @Override getInputTypeRestriction()82 public TypeInformation getInputTypeRestriction() { 83 return distFunc.getInputTypeRestriction(); 84 } 85 86 /** 87 * Perform the preprocessing step. 88 * 89 * @param modelcls Class of models 90 * @param relation Data relation 91 * @param query Range query 92 * @return Precomputed models 93 */ preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query)94 public DataStore<M> preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query) { 95 WritableDataStore<M> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, modelcls); 96 97 Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin(); 98 FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null; 99 for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { 100 DoubleDBIDList neighbors = query.getRangeForDBID(iditer, epsilon); 101 storage.put(iditer, computeLocalModel(iditer, neighbors, relation)); 102 getLogger().incrementProcessed(progress); 103 } 104 getLogger().ensureCompleted(progress); 105 getLogger().statistics(time.end()); 106 return storage; 107 } 108 109 /** 110 * Method to compute the actual data model. 111 * 112 * @param id Object ID 113 * @param neighbors Neighbors 114 * @param relation Data relation 115 * @return Model for this object. 116 */ computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<O> relation)117 abstract protected M computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<O> relation); 118 119 /** 120 * Get the class logger. 121 * 122 * @return Logger 123 */ getLogger()124 abstract Logging getLogger(); 125 126 /** 127 * Instance for a particular data set. 128 * 129 * @author Erich Schubert 130 * 131 * @param <N> Neighborhood type 132 * @param <M> model type 133 */ 134 public abstract static class Instance<N, M> implements NeighborPredicate.Instance<N> { 135 /** 136 * DBIDs to process 137 */ 138 protected DBIDs ids; 139 140 /** 141 * Model storage. 142 */ 143 protected DataStore<M> storage; 144 145 /** 146 * Constructor. 147 * 148 * @param ids DBIDs to process 149 * @param storage Model storage 150 */ Instance(DBIDs ids, DataStore<M> storage)151 public Instance(DBIDs ids, DataStore<M> storage) { 152 super(); 153 this.ids = ids; 154 this.storage = storage; 155 } 156 157 @Override getIDs()158 public DBIDs getIDs() { 159 return ids; 160 } 161 } 162 163 /** 164 * Parameterization class 165 * 166 * @author Erich Schubert 167 * 168 * @hidden 169 * 170 * @param <O> object type 171 */ 172 public abstract static class Parameterizer<O> extends AbstractParameterizer { 173 /** 174 * Range to query with 175 */ 176 double epsilon; 177 178 /** 179 * Distance function to use 180 */ 181 DistanceFunction<O> distfun = null; 182 183 @Override makeOptions(Parameterization config)184 protected void makeOptions(Parameterization config) { 185 super.makeOptions(config); 186 configDistance(config); 187 configEpsilon(config); 188 } 189 190 /** 191 * Configure the distance parameter. 192 * 193 * @param config Parameter source 194 */ configDistance(Parameterization config)195 protected void configDistance(Parameterization config) { 196 // Get a distance function. 197 ObjectParameter<DistanceFunction<O>> distanceP = new ObjectParameter<>(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); 198 if(config.grab(distanceP)) { 199 distfun = distanceP.instantiateClass(config); 200 } 201 } 202 203 /** 204 * Configure the epsilon parameter. 205 * 206 * @param config Parameter source 207 */ configEpsilon(Parameterization config)208 protected void configEpsilon(Parameterization config) { 209 // Get the epsilon parameter 210 DoubleParameter epsilonP = new DoubleParameter(DBSCAN.Parameterizer.EPSILON_ID); 211 if(config.grab(epsilonP)) { 212 epsilon = epsilonP.getValue(); 213 } 214 } 215 } 216 } 217