1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
22 
23 import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
24 import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
25 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
26 import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
27 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
28 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
29 import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
30 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
31 import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
32 import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
33 import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
34 import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
35 import de.lmu.ifi.dbs.elki.database.relation.Relation;
36 import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
37 import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
38 import de.lmu.ifi.dbs.elki.logging.Logging;
39 import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
40 import de.lmu.ifi.dbs.elki.logging.statistics.Duration;
41 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
42 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
43 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
44 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
45 
46 /**
47  * Abstract local model neighborhood predicate.
48  *
49  * @author Erich Schubert
50  * @since 0.7.0
51  *
52  * @has - - - Instance
53  *
54  * @param <O> object type
55  * @param <M> model type
56  * @param <N> neighborhood type
57  */
58 public abstract class AbstractRangeQueryNeighborPredicate<O, M, N> implements NeighborPredicate<N> {
59   /**
60    * Range to query with.
61    */
62   protected double epsilon;
63 
64   /**
65    * Distance function to use.
66    */
67   protected DistanceFunction<? super O> distFunc;
68 
69   /**
70    * Full constructor.
71    *
72    * @param epsilon Epsilon value
73    * @param distFunc Distance function to use
74    */
AbstractRangeQueryNeighborPredicate(double epsilon, DistanceFunction<? super O> distFunc)75   public AbstractRangeQueryNeighborPredicate(double epsilon, DistanceFunction<? super O> distFunc) {
76     super();
77     this.epsilon = epsilon;
78     this.distFunc = distFunc;
79   }
80 
81   @Override
getInputTypeRestriction()82   public TypeInformation getInputTypeRestriction() {
83     return distFunc.getInputTypeRestriction();
84   }
85 
86   /**
87    * Perform the preprocessing step.
88    *
89    * @param modelcls Class of models
90    * @param relation Data relation
91    * @param query Range query
92    * @return Precomputed models
93    */
preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query)94   public DataStore<M> preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query) {
95     WritableDataStore<M> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, modelcls);
96 
97     Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin();
98     FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null;
99     for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
100       DoubleDBIDList neighbors = query.getRangeForDBID(iditer, epsilon);
101       storage.put(iditer, computeLocalModel(iditer, neighbors, relation));
102       getLogger().incrementProcessed(progress);
103     }
104     getLogger().ensureCompleted(progress);
105     getLogger().statistics(time.end());
106     return storage;
107   }
108 
109   /**
110    * Method to compute the actual data model.
111    *
112    * @param id Object ID
113    * @param neighbors Neighbors
114    * @param relation Data relation
115    * @return Model for this object.
116    */
computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<O> relation)117   abstract protected M computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<O> relation);
118 
119   /**
120    * Get the class logger.
121    *
122    * @return Logger
123    */
getLogger()124   abstract Logging getLogger();
125 
126   /**
127    * Instance for a particular data set.
128    *
129    * @author Erich Schubert
130    *
131    * @param <N> Neighborhood type
132    * @param <M> model type
133    */
134   public abstract static class Instance<N, M> implements NeighborPredicate.Instance<N> {
135     /**
136      * DBIDs to process
137      */
138     protected DBIDs ids;
139 
140     /**
141      * Model storage.
142      */
143     protected DataStore<M> storage;
144 
145     /**
146      * Constructor.
147      *
148      * @param ids DBIDs to process
149      * @param storage Model storage
150      */
Instance(DBIDs ids, DataStore<M> storage)151     public Instance(DBIDs ids, DataStore<M> storage) {
152       super();
153       this.ids = ids;
154       this.storage = storage;
155     }
156 
157     @Override
getIDs()158     public DBIDs getIDs() {
159       return ids;
160     }
161   }
162 
163   /**
164    * Parameterization class
165    *
166    * @author Erich Schubert
167    *
168    * @hidden
169    *
170    * @param <O> object type
171    */
172   public abstract static class Parameterizer<O> extends AbstractParameterizer {
173     /**
174      * Range to query with
175      */
176     double epsilon;
177 
178     /**
179      * Distance function to use
180      */
181     DistanceFunction<O> distfun = null;
182 
183     @Override
makeOptions(Parameterization config)184     protected void makeOptions(Parameterization config) {
185       super.makeOptions(config);
186       configDistance(config);
187       configEpsilon(config);
188     }
189 
190     /**
191      * Configure the distance parameter.
192      *
193      * @param config Parameter source
194      */
configDistance(Parameterization config)195     protected void configDistance(Parameterization config) {
196       // Get a distance function.
197       ObjectParameter<DistanceFunction<O>> distanceP = new ObjectParameter<>(DistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
198       if(config.grab(distanceP)) {
199         distfun = distanceP.instantiateClass(config);
200       }
201     }
202 
203     /**
204      * Configure the epsilon parameter.
205      *
206      * @param config Parameter source
207      */
configEpsilon(Parameterization config)208     protected void configEpsilon(Parameterization config) {
209       // Get the epsilon parameter
210       DoubleParameter epsilonP = new DoubleParameter(DBSCAN.Parameterizer.EPSILON_ID);
211       if(config.grab(epsilonP)) {
212         epsilon = epsilonP.getValue();
213       }
214     }
215   }
216 }
217