1 /*
2  * Copyright (C) 2015
3  * Johannes Schneider, ABB Research, Switzerland,
4  * johannes.schneider@alumni.ethz.ch
5  *
6  * This file is part of ELKI:
7  * Environment for Developing KDD-Applications Supported by Index-Structures
8  *
9  * Copyright (C) 2018
10  * ELKI Development Team
11  *
12  * This program is free software: you can redistribute it and/or modify
13  * it under the terms of the GNU Affero General Public License as published by
14  * the Free Software Foundation, either version 3 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Affero General Public License for more details.
21  *
22  * You should have received a copy of the GNU Affero General Public License
23  * along with this program. If not, see <http://www.gnu.org/licenses/>.
24  */
25 package de.lmu.ifi.dbs.elki.algorithm.clustering.optics;
26 
27 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
28 import de.lmu.ifi.dbs.elki.data.NumberVector;
29 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
30 import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
31 import de.lmu.ifi.dbs.elki.database.Database;
32 import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
33 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
34 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
35 import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
36 import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
37 import de.lmu.ifi.dbs.elki.database.ids.DBID;
38 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
39 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
40 import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
41 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
42 import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
43 import de.lmu.ifi.dbs.elki.database.relation.Relation;
44 import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
45 import de.lmu.ifi.dbs.elki.index.preprocessed.fastoptics.RandomProjectedNeighborsAndDensities;
46 import de.lmu.ifi.dbs.elki.logging.Logging;
47 import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
48 import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
49 import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap;
50 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
51 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
52 import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
53 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
54 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
55 
56 /**
57  * FastOPTICS algorithm (Fast approximation of OPTICS)
58  * <p>
59  * Note that this is <em>not</em> FOPTICS as in "Fuzzy OPTICS"!
60  * <p>
61  * Reference:
62  * <p>
63  * J. Schneider, M. Vlachos<br>
64  * Fast parameterless density-based clustering via random projections<br>
65  * Proc. 22nd ACM Int. Conf. on Information and Knowledge Management (CIKM 2013)
66  * <p>
67  * This is based on the original code provided by Johannes Schneider, with
68  * ELKIfications and optimizations by Erich Schubert.
69  *
70  * @author Johannes Schneider
71  * @author Erich Schubert
72  * @since 0.7.0
73  *
74  * @composed - - - RandomProjectedNeighborsAndDensities
75  */
76 @Reference(authors = "J. Schneider, M. Vlachos", //
77     title = "Fast parameterless density-based clustering via random projections", //
78     booktitle = "Proc. 22nd ACM Int. Conf. on Information & Knowledge Management (CIKM 2013)", //
79     url = "https://doi.org/10.1145/2505515.2505590", //
80     bibkey = "DBLP:conf/cikm/SchneiderV13")
81 public class FastOPTICS<V extends NumberVector> extends AbstractAlgorithm<ClusterOrder> implements OPTICSTypeAlgorithm {
82   /**
83    * Class logger.
84    */
85   private static final Logging LOG = Logging.getLogger(FastOPTICS.class);
86 
87   /**
88    * undefined value for (reachability/average) distance
89    */
90   public static final double UNDEFINED_DISTANCE = -0.1f;
91 
92   /**
93    * Result: output order of points
94    */
95   ClusterOrder order;
96 
97   /**
98    * Result: reachability distances
99    */
100   WritableDoubleDataStore reachDist;
101 
102   /**
103    * processed points
104    */
105   ModifiableDBIDs processed;
106 
107   /**
108    * neighbors of a point
109    */
110   DataStore<? extends DBIDs> neighs;
111 
112   /**
113    * Inverse Densities correspond to average distances in point set of
114    * projections
115    */
116   DoubleDataStore inverseDensities;
117 
118   /**
119    * MinPts parameter.
120    */
121   int minPts;
122 
123   /**
124    * Index.
125    */
126   RandomProjectedNeighborsAndDensities<V> index;
127 
128   /**
129    * Constructor.
130    *
131    * @param minpts Minimum number of neighbors.
132    * @param index Index
133    */
FastOPTICS(int minpts, RandomProjectedNeighborsAndDensities<V> index)134   public FastOPTICS(int minpts, RandomProjectedNeighborsAndDensities<V> index) {
135     super();
136     this.minPts = minpts;
137     this.index = index;
138   }
139 
140   /**
141    * Run the algorithm.
142    *
143    * @param db Database
144    * @param rel Relation
145    */
run(Database db, Relation<V> rel)146   public ClusterOrder run(Database db, Relation<V> rel) {
147     DBIDs ids = rel.getDBIDs();
148     DistanceQuery<V> dq = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);
149 
150     // initialize points used and reachability distance
151     reachDist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, UNDEFINED_DISTANCE);
152 
153     // compute projections, density estimates and neighborhoods
154     index.computeSetsBounds(rel, minPts, ids); // project points
155     inverseDensities = index.computeAverageDistInSet(); // compute densities
156     neighs = index.getNeighs(); // get neighbors of points
157 
158     // compute ordering as for OPTICS
159     FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("FastOPTICS clustering", ids.size(), LOG) : null;
160     processed = DBIDUtil.newHashSet(ids.size());
161     order = new ClusterOrder(ids, "FastOPTICS Cluster Order", "fast-optics");
162     for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
163       if(!processed.contains(it)) {
164         expandClusterOrder(DBIDUtil.deref(it), order, dq, prog);
165       }
166     }
167     index.logStatistics();
168     LOG.ensureCompleted(prog);
169     return order;
170   }
171 
172   /**
173    * OPTICS algorithm for processing a point, but with different density
174    * estimates
175    *
176    * @param ipt Point
177    * @param order Cluster order (output)
178    * @param dq Distance query
179    * @param prog Progress for logging.
180    */
expandClusterOrder(DBID ipt, ClusterOrder order, DistanceQuery<V> dq, FiniteProgress prog)181   protected void expandClusterOrder(DBID ipt, ClusterOrder order, DistanceQuery<V> dq, FiniteProgress prog) {
182     UpdatableHeap<OPTICSHeapEntry> heap = new UpdatableHeap<>();
183     heap.add(new OPTICSHeapEntry(ipt, null, Double.POSITIVE_INFINITY));
184     while(!heap.isEmpty()) {
185       final OPTICSHeapEntry current = heap.poll();
186       DBID currPt = current.objectID;
187       order.add(currPt, current.reachability, current.predecessorID);
188       processed.add(currPt);
189       double coredist = inverseDensities.doubleValue(currPt);
190       for(DBIDIter it = neighs.get(currPt).iter(); it.valid(); it.advance()) {
191         if(processed.contains(it)) {
192           continue;
193         }
194         double nrdist = dq.distance(currPt, it);
195         if(coredist > nrdist) {
196           nrdist = coredist;
197         }
198         if(reachDist.doubleValue(it) == UNDEFINED_DISTANCE) {
199           reachDist.put(it, nrdist);
200         }
201         else if(nrdist < reachDist.doubleValue(it)) {
202           reachDist.put(it, nrdist);
203         }
204         heap.add(new OPTICSHeapEntry(DBIDUtil.deref(it), currPt, nrdist));
205       }
206       LOG.incrementProcessed(prog);
207     }
208   }
209 
210   @Override
getMinPts()211   public int getMinPts() {
212     return minPts;
213   }
214 
215   @Override
getInputTypeRestriction()216   public TypeInformation[] getInputTypeRestriction() {
217     return TypeUtil.array(EuclideanDistanceFunction.STATIC.getInputTypeRestriction());
218   }
219 
220   @Override
getLogger()221   protected Logging getLogger() {
222     return LOG;
223   }
224 
225   /**
226    * Parameterization class.
227    *
228    * @author Erich Schubert
229    *
230    * @hidden
231    *
232    * @param <V> Vector type
233    */
234   public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
235     /**
236      * Minimum number of neighbors for density estimation.
237      */
238     int minpts;
239 
240     /**
241      * Random projection index.
242      */
243     RandomProjectedNeighborsAndDensities<V> index;
244 
245     @Override
makeOptions(Parameterization config)246     protected void makeOptions(Parameterization config) {
247       super.makeOptions(config);
248       IntParameter minptsP = new IntParameter(AbstractOPTICS.Parameterizer.MINPTS_ID) //
249           .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
250       if(config.grab(minptsP)) {
251         minpts = minptsP.intValue();
252       }
253       Class<RandomProjectedNeighborsAndDensities<V>> clz = ClassGenericsUtil.uglyCastIntoSubclass(RandomProjectedNeighborsAndDensities.class);
254       index = config.tryInstantiate(clz);
255     }
256 
257     @Override
makeInstance()258     protected FastOPTICS<V> makeInstance() {
259       return new FastOPTICS<V>(minpts, index);
260     }
261   }
262 }
263