1 /* 2 * Copyright (C) 2015 3 * Johannes Schneider, ABB Research, Switzerland, 4 * johannes.schneider@alumni.ethz.ch 5 * 6 * This file is part of ELKI: 7 * Environment for Developing KDD-Applications Supported by Index-Structures 8 * 9 * Copyright (C) 2018 10 * ELKI Development Team 11 * 12 * This program is free software: you can redistribute it and/or modify 13 * it under the terms of the GNU Affero General Public License as published by 14 * the Free Software Foundation, either version 3 of the License, or 15 * (at your option) any later version. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU Affero General Public License for more details. 21 * 22 * You should have received a copy of the GNU Affero General Public License 23 * along with this program. If not, see <http://www.gnu.org/licenses/>. 24 */ 25 package de.lmu.ifi.dbs.elki.algorithm.clustering.optics; 26 27 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; 28 import de.lmu.ifi.dbs.elki.data.NumberVector; 29 import de.lmu.ifi.dbs.elki.data.type.TypeInformation; 30 import de.lmu.ifi.dbs.elki.data.type.TypeUtil; 31 import de.lmu.ifi.dbs.elki.database.Database; 32 import de.lmu.ifi.dbs.elki.database.datastore.DataStore; 33 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; 34 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; 35 import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; 36 import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; 37 import de.lmu.ifi.dbs.elki.database.ids.DBID; 38 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; 39 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; 40 import de.lmu.ifi.dbs.elki.database.ids.DBIDs; 41 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; 42 import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; 43 import de.lmu.ifi.dbs.elki.database.relation.Relation; 44 import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; 45 import de.lmu.ifi.dbs.elki.index.preprocessed.fastoptics.RandomProjectedNeighborsAndDensities; 46 import de.lmu.ifi.dbs.elki.logging.Logging; 47 import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; 48 import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; 49 import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; 50 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 51 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 52 import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; 53 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; 54 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; 55 56 /** 57 * FastOPTICS algorithm (Fast approximation of OPTICS) 58 * <p> 59 * Note that this is <em>not</em> FOPTICS as in "Fuzzy OPTICS"! 60 * <p> 61 * Reference: 62 * <p> 63 * J. Schneider, M. Vlachos<br> 64 * Fast parameterless density-based clustering via random projections<br> 65 * Proc. 22nd ACM Int. Conf. on Information and Knowledge Management (CIKM 2013) 66 * <p> 67 * This is based on the original code provided by Johannes Schneider, with 68 * ELKIfications and optimizations by Erich Schubert. 69 * 70 * @author Johannes Schneider 71 * @author Erich Schubert 72 * @since 0.7.0 73 * 74 * @composed - - - RandomProjectedNeighborsAndDensities 75 */ 76 @Reference(authors = "J. Schneider, M. Vlachos", // 77 title = "Fast parameterless density-based clustering via random projections", // 78 booktitle = "Proc. 22nd ACM Int. Conf. on Information & Knowledge Management (CIKM 2013)", // 79 url = "https://doi.org/10.1145/2505515.2505590", // 80 bibkey = "DBLP:conf/cikm/SchneiderV13") 81 public class FastOPTICS<V extends NumberVector> extends AbstractAlgorithm<ClusterOrder> implements OPTICSTypeAlgorithm { 82 /** 83 * Class logger. 84 */ 85 private static final Logging LOG = Logging.getLogger(FastOPTICS.class); 86 87 /** 88 * undefined value for (reachability/average) distance 89 */ 90 public static final double UNDEFINED_DISTANCE = -0.1f; 91 92 /** 93 * Result: output order of points 94 */ 95 ClusterOrder order; 96 97 /** 98 * Result: reachability distances 99 */ 100 WritableDoubleDataStore reachDist; 101 102 /** 103 * processed points 104 */ 105 ModifiableDBIDs processed; 106 107 /** 108 * neighbors of a point 109 */ 110 DataStore<? extends DBIDs> neighs; 111 112 /** 113 * Inverse Densities correspond to average distances in point set of 114 * projections 115 */ 116 DoubleDataStore inverseDensities; 117 118 /** 119 * MinPts parameter. 120 */ 121 int minPts; 122 123 /** 124 * Index. 125 */ 126 RandomProjectedNeighborsAndDensities<V> index; 127 128 /** 129 * Constructor. 130 * 131 * @param minpts Minimum number of neighbors. 132 * @param index Index 133 */ FastOPTICS(int minpts, RandomProjectedNeighborsAndDensities<V> index)134 public FastOPTICS(int minpts, RandomProjectedNeighborsAndDensities<V> index) { 135 super(); 136 this.minPts = minpts; 137 this.index = index; 138 } 139 140 /** 141 * Run the algorithm. 142 * 143 * @param db Database 144 * @param rel Relation 145 */ run(Database db, Relation<V> rel)146 public ClusterOrder run(Database db, Relation<V> rel) { 147 DBIDs ids = rel.getDBIDs(); 148 DistanceQuery<V> dq = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC); 149 150 // initialize points used and reachability distance 151 reachDist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, UNDEFINED_DISTANCE); 152 153 // compute projections, density estimates and neighborhoods 154 index.computeSetsBounds(rel, minPts, ids); // project points 155 inverseDensities = index.computeAverageDistInSet(); // compute densities 156 neighs = index.getNeighs(); // get neighbors of points 157 158 // compute ordering as for OPTICS 159 FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("FastOPTICS clustering", ids.size(), LOG) : null; 160 processed = DBIDUtil.newHashSet(ids.size()); 161 order = new ClusterOrder(ids, "FastOPTICS Cluster Order", "fast-optics"); 162 for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { 163 if(!processed.contains(it)) { 164 expandClusterOrder(DBIDUtil.deref(it), order, dq, prog); 165 } 166 } 167 index.logStatistics(); 168 LOG.ensureCompleted(prog); 169 return order; 170 } 171 172 /** 173 * OPTICS algorithm for processing a point, but with different density 174 * estimates 175 * 176 * @param ipt Point 177 * @param order Cluster order (output) 178 * @param dq Distance query 179 * @param prog Progress for logging. 180 */ expandClusterOrder(DBID ipt, ClusterOrder order, DistanceQuery<V> dq, FiniteProgress prog)181 protected void expandClusterOrder(DBID ipt, ClusterOrder order, DistanceQuery<V> dq, FiniteProgress prog) { 182 UpdatableHeap<OPTICSHeapEntry> heap = new UpdatableHeap<>(); 183 heap.add(new OPTICSHeapEntry(ipt, null, Double.POSITIVE_INFINITY)); 184 while(!heap.isEmpty()) { 185 final OPTICSHeapEntry current = heap.poll(); 186 DBID currPt = current.objectID; 187 order.add(currPt, current.reachability, current.predecessorID); 188 processed.add(currPt); 189 double coredist = inverseDensities.doubleValue(currPt); 190 for(DBIDIter it = neighs.get(currPt).iter(); it.valid(); it.advance()) { 191 if(processed.contains(it)) { 192 continue; 193 } 194 double nrdist = dq.distance(currPt, it); 195 if(coredist > nrdist) { 196 nrdist = coredist; 197 } 198 if(reachDist.doubleValue(it) == UNDEFINED_DISTANCE) { 199 reachDist.put(it, nrdist); 200 } 201 else if(nrdist < reachDist.doubleValue(it)) { 202 reachDist.put(it, nrdist); 203 } 204 heap.add(new OPTICSHeapEntry(DBIDUtil.deref(it), currPt, nrdist)); 205 } 206 LOG.incrementProcessed(prog); 207 } 208 } 209 210 @Override getMinPts()211 public int getMinPts() { 212 return minPts; 213 } 214 215 @Override getInputTypeRestriction()216 public TypeInformation[] getInputTypeRestriction() { 217 return TypeUtil.array(EuclideanDistanceFunction.STATIC.getInputTypeRestriction()); 218 } 219 220 @Override getLogger()221 protected Logging getLogger() { 222 return LOG; 223 } 224 225 /** 226 * Parameterization class. 227 * 228 * @author Erich Schubert 229 * 230 * @hidden 231 * 232 * @param <V> Vector type 233 */ 234 public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { 235 /** 236 * Minimum number of neighbors for density estimation. 237 */ 238 int minpts; 239 240 /** 241 * Random projection index. 242 */ 243 RandomProjectedNeighborsAndDensities<V> index; 244 245 @Override makeOptions(Parameterization config)246 protected void makeOptions(Parameterization config) { 247 super.makeOptions(config); 248 IntParameter minptsP = new IntParameter(AbstractOPTICS.Parameterizer.MINPTS_ID) // 249 .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); 250 if(config.grab(minptsP)) { 251 minpts = minptsP.intValue(); 252 } 253 Class<RandomProjectedNeighborsAndDensities<V>> clz = ClassGenericsUtil.uglyCastIntoSubclass(RandomProjectedNeighborsAndDensities.class); 254 index = config.tryInstantiate(clz); 255 } 256 257 @Override makeInstance()258 protected FastOPTICS<V> makeInstance() { 259 return new FastOPTICS<V>(minpts, index); 260 } 261 } 262 } 263