1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
22 
23 import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
24 import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN;
25 import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConCorePredicate;
26 import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate;
27 import de.lmu.ifi.dbs.elki.data.NumberVector;
28 import de.lmu.ifi.dbs.elki.logging.Logging;
29 import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
30 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
31 import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
32 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
33 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
34 import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
35 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
36 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
37 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
38 
39 /**
40  * PreDeCon computes clusters of subspace preference weighted connected points.
41  * The algorithm searches for local subgroups of a set of feature vectors having
42  * a low variance along one or more (but not all) attributes.
43  * <p>
44  * Reference:
45  * <p>
46  * Christian Böhm, Karin Kailing, Hans-Peter Kriegel, Peer Kröger<br>
47  * Density Connected Clustering with Local Subspace Preferences.<br>
48  * Proc. 4th IEEE Int. Conf. on Data Mining (ICDM'04)
49  *
50  * @author Peer Kröger
51  * @since 0.1
52  *
53  * @has - - - PreDeCon.Settings
54  * @composed - - - PreDeConNeighborPredicate
55  * @composed - - - PreDeConCorePredicate
56  *
57  * @param <V> the type of NumberVector handled by this Algorithm
58  */
59 @Title("PreDeCon: Subspace Preference weighted Density Connected Clustering")
60 @Description("PreDeCon computes clusters of subspace preference weighted connected points. "//
61     + "The algorithm searches for local subgroups of a set of feature vectors having " + "a low variance along one or more (but not all) attributes.")
62 @Reference(authors = "Christian Böhm, Karin Kailing, Hans-Peter Kriegel, Peer Kröger", //
63     title = "Density Connected Clustering with Local Subspace Preferences", //
64     booktitle = "Proc. 4th IEEE Int. Conf. on Data Mining (ICDM'04)", //
65     url = "https://doi.org/10.1109/ICDM.2004.10087", //
66     bibkey = "DBLP:conf/icdm/BohmKKK04")
67 public class PreDeCon<V extends NumberVector> extends GeneralizedDBSCAN {
68   /**
69    * The logger for this class.
70    */
71   private static final Logging LOG = Logging.getLogger(PreDeCon.class);
72 
73   /**
74    * Constructor.
75    *
76    * @param settings PreDeCon settings.
77    */
PreDeCon(PreDeCon.Settings settings)78   public PreDeCon(PreDeCon.Settings settings) {
79     super(new PreDeConNeighborPredicate<>(settings), new PreDeConCorePredicate(settings), false);
80   }
81 
82   @Override
getLogger()83   protected Logging getLogger() {
84     return LOG;
85   }
86 
87   /**
88    * Class containing all the PreDeCon settings.
89    *
90    * @author Erich Schubert
91    */
92   public static class Settings {
93     /**
94      * Query radius parameter epsilon.
95      */
96     public double epsilon;
97 
98     /**
99      * The threshold for small eigenvalues.
100      */
101     public double delta;
102 
103     /**
104      * The kappa penality factor for deviations in preferred dimensions.
105      */
106     public double kappa = Parameterizer.KAPPA_DEFAULT;
107 
108     /**
109      * DBSCAN Minpts parameter, aka "mu".
110      */
111     public int minpts;
112 
113     /**
114      * Lambda: Maximum subspace dimensionality.
115      */
116     public int lambda = Integer.MAX_VALUE;
117 
118     /**
119      * Parameterization class.
120      *
121      * @author Erich Schubert
122      */
123     public static class Parameterizer extends AbstractParameterizer {
124       /**
125        * Parameter Delta: maximum variance allowed
126        */
127       public static final OptionID DELTA_ID = new OptionID("predecon.delta", "A double specifying the variance threshold for small Eigenvalues.");
128 
129       /**
130        * Parameter Kappa: penalty for deviations in preferred dimensions.
131        */
132       public static final OptionID KAPPA_ID = new OptionID("predecon.kappa", "Penalty factor for deviations in preferred (low-variance) dimensions.");
133 
134       /**
135        * Default for kappa parameter.
136        */
137       public static final double KAPPA_DEFAULT = 20.;
138 
139       /**
140        * Parameter Lambda: maximum dimensionality allowed.
141        */
142       public static final OptionID LAMBDA_ID = new OptionID("predecon.lambda", "Maximum dimensionality to consider for core points.");
143 
144       /**
145        * Settings to build.
146        */
147       Settings settings;
148 
149       @Override
makeOptions(Parameterization config)150       public void makeOptions(Parameterization config) {
151         settings = new Settings();
152         configEpsilon(config);
153         configMinPts(config);
154         configDelta(config);
155         configKappa(config);
156         configLambda(config);
157       }
158 
159       /**
160        * Configure the epsilon radius parameter.
161        *
162        * @param config Parameter source
163        */
configEpsilon(Parameterization config)164       protected void configEpsilon(Parameterization config) {
165         DoubleParameter epsilonP = new DoubleParameter(DBSCAN.Parameterizer.EPSILON_ID) //
166             .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
167         if(config.grab(epsilonP)) {
168           settings.epsilon = epsilonP.doubleValue();
169         }
170       }
171 
172       /**
173        * Configure the minPts aka "mu" parameter.
174        *
175        * @param config Parameter source
176        */
configMinPts(Parameterization config)177       protected void configMinPts(Parameterization config) {
178         IntParameter minptsP = new IntParameter(DBSCAN.Parameterizer.MINPTS_ID) //
179             .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
180         if(config.grab(minptsP)) {
181           settings.minpts = minptsP.intValue();
182         }
183       }
184 
185       /**
186        * Configure the delta parameter.
187        *
188        * @param config Parameter source
189        */
configDelta(Parameterization config)190       protected void configDelta(Parameterization config) {
191         DoubleParameter deltaP = new DoubleParameter(DELTA_ID) //
192             .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
193         if(config.grab(deltaP)) {
194           settings.delta = deltaP.doubleValue();
195         }
196       }
197 
198       /**
199        * Configure the kappa parameter.
200        *
201        * @param config Parameter source
202        */
configKappa(Parameterization config)203       protected void configKappa(Parameterization config) {
204         DoubleParameter kappaP = new DoubleParameter(KAPPA_ID) //
205             .addConstraint(CommonConstraints.GREATER_THAN_ONE_DOUBLE) //
206             .setDefaultValue(KAPPA_DEFAULT);
207         if(config.grab(kappaP)) {
208           settings.kappa = kappaP.doubleValue();
209         }
210       }
211 
212       /**
213        * Configure the delta parameter.
214        *
215        * @param config Parameter source
216        */
configLambda(Parameterization config)217       protected void configLambda(Parameterization config) {
218         IntParameter lambdaP = new IntParameter(LAMBDA_ID) //
219             .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT) //
220             .setOptional(true);
221         if(config.grab(lambdaP)) {
222           settings.lambda = lambdaP.intValue();
223         }
224       }
225 
226       @Override
makeInstance()227       public Settings makeInstance() {
228         return settings;
229       }
230     }
231   }
232 
233   /**
234    * Parameterization class.
235    *
236    * @author Erich Schubert
237    */
238   public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
239     /**
240      * PreDeConSettings.
241      */
242     protected PreDeCon.Settings settings;
243 
244     @Override
makeOptions(Parameterization config)245     protected void makeOptions(Parameterization config) {
246       settings = config.tryInstantiate(PreDeCon.Settings.class);
247     }
248 
249     @Override
makeInstance()250     protected PreDeCon<V> makeInstance() {
251       return new PreDeCon<>(settings);
252     }
253   }
254 }
255