1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.algorithm.outlier.svm;
22 
23 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
24 import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
25 import de.lmu.ifi.dbs.elki.data.NumberVector;
26 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
27 import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
28 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
29 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
30 import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
31 import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
32 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
33 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
34 import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
35 import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
36 import de.lmu.ifi.dbs.elki.database.relation.Relation;
37 import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
38 import de.lmu.ifi.dbs.elki.logging.Logging;
39 import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
40 import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
41 import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
42 import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
43 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
44 import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
45 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
46 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
47 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
48 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
49 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
50 import libsvm.svm;
51 import libsvm.svm_model;
52 import libsvm.svm_node;
53 import libsvm.svm_parameter;
54 import libsvm.svm_print_interface;
55 import libsvm.svm_problem;
56 
57 /**
58  * Outlier-detection using one-class support vector machines.
59  * <p>
60  * Important note: from literature, the one-class SVM is trained as if 0 was the
61  * only counterexample. Outliers will only be detected when they are close to
62  * the origin in kernel space! In our experience, results from this method are
63  * rather mixed, in particular as you would likely need to tune hyperparameters.
64  * Results may be better if you have a training data set with positive examples
65  * only, then apply it only to new data (which is currently not supported in
66  * this implementation, it assumes a single-dataset scenario).
67  * <p>
68  * Reference:
69  * <p>
70  * B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C.
71  * Williamson<br>
72  * Estimating the support of a high-dimensional distribution<br>
73  * Neural computation 13.7
74  *
75  * @author Erich Schubert
76  * @since 0.7.0
77  *
78  * @param V vector type
79  */
80 @Reference(authors = "B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson", //
81     title = "Estimating the support of a high-dimensional distribution", //
82     booktitle = "Neural computation 13.7", //
83     url = "https://doi.org/10.1162/089976601750264965", //
84     bibkey = "DBLP:journals/neco/ScholkopfPSSW01")
85 public class LibSVMOneClassOutlierDetection<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
86   /**
87    * Class logger.
88    */
89   private static final Logging LOG = Logging.getLogger(LibSVMOneClassOutlierDetection.class);
90 
91   /**
92    * Kernel functions. Expose as enum for convenience.
93    */
94   public enum SVMKernel { //
95     LINEAR, // Linear
96     QUADRATIC, // Quadratic
97     CUBIC, // Cubic
98     RBF, // Radial basis functions
99     SIGMOID, // Sigmoid
100   }
101 
102   /**
103    * Kernel function in use.
104    */
105   protected SVMKernel kernel = SVMKernel.RBF;
106 
107   /**
108    * Nu parameter.
109    */
110   double nu = 0.05;
111 
112   /**
113    * Constructor.
114    *
115    * @param kernel Kernel to use with SVM.
116    * @param nu Nu parameter
117    */
LibSVMOneClassOutlierDetection(SVMKernel kernel, double nu)118   public LibSVMOneClassOutlierDetection(SVMKernel kernel, double nu) {
119     super();
120     this.kernel = kernel;
121     this.nu = nu;
122   }
123 
124   /**
125    * Run one-class SVM.
126    *
127    * @param relation Data relation
128    * @return Outlier result.
129    */
run(Relation<V> relation)130   public OutlierResult run(Relation<V> relation) {
131     final int dim = RelationUtil.dimensionality(relation);
132     final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
133 
134     svm.svm_set_print_string_function(LOG_HELPER);
135 
136     svm_parameter param = new svm_parameter();
137     param.svm_type = svm_parameter.ONE_CLASS;
138     param.kernel_type = svm_parameter.LINEAR;
139     param.degree = 3;
140     switch(kernel){
141     case LINEAR:
142       param.kernel_type = svm_parameter.LINEAR;
143       break;
144     case QUADRATIC:
145       param.kernel_type = svm_parameter.POLY;
146       param.degree = 2;
147       break;
148     case CUBIC:
149       param.kernel_type = svm_parameter.POLY;
150       param.degree = 3;
151       break;
152     case RBF:
153       param.kernel_type = svm_parameter.RBF;
154       break;
155     case SIGMOID:
156       param.kernel_type = svm_parameter.SIGMOID;
157       break;
158     default:
159       throw new AbortException("Invalid kernel parameter: " + kernel);
160     }
161     // TODO: expose additional parameters to the end user!
162     param.nu = nu;
163     param.coef0 = 0.;
164     param.cache_size = 10000;
165     param.C = 1;
166     param.eps = 1e-4; // not used by one-class?
167     param.p = 0.1; // not used by one-class?
168     param.shrinking = 0;
169     param.probability = 0;
170     param.nr_weight = 0;
171     param.weight_label = new int[0];
172     param.weight = new double[0];
173     param.gamma = 1. / dim;
174 
175     // Transform data:
176     svm_problem prob = new svm_problem();
177     prob.l = relation.size();
178     prob.x = new svm_node[prob.l][];
179     prob.y = new double[prob.l];
180     {
181       DBIDIter iter = ids.iter();
182       for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
183         V vec = relation.get(iter);
184         // TODO: support compact sparse vectors, too!
185         svm_node[] x = new svm_node[dim];
186         for(int d = 0; d < dim; d++) {
187           x[d] = new svm_node();
188           x[d].index = d + 1;
189           x[d].value = vec.doubleValue(d);
190         }
191         prob.x[i] = x;
192         prob.y[i] = +1;
193       }
194     }
195 
196     if(LOG.isVerbose()) {
197       LOG.verbose("Training one-class SVM...");
198     }
199     String err = svm.svm_check_parameter(prob, param);
200     if(err != null) {
201       LOG.warning("svm_check_parameter: " + err);
202     }
203     svm_model model = svm.svm_train(prob, param);
204 
205     if(LOG.isVerbose()) {
206       LOG.verbose("Predicting...");
207     }
208     WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
209     DoubleMinMax mm = new DoubleMinMax();
210     {
211       DBIDIter iter = ids.iter();
212       double[] buf = new double[svm.svm_get_nr_class(model)];
213       for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
214         V vec = relation.get(iter);
215         svm_node[] x = new svm_node[dim];
216         for(int d = 0; d < dim; d++) {
217           x[d] = new svm_node();
218           x[d].index = d + 1;
219           x[d].value = vec.doubleValue(d);
220         }
221         svm.svm_predict_values(model, x, buf);
222         double score = -buf[0]; // / param.gamma; // Heuristic rescaling, sorry.
223         // Unfortunately, libsvm one-class currently yields a binary decision.
224         scores.putDouble(iter, score);
225         mm.put(score);
226       }
227     }
228     DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
229     OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
230     return new OutlierResult(scoreMeta, scoreResult);
231   }
232 
233   @Override
getInputTypeRestriction()234   public TypeInformation[] getInputTypeRestriction() {
235     return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
236   }
237 
238   @Override
getLogger()239   protected Logging getLogger() {
240     return LOG;
241   }
242 
243   /**
244    * Setup logging helper for SVM.
245    */
246   static final svm_print_interface LOG_HELPER = new svm_print_interface() {
247     @Override
248     public void print(String arg0) {
249       if(LOG.isVerbose()) {
250         LOG.verbose(arg0);
251       }
252     }
253   };
254 
255   /**
256    * Parameterization class.
257    *
258    * @author Erich Schubert
259    *
260    * @hidden
261    *
262    * @param <V> Vector type
263    */
264   public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
265     /**
266      * Parameter for kernel function.
267      */
268     public static final OptionID KERNEL_ID = new OptionID("svm.kernel", "Kernel to use with SVM.");
269 
270     /**
271      * SVM nu parameter
272      */
273     public static final OptionID NU_ID = new OptionID("svm.nu", "SVM nu parameter.");
274 
275     /**
276      * Kernel in use.
277      */
278     protected SVMKernel kernel = SVMKernel.RBF;
279 
280     /**
281      * Nu parameter.
282      */
283     protected double nu = 0.05;
284 
285     @Override
makeOptions(Parameterization config)286     protected void makeOptions(Parameterization config) {
287       super.makeOptions(config);
288 
289       EnumParameter<SVMKernel> kernelP = new EnumParameter<>(KERNEL_ID, SVMKernel.class, SVMKernel.RBF);
290       if(config.grab(kernelP)) {
291         kernel = kernelP.getValue();
292       }
293 
294       DoubleParameter nuP = new DoubleParameter(NU_ID, 0.05);
295       if(config.grab(nuP)) {
296         nu = nuP.doubleValue();
297       }
298     }
299 
300     @Override
makeInstance()301     protected LibSVMOneClassOutlierDetection<V> makeInstance() {
302       return new LibSVMOneClassOutlierDetection<>(kernel, nu);
303     }
304   }
305 }
306