1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.algorithm.outlier.svm; 22 23 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; 24 import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; 25 import de.lmu.ifi.dbs.elki.data.NumberVector; 26 import de.lmu.ifi.dbs.elki.data.type.TypeInformation; 27 import de.lmu.ifi.dbs.elki.data.type.TypeUtil; 28 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; 29 import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; 30 import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; 31 import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; 32 import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; 33 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; 34 import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; 35 import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; 36 import de.lmu.ifi.dbs.elki.database.relation.Relation; 37 import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; 38 import de.lmu.ifi.dbs.elki.logging.Logging; 39 import de.lmu.ifi.dbs.elki.math.DoubleMinMax; 40 import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; 41 import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; 42 import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; 43 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 44 import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; 45 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; 46 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; 47 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; 48 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; 49 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter; 50 import libsvm.svm; 51 import libsvm.svm_model; 52 import libsvm.svm_node; 53 import libsvm.svm_parameter; 54 import libsvm.svm_print_interface; 55 import libsvm.svm_problem; 56 57 /** 58 * Outlier-detection using one-class support vector machines. 59 * <p> 60 * Important note: from literature, the one-class SVM is trained as if 0 was the 61 * only counterexample. Outliers will only be detected when they are close to 62 * the origin in kernel space! In our experience, results from this method are 63 * rather mixed, in particular as you would likely need to tune hyperparameters. 64 * Results may be better if you have a training data set with positive examples 65 * only, then apply it only to new data (which is currently not supported in 66 * this implementation, it assumes a single-dataset scenario). 67 * <p> 68 * Reference: 69 * <p> 70 * B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. 71 * Williamson<br> 72 * Estimating the support of a high-dimensional distribution<br> 73 * Neural computation 13.7 74 * 75 * @author Erich Schubert 76 * @since 0.7.0 77 * 78 * @param V vector type 79 */ 80 @Reference(authors = "B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson", // 81 title = "Estimating the support of a high-dimensional distribution", // 82 booktitle = "Neural computation 13.7", // 83 url = "https://doi.org/10.1162/089976601750264965", // 84 bibkey = "DBLP:journals/neco/ScholkopfPSSW01") 85 public class LibSVMOneClassOutlierDetection<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { 86 /** 87 * Class logger. 88 */ 89 private static final Logging LOG = Logging.getLogger(LibSVMOneClassOutlierDetection.class); 90 91 /** 92 * Kernel functions. Expose as enum for convenience. 93 */ 94 public enum SVMKernel { // 95 LINEAR, // Linear 96 QUADRATIC, // Quadratic 97 CUBIC, // Cubic 98 RBF, // Radial basis functions 99 SIGMOID, // Sigmoid 100 } 101 102 /** 103 * Kernel function in use. 104 */ 105 protected SVMKernel kernel = SVMKernel.RBF; 106 107 /** 108 * Nu parameter. 109 */ 110 double nu = 0.05; 111 112 /** 113 * Constructor. 114 * 115 * @param kernel Kernel to use with SVM. 116 * @param nu Nu parameter 117 */ LibSVMOneClassOutlierDetection(SVMKernel kernel, double nu)118 public LibSVMOneClassOutlierDetection(SVMKernel kernel, double nu) { 119 super(); 120 this.kernel = kernel; 121 this.nu = nu; 122 } 123 124 /** 125 * Run one-class SVM. 126 * 127 * @param relation Data relation 128 * @return Outlier result. 129 */ run(Relation<V> relation)130 public OutlierResult run(Relation<V> relation) { 131 final int dim = RelationUtil.dimensionality(relation); 132 final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); 133 134 svm.svm_set_print_string_function(LOG_HELPER); 135 136 svm_parameter param = new svm_parameter(); 137 param.svm_type = svm_parameter.ONE_CLASS; 138 param.kernel_type = svm_parameter.LINEAR; 139 param.degree = 3; 140 switch(kernel){ 141 case LINEAR: 142 param.kernel_type = svm_parameter.LINEAR; 143 break; 144 case QUADRATIC: 145 param.kernel_type = svm_parameter.POLY; 146 param.degree = 2; 147 break; 148 case CUBIC: 149 param.kernel_type = svm_parameter.POLY; 150 param.degree = 3; 151 break; 152 case RBF: 153 param.kernel_type = svm_parameter.RBF; 154 break; 155 case SIGMOID: 156 param.kernel_type = svm_parameter.SIGMOID; 157 break; 158 default: 159 throw new AbortException("Invalid kernel parameter: " + kernel); 160 } 161 // TODO: expose additional parameters to the end user! 162 param.nu = nu; 163 param.coef0 = 0.; 164 param.cache_size = 10000; 165 param.C = 1; 166 param.eps = 1e-4; // not used by one-class? 167 param.p = 0.1; // not used by one-class? 168 param.shrinking = 0; 169 param.probability = 0; 170 param.nr_weight = 0; 171 param.weight_label = new int[0]; 172 param.weight = new double[0]; 173 param.gamma = 1. / dim; 174 175 // Transform data: 176 svm_problem prob = new svm_problem(); 177 prob.l = relation.size(); 178 prob.x = new svm_node[prob.l][]; 179 prob.y = new double[prob.l]; 180 { 181 DBIDIter iter = ids.iter(); 182 for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) { 183 V vec = relation.get(iter); 184 // TODO: support compact sparse vectors, too! 185 svm_node[] x = new svm_node[dim]; 186 for(int d = 0; d < dim; d++) { 187 x[d] = new svm_node(); 188 x[d].index = d + 1; 189 x[d].value = vec.doubleValue(d); 190 } 191 prob.x[i] = x; 192 prob.y[i] = +1; 193 } 194 } 195 196 if(LOG.isVerbose()) { 197 LOG.verbose("Training one-class SVM..."); 198 } 199 String err = svm.svm_check_parameter(prob, param); 200 if(err != null) { 201 LOG.warning("svm_check_parameter: " + err); 202 } 203 svm_model model = svm.svm_train(prob, param); 204 205 if(LOG.isVerbose()) { 206 LOG.verbose("Predicting..."); 207 } 208 WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB); 209 DoubleMinMax mm = new DoubleMinMax(); 210 { 211 DBIDIter iter = ids.iter(); 212 double[] buf = new double[svm.svm_get_nr_class(model)]; 213 for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) { 214 V vec = relation.get(iter); 215 svm_node[] x = new svm_node[dim]; 216 for(int d = 0; d < dim; d++) { 217 x[d] = new svm_node(); 218 x[d].index = d + 1; 219 x[d].value = vec.doubleValue(d); 220 } 221 svm.svm_predict_values(model, x, buf); 222 double score = -buf[0]; // / param.gamma; // Heuristic rescaling, sorry. 223 // Unfortunately, libsvm one-class currently yields a binary decision. 224 scores.putDouble(iter, score); 225 mm.put(score); 226 } 227 } 228 DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids); 229 OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.); 230 return new OutlierResult(scoreMeta, scoreResult); 231 } 232 233 @Override getInputTypeRestriction()234 public TypeInformation[] getInputTypeRestriction() { 235 return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD); 236 } 237 238 @Override getLogger()239 protected Logging getLogger() { 240 return LOG; 241 } 242 243 /** 244 * Setup logging helper for SVM. 245 */ 246 static final svm_print_interface LOG_HELPER = new svm_print_interface() { 247 @Override 248 public void print(String arg0) { 249 if(LOG.isVerbose()) { 250 LOG.verbose(arg0); 251 } 252 } 253 }; 254 255 /** 256 * Parameterization class. 257 * 258 * @author Erich Schubert 259 * 260 * @hidden 261 * 262 * @param <V> Vector type 263 */ 264 public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { 265 /** 266 * Parameter for kernel function. 267 */ 268 public static final OptionID KERNEL_ID = new OptionID("svm.kernel", "Kernel to use with SVM."); 269 270 /** 271 * SVM nu parameter 272 */ 273 public static final OptionID NU_ID = new OptionID("svm.nu", "SVM nu parameter."); 274 275 /** 276 * Kernel in use. 277 */ 278 protected SVMKernel kernel = SVMKernel.RBF; 279 280 /** 281 * Nu parameter. 282 */ 283 protected double nu = 0.05; 284 285 @Override makeOptions(Parameterization config)286 protected void makeOptions(Parameterization config) { 287 super.makeOptions(config); 288 289 EnumParameter<SVMKernel> kernelP = new EnumParameter<>(KERNEL_ID, SVMKernel.class, SVMKernel.RBF); 290 if(config.grab(kernelP)) { 291 kernel = kernelP.getValue(); 292 } 293 294 DoubleParameter nuP = new DoubleParameter(NU_ID, 0.05); 295 if(config.grab(nuP)) { 296 nu = nuP.doubleValue(); 297 } 298 } 299 300 @Override makeInstance()301 protected LibSVMOneClassOutlierDetection<V> makeInstance() { 302 return new LibSVMOneClassOutlierDetection<>(kernel, nu); 303 } 304 } 305 } 306