"""
====================================================================
One-Class SVM versus One-Class SVM using Stochastic Gradient Descent
====================================================================

This example shows how to approximate the solution of
:class:`sklearn.svm.OneClassSVM` in the case of an RBF kernel with
:class:`sklearn.linear_model.SGDOneClassSVM`, a Stochastic Gradient Descent
(SGD) version of the One-Class SVM. A kernel approximation is first used in
order to apply :class:`sklearn.linear_model.SGDOneClassSVM` which implements a
linear One-Class SVM using SGD.

Note that :class:`sklearn.linear_model.SGDOneClassSVM` scales linearly with
the number of samples whereas the complexity of a kernelized
:class:`sklearn.svm.OneClassSVM` is at best quadratic with respect to the
number of samples. It is not the purpose of this example to illustrate the
benefits of such an approximation in terms of computation time but rather to
show that we obtain similar results on a toy dataset.

"""  # noqa: E501

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline

font = {"weight": "normal", "size": 15}

matplotlib.rc("font", **font)


def _plot_decision_function(title, decision_values, n_errors):
    """Draw one figure showing a fitted model's decision function.

    The figure contains the filled level sets of the decision function, the
    learned frontier (level 0), and the train / test / outlier samples. It
    reads the module-level ``xx``, ``yy``, ``X_train``, ``X_test`` and
    ``X_outliers`` arrays, which must be defined before the call.

    Parameters
    ----------
    title : str
        Title of the figure.
    decision_values : ndarray
        Decision function evaluated on the ``xx`` / ``yy`` grid, with the
        same shape as ``xx``.
    n_errors : tuple of int
        ``(train errors, regular-test errors, outlier errors)`` reported in
        the x-axis label.
    """
    n_err_train, n_err_test, n_err_outliers = n_errors
    marker_size = 20

    plt.figure(figsize=(9, 6))
    plt.title(title)
    # Negative side of the decision function (outlier region), 7 shaded levels.
    plt.contourf(
        xx,
        yy,
        decision_values,
        levels=np.linspace(decision_values.min(), 0, 7),
        cmap=plt.cm.PuBu,
    )
    # Level 0 is the learned frontier.
    frontier = plt.contour(
        xx, yy, decision_values, levels=[0], linewidths=2, colors="darkred"
    )
    # Positive side of the decision function (inlier region).
    plt.contourf(
        xx,
        yy,
        decision_values,
        levels=[0, decision_values.max()],
        colors="palevioletred",
    )

    train_pts = plt.scatter(
        X_train[:, 0], X_train[:, 1], c="white", s=marker_size, edgecolors="k"
    )
    test_pts = plt.scatter(
        X_test[:, 0], X_test[:, 1], c="blueviolet", s=marker_size, edgecolors="k"
    )
    outlier_pts = plt.scatter(
        X_outliers[:, 0], X_outliers[:, 1], c="gold", s=marker_size, edgecolors="k"
    )
    plt.axis("tight")
    plt.xlim((-4.5, 4.5))
    plt.ylim((-4.5, 4.5))

    # ``ContourSet.collections`` was deprecated in Matplotlib 3.8 and later
    # removed; ``legend_elements()`` is the supported way to obtain a legend
    # handle for the contour line and also works on older releases.
    frontier_handle = frontier.legend_elements()[0][0]
    plt.legend(
        [frontier_handle, train_pts, test_pts, outlier_pts],
        [
            "learned frontier",
            "training observations",
            "new regular observations",
            "new abnormal observations",
        ],
        loc="upper left",
    )
    plt.xlabel(
        "error train: %d/%d; errors novel regular: %d/%d; errors novel abnormal: %d/%d"
        % (
            n_err_train,
            X_train.shape[0],
            n_err_test,
            X_test.shape[0],
            n_err_outliers,
            X_outliers.shape[0],
        )
    )
    plt.show()


random_state = 42
rng = np.random.RandomState(random_state)

# Generate train data: two Gaussian blobs centred at (2, 2) and (-2, -2)
X = 0.3 * rng.randn(500, 2)
X_train = np.r_[X + 2, X - 2]
# Generate some regular novel observations
X = 0.3 * rng.randn(20, 2)
X_test = np.r_[X + 2, X - 2]
# Generate some abnormal novel observations
X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

# Evaluation grid for the decision functions; built once and shared by both
# models.
xx, yy = np.meshgrid(np.linspace(-4.5, 4.5, 50), np.linspace(-4.5, 4.5, 50))
grid_points = np.c_[xx.ravel(), yy.ravel()]

# OCSVM hyperparameters
nu = 0.05
gamma = 2.0

# Fit the One-Class SVM
clf = OneClassSVM(gamma=gamma, kernel="rbf", nu=nu)
clf.fit(X_train)
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)
# Regular samples predicted as -1 (outlier) and abnormal samples predicted as
# +1 (inlier) are counted as errors.
n_error_train = np.count_nonzero(y_pred_train == -1)
n_error_test = np.count_nonzero(y_pred_test == -1)
n_error_outliers = np.count_nonzero(y_pred_outliers == 1)

Z = clf.decision_function(grid_points)
Z = Z.reshape(xx.shape)


# Fit the One-Class SVM using a kernel approximation and SGD
transform = Nystroem(gamma=gamma, random_state=random_state)
clf_sgd = SGDOneClassSVM(
    nu=nu, shuffle=True, fit_intercept=True, random_state=random_state, tol=1e-4
)
pipe_sgd = make_pipeline(transform, clf_sgd)
pipe_sgd.fit(X_train)
y_pred_train_sgd = pipe_sgd.predict(X_train)
y_pred_test_sgd = pipe_sgd.predict(X_test)
y_pred_outliers_sgd = pipe_sgd.predict(X_outliers)
n_error_train_sgd = np.count_nonzero(y_pred_train_sgd == -1)
n_error_test_sgd = np.count_nonzero(y_pred_test_sgd == -1)
n_error_outliers_sgd = np.count_nonzero(y_pred_outliers_sgd == 1)

Z_sgd = pipe_sgd.decision_function(grid_points)
Z_sgd = Z_sgd.reshape(xx.shape)

# plot the level sets of the decision function
_plot_decision_function(
    "One Class SVM", Z, (n_error_train, n_error_test, n_error_outliers)
)

_plot_decision_function(
    "Online One-Class SVM",
    Z_sgd,
    (n_error_train_sgd, n_error_test_sgd, n_error_outliers_sgd),
)