#!/usr/bin/env python

# High-level helpers around the low-level LIBSVM ctypes bindings in ``svm``:
# loading/saving models, training, and prediction.

import os, sys
# Make the sibling ``svm`` / ``commonutil`` modules importable regardless of
# the caller's working directory.
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
from svm import *
from svm import __all__ as svm_all
from svm import scipy, sparse
from commonutil import *
from commonutil import __all__ as common_all

if sys.version_info[0] < 3:
    range = xrange
    from itertools import izip as zip
    _cstr = lambda s: s.encode("utf-8") if isinstance(s,unicode) else str(s)
else:
    _cstr = lambda s: bytes(s, "utf-8")

__all__ = ['svm_load_model', 'svm_predict', 'svm_save_model', 'svm_train'] + svm_all + common_all


def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return it.
    If the model cannot be loaded, a message is printed and None is returned.
    """
    model = libsvm.svm_load_model(_cstr(model_file_name))
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    return toPyModel(model)


def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.
    """
    libsvm.svm_save_model(_cstr(model_file_name), model)


def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, options]) -> model | ACC | MSE

    y: a list/tuple/ndarray of l true labels (type must be int/double).

    x: 1. a list/tuple of l training instances. Feature vector of
          each training instance is a list/tuple or dictionary.

       2. an l * n numpy ndarray or scipy spmatrix (n: number of features).

    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation)
    either accuracy (ACC) or mean-squared error (MSE) is returned.
    options:
        -s svm_type : set type of SVM (default 0)
            0 -- C-SVC		(multi-class classification)
            1 -- nu-SVC		(multi-class classification)
            2 -- one-class SVM
            3 -- epsilon-SVR	(regression)
            4 -- nu-SVR		(regression)
        -t kernel_type : set type of kernel function (default 2)
            0 -- linear: u'*v
            1 -- polynomial: (gamma*u'*v + coef0)^degree
            2 -- radial basis function: exp(-gamma*|u-v|^2)
            3 -- sigmoid: tanh(gamma*u'*v + coef0)
            4 -- precomputed kernel (kernel values in training_set_file)
        -d degree : set degree in kernel function (default 3)
        -g gamma : set gamma in kernel function (default 1/num_features)
        -r coef0 : set coef0 in kernel function (default 0)
        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        -m cachesize : set cache memory size in MB (default 100)
        -e epsilon : set tolerance of termination criterion (default 0.001)
        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        -v n: n-fold cross validation mode
        -q : quiet mode (no outputs)

    Raises:
        TypeError: if the arguments match none of the call forms above.
        ValueError: if a precomputed-kernel instance is malformed, or if
            libsvm.svm_check_parameter reports an error.
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)) or (scipy and isinstance(arg1, scipy.ndarray)):
        # Explicit raise instead of `assert`, so the check survives `python -O`.
        if not (isinstance(arg2, (list, tuple)) or
                (scipy and isinstance(arg2, (scipy.ndarray, sparse.spmatrix)))):
            raise TypeError("Wrong types for the arguments")
        y, x, options = arg1, arg2, arg3
        param = svm_parameter(options)
        prob = svm_problem(y, x, isKernel=(param.kernel_type == PRECOMPUTED))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == PRECOMPUTED:
        # Every instance must start with a node 0:<serial number>, where the
        # serial number lies in [1, prob.n].
        for i in range(prob.l):
            xi = prob.x[i]
            idx, val = xi[0].index, xi[0].value
            if idx != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if val <= 0 or val > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    # gamma == 0 is treated as "not set": fall back to 1/num_features.
    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in [EPSILON_SVR, NU_SVR]:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # If prob is destroyed, data including SVs pointed by m can remain.
        m.x_space = prob.x_space
        return m


def _row_to_nodearray(x, i, is_sparse, is_kernel):
    """Convert instance i of x into the (nodearray, max_idx) pair returned by
    gen_svm_nodearray. For sparse input, x is indexed through its
    indptr/indices/data arrays (i.e. it is expected to be in CSR form)."""
    if is_sparse:
        indslice = slice(x.indptr[i], x.indptr[i+1])
        return gen_svm_nodearray((x.indices[indslice], x.data[indslice]), isKernel=is_kernel)
    return gen_svm_nodearray(x[i], isKernel=is_kernel)


def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    y: a list/tuple/ndarray of l true labels (type must be int/double).
       It is used for calculating the accuracy. Use [] if true labels are
       unavailable.

    x: 1. a list/tuple of l testing instances. Feature vector of
          each testing instance is a list/tuple or dictionary.

       2. an l * n numpy ndarray or scipy spmatrix (n: number of features).

    Predict data (y, x) with the SVM model m.
    options:
        -b probability_estimates: whether to predict probability estimates,
            0 or 1 (default 0); for one-class SVM only 0 is supported.
        -q : quiet mode (no outputs).

    The return tuple contains
    p_labels: a list of predicted labels
    p_acc: a tuple including  accuracy (for classification), mean-squared
           error, and squared correlation coefficient (for regression).
    p_vals: a list of decision values or probability estimates (if '-b 1'
            is specified). If k is the number of classes, for decision values,
            each element includes results of predicting k(k-1)/2 binary-class
            SVMs. For probabilities, each element contains k values indicating
            the probability that the testing instance is in each class.
            Note that the order of classes here is the same as 'model.label'
            field in the model structure.

    Raises:
        TypeError: if x or y has an unsupported type.
        ValueError: if 'options' is malformed, or '-b 1' is requested for a
            model that is not a probability model.
    """

    def info(s):
        print(s)

    if scipy and isinstance(x, scipy.ndarray):
        x = scipy.ascontiguousarray(x) # enforce row-major
    elif sparse and isinstance(x, sparse.spmatrix):
        x = x.tocsr()
    elif not isinstance(x, (list, tuple)):
        raise TypeError("type of x: {0} is not supported!".format(type(x)))

    if (not isinstance(y, (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
        raise TypeError("type of y: {0} is not supported!".format(type(y)))

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            # A trailing '-b' without a value is a malformed option string,
            # not an indexing error.
            if i >= len(argv):
                raise ValueError("Wrong options")
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    pred_labels = []
    pred_values = []

    # Loop invariants, computed once rather than per instance.
    is_sparse = bool(scipy and isinstance(x, sparse.spmatrix))
    is_kernel = (m.param.kernel_type == PRECOMPUTED)

    if is_sparse:
        nr_instance = x.shape[0]
    else:
        nr_instance = len(x)

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probabiliy estimates")

        if svm_type in [NU_SVR, EPSILON_SVR]:
            info("Prob. model for test data: target value = predicted value + z,\n"
            "z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()
        for i in range(nr_instance):
            xi, idx = _row_to_nodearray(x, i, is_sparse, is_kernel)
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels.append(label)
            pred_values.append(values)
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in predicton.")
        # One-class and regression models yield a single decision value.
        # Classification models (C-SVC and nu-SVC) yield one value per pair of
        # classes, so the buffer must hold k(k-1)/2 doubles; sizing it as 1
        # for nu-SVC would let the C routine write past the end of the buffer.
        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVR):
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()
        for i in range(nr_instance):
            xi, idx = _row_to_nodearray(x, i, is_sparse, is_kernel)
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels.append(label)
            pred_values.append(values)

    # len() rather than truthiness: y may be an ndarray, whose truth value is
    # ambiguous.
    if len(y) == 0:
        y = [0] * nr_instance
    ACC, MSE, SCC = evaluations(y, pred_labels)

    if svm_type in [EPSILON_SVR, NU_SVR]:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(round(nr_instance*ACC/100)), nr_instance))

    return pred_labels, (ACC, MSE, SCC), pred_values