1#!/usr/bin/env python
2
3from ctypes import *
4from ctypes.util import find_library
5from os import path
6from glob import glob
7import sys
8
# SciPy is optional.  When the import fails for any reason, both names are set
# to None; the code below tests `scipy` before taking an array/sparse path.
try:
    import scipy
    from scipy import sparse
except:
    scipy = None
    sparse = None

# Python 2 compatibility: bind the lazy xrange/izip to the Python 3 names.
if sys.version_info[0] < 3:
    range = xrange
    from itertools import izip as zip
19
# Names exported by `from <this module> import *`: the loaded library handle,
# the ctypes structures and helpers, the solver constants, and print_null.
__all__ = ['liblinear', 'feature_node', 'gen_feature_nodearray', 'problem',
           'parameter', 'model', 'toPyModel', 'L2R_LR', 'L2R_L2LOSS_SVC_DUAL',
           'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL', 'MCSVM_CS',
           'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', 'L2R_L2LOSS_SVR',
           'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL', 'ONECLASS_SVM',
           'print_null']
26
# Locate and load the LIBLINEAR shared library.  Candidates are tried in order:
#   1. a file matching 'clib.cp*' next to this module;
#   2. a fixed path relative to this module
#      (..\..\windows\liblinear.dll on win32, ../../liblinear.so.4 elsewhere);
#   3. whatever ctypes.util.find_library resolves for 'linear' or 'liblinear'.
# `except Exception` (instead of a bare `except`) keeps the fall-through for
# every kind of load failure while letting KeyboardInterrupt and SystemExit
# propagate.
try:
    dirname = path.dirname(path.abspath(__file__))
    dynamic_lib_name = 'clib.cp*'
    path_to_so = glob(path.join(dirname, dynamic_lib_name))[0]
    liblinear = CDLL(path_to_so)
except Exception:
    try:
        if sys.platform == 'win32':
            liblinear = CDLL(path.join(dirname, r'..\..\windows\liblinear.dll'))
        else:
            liblinear = CDLL(path.join(dirname, '../../liblinear.so.4'))
    except Exception:
        # For unix the prefix 'lib' is not considered.
        # Each name is looked up once; the first hit is loaded.
        _found = find_library('linear') or find_library('liblinear')
        if not _found:
            raise Exception('LIBLINEAR library not found.')
        liblinear = CDLL(_found)
46
# Solver identifiers.  One of these integers is stored in
# `parameter.solver_type`; `parameter.parse_options` reads it from the `-s`
# option and uses it to pick the default `eps`.
L2R_LR = 0
L2R_L2LOSS_SVC_DUAL = 1
L2R_L2LOSS_SVC = 2
L2R_L1LOSS_SVC_DUAL = 3
MCSVM_CS = 4
L1R_L2LOSS_SVC = 5
L1R_LR = 6
L2R_LR_DUAL = 7
L2R_L2LOSS_SVR = 11
L2R_L2LOSS_SVR_DUAL = 12
L2R_L1LOSS_SVR_DUAL = 13
ONECLASS_SVM = 21
59
# ctypes callback prototype: takes one C string argument, returns nothing.
PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)


def print_null(s):
    """Ignore `s` and return None; a do-nothing print callback."""
    return None
63
def genFields(names, types):
    """Pair each field name with its type, in order, as a list of 2-tuples.

    The result has the shape expected by a ctypes `_fields_` attribute.
    Pairing stops at the shorter of the two inputs.
    """
    return [(field_name, field_type) for field_name, field_type in zip(names, types)]
66
def fillprototype(f, restype, argtypes):
    """Declare the signature of `f` by setting its `restype` and `argtypes`."""
    f.restype, f.argtypes = restype, argtypes
70
class feature_node(Structure):
    """ctypes structure holding an int `index` and a double `value`."""

    _names = ["index", "value"]
    _types = [c_int, c_double]
    _fields_ = genFields(_names, _types)

    def __str__(self):
        """Render the node as 'index:value' (value in %g notation)."""
        index, value = self.index, self.value
        return '%d:%g' % (index, value)
78
def gen_feature_nodearray(xi, feature_max=None):
    """Convert one instance into a ctypes array of `feature_node`.

    Parameters:
        xi: the instance, in one of these forms:
            - dict mapping feature index -> value (keys are used as-is);
            - list or tuple of values (position 0 becomes feature 1);
            - 1-d array (position 0 becomes feature 1);
            - tuple `(indices, values)` of two arrays, where `indices` are
              0-based and are shifted by one.
            Zero values are skipped for the dict, list/tuple and 1-d array
            forms; the `(indices, values)` form is copied as given.
        feature_max: optional int.  Features whose (1-based) index is larger
            are dropped.  A falsy value (None or 0) disables the limit.

    Returns:
        `(nodes, max_idx)`.  `nodes` holds one entry per kept feature followed
        by two entries whose index is -1 (one of them is the slot later used
        for the bias term).  `max_idx` is the largest kept index, or 0 when no
        feature is kept.

    Raises:
        TypeError: if `xi` is none of the supported forms.
    """
    if feature_max:
        assert(isinstance(feature_max, int))

    # True only for the (indices, values) pair of arrays.
    is_sparse_pair = (scipy and isinstance(xi, tuple) and len(xi) == 2
                      and isinstance(xi[0], scipy.ndarray)
                      and isinstance(xi[1], scipy.ndarray))

    xi_shift = 0 # ensure correct indices of xi
    if is_sparse_pair: # for a sparse vector
        index_range = xi[0] + 1 # index starts from 1
        values = xi[1]
        if feature_max:
            # Apply the same selection to indices and values so that each
            # kept index stays paired with its own value even when the
            # dropped entries are not at the end.
            keep = scipy.where(index_range <= feature_max)
            index_range = index_range[keep]
            values = values[keep]
    elif scipy and isinstance(xi, scipy.ndarray):
        xi_shift = 1
        index_range = xi.nonzero()[0] + 1 # index starts from 1
        if feature_max:
            index_range = index_range[scipy.where(index_range <= feature_max)]
    elif isinstance(xi, (dict, list, tuple)):
        if isinstance(xi, dict):
            index_range = xi.keys()
        else:
            xi_shift = 1
            index_range = range(1, len(xi) + 1)
        index_range = [j for j in index_range if xi[j - xi_shift] != 0]

        if feature_max:
            index_range = [j for j in index_range if j <= feature_max]
        index_range = sorted(index_range)
    else:
        raise TypeError('xi should be a dictionary, list, tuple, 1-d numpy array, or tuple of (index, data)')

    ret = (feature_node*(len(index_range)+2))()
    ret[-1].index = -1 # for bias term
    ret[-2].index = -1

    if is_sparse_pair:
        for idx, j in enumerate(index_range):
            ret[idx].index = j
            ret[idx].value = values[idx]
    else:
        for idx, j in enumerate(index_range):
            ret[idx].index = j
            ret[idx].value = xi[j - xi_shift]

    max_idx = 0
    if len(index_range) > 0:
        if is_sparse_pair:
            # The caller's indices are not re-sorted here, so take the true
            # maximum instead of assuming the last entry is the largest.
            max_idx = index_range.max()
        else:
            # Sorted (dict/list/tuple) or ascending (nonzero) by construction.
            max_idx = index_range[-1]
    return ret, max_idx
126
# Numba is optional.  When it cannot be imported, `jit` becomes an identity
# decorator so that `@jit`-decorated functions stay plain Python functions,
# and `jit_enabled` records which case applies.  `except Exception` keeps the
# best-effort fallback for any import failure without also swallowing
# KeyboardInterrupt/SystemExit.
try:
    from numba import jit
    jit_enabled = True
except Exception:
    def jit(x):
        """Fallback decorator: return the decorated object unchanged."""
        return x
    jit_enabled = False
133
@jit
def csr_to_problem_jit(l, x_val, x_ind, x_rowptr, prob_val, prob_ind, prob_rowptr):
    # Copy the entries of each of the `l` CSR rows into the problem buffers,
    # starting at prob_rowptr[row]; column indices are shifted by one.
    # Element-wise loops are used so the function can be compiled by `jit`.
    for row in range(l):
        src_begin = x_rowptr[row]
        src_end = x_rowptr[row+1]
        shift = prob_rowptr[row] - src_begin
        for src in range(src_begin, src_end):
            prob_ind[src+shift] = x_ind[src]+1
            prob_val[src+shift] = x_val[src]
def csr_to_problem_nojit(l, x_val, x_ind, x_rowptr, prob_val, prob_ind, prob_rowptr):
    """Copy `l` CSR rows into the problem buffers using slice assignment.

    For every row, the entries x_*[x_rowptr[i]:x_rowptr[i+1]] are written to
    prob_*[prob_rowptr[i]:prob_rowptr[i+1]-2]; the last two slots of each
    destination row are left untouched.  Column indices are shifted by one.
    """
    for row in range(l):
        src_lo, src_hi = x_rowptr[row], x_rowptr[row+1]
        dst_lo = prob_rowptr[row]
        dst_hi = prob_rowptr[row+1] - 2
        prob_ind[dst_lo:dst_hi] = x_ind[src_lo:src_hi] + 1
        prob_val[dst_lo:dst_hi] = x_val[src_lo:src_hi]
148
def csr_to_problem(x, prob):
    """Fill `prob.x_space` and `prob.rowptr` from the CSR matrix `x`.

    `prob.x_space` becomes one flat array of `feature_node` records with two
    extra records per row; `prob.rowptr` is `x.indptr` shifted accordingly.
    Every index is first set to -1, then the row entries are copied in.
    """
    n_rows = x.shape[0]

    # Extra space for termination node and (possibly) bias term
    nodes = scipy.empty((x.nnz+n_rows*2), dtype=feature_node)
    prob.x_space = nodes

    # Row i starts 2*i records later than in x because of the extra slots.
    rowptr = x.indptr.copy()
    prob.rowptr = rowptr
    rowptr[1:] += 2*scipy.arange(1,n_rows+1)

    node_index = nodes["index"]
    node_value = nodes["value"]
    node_index[:] = -1

    copy_rows = csr_to_problem_jit if jit_enabled else csr_to_problem_nojit
    copy_rows(n_rows, x.data, x.indices, x.indptr, node_value, node_index, rowptr)
161
class problem(Structure):
    """ctypes structure describing a training set, passed to the C library.

    Fields:
        l: number of instances (`len(y)`).
        n: largest feature index; one more than that while `bias >= 0`.
        y: C array of `l` doubles holding the labels.
        x: C array of `l` pointers, one per instance, to `feature_node` runs.
        bias: current bias value; negative means no bias term.

    Besides the fields, instances keep `x_space` (the node storage that `x`
    points into) and, for CSR input, `rowptr` as Python attributes.
    """
    _names = ["l", "n", "y", "x", "bias"]
    _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double]
    _fields_ = genFields(_names, _types)

    def __init__(self, y, x, bias = -1):
        """Build the problem from labels `y` and instances `x`.

        Parameters:
            y: list, tuple or (when SciPy is available) array of labels.
            x: list/tuple of instances accepted by `gen_feature_nodearray`,
               or (when SciPy is available) a 2-d array or sparse matrix with
               one row per instance.
            bias: bias value to install via `set_bias`; negative disables it.

        Raises:
            TypeError: if `y` or `x` has an unsupported type.
            ValueError: if `y` and `x` do not have the same number of rows.
        """
        if (not isinstance(y, (list, tuple))) and (not (scipy and isinstance(y, scipy.ndarray))):
            raise TypeError("type of y: {0} is not supported!".format(type(y)))

        if isinstance(x, (list, tuple)):
            if len(y) != len(x):
                raise ValueError("len(y) != len(x)")
        elif scipy is not None and isinstance(x, (scipy.ndarray, sparse.spmatrix)):
            if len(y) != x.shape[0]:
                raise ValueError("len(y) != len(x)")
            if isinstance(x, scipy.ndarray):
                x = scipy.ascontiguousarray(x) # enforce row-major
            if isinstance(x, sparse.spmatrix):
                x = x.tocsr()
        else:
            raise TypeError("type of x: {0} is not supported!".format(type(x)))
        self.l = l = len(y)
        # Start without a bias term; set_bias() at the end installs the real one.
        self.bias = -1

        is_csr = scipy is not None and isinstance(x, sparse.csr_matrix)

        max_idx = 0
        x_space = self.x_space = []
        if is_csr:
            # Replaces self.x_space with one flat node array and sets self.rowptr.
            csr_to_problem(x, self)
            max_idx = x.shape[1]
        else:
            for xi in x:
                tmp_xi, tmp_idx = gen_feature_nodearray(xi)
                x_space.append(tmp_xi)
                max_idx = max(max_idx, tmp_idx)
        self.n = max_idx

        self.y = (c_double * l)()
        if scipy is not None and isinstance(y, scipy.ndarray):
            scipy.ctypeslib.as_array(self.y, (self.l,))[:] = y
        else:
            for i, yi in enumerate(y): self.y[i] = yi

        self.x = (POINTER(feature_node) * l)()
        if is_csr:
            # Write each row's start address straight into the pointer array.
            # NOTE(review): viewing the pointers as c_uint64 assumes 64-bit
            # pointers -- confirm before using on a 32-bit build.
            base = addressof(self.x_space.ctypes.data_as(POINTER(feature_node))[0])
            x_ptr = cast(self.x, POINTER(c_uint64))
            x_ptr = scipy.ctypeslib.as_array(x_ptr,(self.l,))
            x_ptr[:] = self.rowptr[:-1]*sizeof(feature_node)+base
        else:
            for i, xi in enumerate(self.x_space): self.x[i] = xi

        self.set_bias(bias)

    def set_bias(self, bias):
        """Install, change or remove the bias term of every instance.

        The second-to-last node of each instance is the bias slot.  With
        `bias >= 0` it holds feature index `n` and value `bias`; with a
        negative `bias` its index is -1.  `n` grows by one when a bias term is
        introduced and shrinks by one when it is removed.

        Changing one non-negative bias to another only rewrites the stored
        value; `n` already accounts for the bias feature.  (Previously this
        case, and a change between two different negative values, failed with
        an UnboundLocalError because no node was created.)
        """
        if self.bias == bias:
            return

        if bias >= 0:
            if self.bias < 0:
                self.n += 1 # the bias term becomes one extra feature
            node = feature_node(self.n, bias)
        else:
            if self.bias >= 0:
                self.n -= 1 # the bias feature goes away
            node = feature_node(-1, bias)

        if isinstance(self.x_space, list):
            # One ctypes node array per instance.
            for xi in self.x_space:
                xi[-2] = node
        else:
            # Flat node array (CSR input): the bias slot of each row sits two
            # records before the start of the next row.
            self.x_space["index"][self.rowptr[1:]-2] = node.index
            self.x_space["value"][self.rowptr[1:]-2] = node.value

        self.bias = bias
234
235
class parameter(Structure):
    """ctypes structure with the training parameters passed to the C library.

    The names in `_names` are the C fields.  Everything else assigned in
    `set_to_default_values` (`bias`, the `flag_*` switches, `nr_fold`,
    `print_func`) is a plain Python attribute kept on the instance.
    """
    _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p", "nu", "init_sol", "regularize_bias"]
    _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double, c_double, POINTER(c_double), c_int]
    _fields_ = genFields(_names, _types)

    def __init__(self, options = None):
        """Create the parameters from `options` (str or list); None means ''."""
        if options is None:
            options = ''
        self.parse_options(options)

    def __str__(self):
        """Return one 'name: value' line per C field and Python attribute."""
        attrs = parameter._names + list(self.__dict__.keys())
        lines = [' %s: %s' % (attr, getattr(self, attr)) for attr in attrs]
        return '\n'.join(lines).strip()

    def set_to_default_values(self):
        """Reset every field and Python-side attribute to its default.

        `eps` is set to infinity as a "not specified" marker; `parse_options`
        replaces it with a solver-dependent default afterwards.
        """
        self.solver_type = L2R_L2LOSS_SVC_DUAL
        self.eps = float('inf')
        self.C = 1
        self.p = 0.1
        self.nu = 0.5
        self.nr_weight = 0
        self.weight_label = None
        self.weight = None
        self.init_sol = None
        self.bias = -1
        self.regularize_bias = 1
        self.flag_cross_validation = False
        self.flag_C_specified = False
        self.flag_p_specified = False
        self.flag_solver_specified = False
        self.flag_find_parameters = False
        self.nr_fold = 0
        self.print_func = cast(None, PRINT_STRING_FUN)

    def parse_options(self, options):
        """Reset to defaults, then apply command-line style `options`.

        Parameters:
            options: a str (split on whitespace) or a list of tokens.

        Recognised options:
            -s N       solver_type (int)
            -c X       C (float)
            -p X       p (float)
            -n X       nu (float)
            -e X       eps (float)
            -B X       bias (float)
            -v N       cross validation with N folds (N >= 2)
            -wLABEL X  weight X for class LABEL (may be repeated)
            -q         install `print_null` as the print callback
            -C         enable parameter search
            -R         set regularize_bias to 0

        Side effect: registers `self.print_func` with the C library through
        `set_print_string_function`.

        Raises:
            TypeError: if `options` is neither a list nor a str.
            ValueError: for an unknown option, an option that lacks its
                value, a fold count below 2, or a solver that does not
                support parameter search.
        """
        if isinstance(options, list):
            argv = options
        elif isinstance(options, str):
            argv = options.split()
        else:
            raise TypeError("arg 1 should be a list or a str.")
        # Also resets print_func to the null callback pointer.
        self.set_to_default_values()
        weight_label = []
        weight = []

        def value_after(pos):
            # Return the token following the option at `pos`; fail with the
            # same exception type as other malformed options when it is
            # missing, instead of a bare IndexError.
            if pos + 1 >= len(argv):
                raise ValueError("Option %s requires a value" % (argv[pos],))
            return argv[pos + 1]

        i = 0
        while i < len(argv):
            option = argv[i]
            if option == "-s":
                self.solver_type = int(value_after(i))
                self.flag_solver_specified = True
                i += 1
            elif option == "-c":
                self.C = float(value_after(i))
                self.flag_C_specified = True
                i += 1
            elif option == "-p":
                self.p = float(value_after(i))
                self.flag_p_specified = True
                i += 1
            elif option == "-n":
                self.nu = float(value_after(i))
                i += 1
            elif option == "-e":
                self.eps = float(value_after(i))
                i += 1
            elif option == "-B":
                self.bias = float(value_after(i))
                i += 1
            elif option == "-v":
                value = value_after(i)
                self.flag_cross_validation = 1
                self.nr_fold = int(value)
                if self.nr_fold < 2:
                    raise ValueError("n-fold cross validation: n must >= 2")
                i += 1
            elif option.startswith("-w"):
                value = value_after(i)
                self.nr_weight += 1
                # The class label is the text that follows "-w".
                weight_label.append(int(option[2:]))
                weight.append(float(value))
                i += 1
            elif option == "-q":
                self.print_func = PRINT_STRING_FUN(print_null)
            elif option == "-C":
                self.flag_find_parameters = True
            elif option == "-R":
                self.regularize_bias = 0
            else:
                raise ValueError("Wrong options")
            i += 1

        liblinear.set_print_string_function(self.print_func)
        self.weight_label = (c_int*self.nr_weight)(*weight_label)
        self.weight = (c_double*self.nr_weight)(*weight)

        # default solver for parameter selection is L2R_L2LOSS_SVC
        if self.flag_find_parameters:
            if not self.flag_cross_validation:
                self.nr_fold = 5
            if not self.flag_solver_specified:
                self.solver_type = L2R_L2LOSS_SVC
                self.flag_solver_specified = True
            elif self.solver_type not in [L2R_LR, L2R_L2LOSS_SVC, L2R_L2LOSS_SVR]:
                raise ValueError("Warm-start parameter search only available for -s 0, -s 2 and -s 11")

        # eps is still the "not specified" marker: choose the default that
        # belongs to the solver.  An unknown solver leaves it untouched.
        if self.eps == float('inf'):
            if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]:
                self.eps = 0.01
            elif self.solver_type in [L2R_L2LOSS_SVR]:
                self.eps = 0.0001
            elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]:
                self.eps = 0.1
            elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]:
                self.eps = 0.01
            elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]:
                self.eps = 0.1
            elif self.solver_type in [ONECLASS_SVM]:
                self.eps = 0.01
362
class model(Structure):
    """ctypes structure for a trained model, with thin wrappers over the C API.

    The Python attribute `__createfrom__` records whether the instance was
    made in Python ('python') or wraps memory allocated by the C library
    ('C'); only the latter is released in `__del__`.
    """
    _names = ["param", "nr_class", "nr_feature", "w", "label", "bias", "rho"]
    _types = [parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double, c_double]
    _fields_ = genFields(_names, _types)

    def __init__(self):
        self.__createfrom__ = 'python'

    def __del__(self):
        # free memory created by C to avoid memory leak
        if getattr(self, '__createfrom__', None) == 'C':
            liblinear.free_and_destroy_model(pointer(self))

    def get_nr_feature(self):
        """Return the value of the library's `get_nr_feature` for this model."""
        return liblinear.get_nr_feature(self)

    def get_nr_class(self):
        """Return the value of the library's `get_nr_class` for this model."""
        return liblinear.get_nr_class(self)

    def get_labels(self):
        """Return the labels as a Python list with `get_nr_class()` entries."""
        count = self.get_nr_class()
        buf = (c_int * count)()
        liblinear.get_labels(self, buf)
        return buf[:count]

    def get_decfun_coef(self, feat_idx, label_idx=0):
        """Return the library's `get_decfun_coef` for the given indices."""
        return liblinear.get_decfun_coef(self, feat_idx, label_idx)

    def get_decfun_bias(self, label_idx=0):
        """Return the library's `get_decfun_bias` for `label_idx`."""
        return liblinear.get_decfun_bias(self, label_idx)

    def get_decfun_rho(self):
        """Return the library's `get_decfun_rho` for this model."""
        return liblinear.get_decfun_rho(self)

    def get_decfun(self, label_idx=0):
        """Return `(w, b)`: coefficients for features 1..nr_feature and an offset.

        The offset is `-rho` for a one-class model and the bias of
        `label_idx` otherwise.
        """
        w = [liblinear.get_decfun_coef(self, k, label_idx) for k in range(1, self.nr_feature+1)]
        if self.is_oneclass_model():
            return (w, -self.get_decfun_rho())
        return (w, liblinear.get_decfun_bias(self, label_idx))

    def is_probability_model(self):
        """True when the library's `check_probability_model` returns 1."""
        return liblinear.check_probability_model(self) == 1

    def is_regression_model(self):
        """True when the library's `check_regression_model` returns 1."""
        return liblinear.check_regression_model(self) == 1

    def is_oneclass_model(self):
        """True when the library's `check_oneclass_model` returns 1."""
        return liblinear.check_oneclass_model(self) == 1
414
def toPyModel(model_ptr):
    """
    toPyModel(model_ptr) -> model

    Convert a ctypes POINTER(model) to a Python model.

    The returned object is `model_ptr.contents`, tagged with
    `__createfrom__ = 'C'`.  Raises ValueError when `model_ptr` is false
    (a null pointer).
    """
    if not model_ptr:
        raise ValueError("Null pointer")
    py_model = model_ptr.contents
    py_model.__createfrom__ = 'C'
    return py_model
426
# Declare the C signature of every library function used through ctypes.
# Each entry is (function name, return type, argument types); the entries are
# applied in the listed order.
for _name, _restype, _argtypes in [
    # training, parameter search and cross validation
    ('train', POINTER(model), [POINTER(problem), POINTER(parameter)]),
    ('find_parameters', None, [POINTER(problem), POINTER(parameter), c_int, c_double, c_double, POINTER(c_double), POINTER(c_double), POINTER(c_double)]),
    ('cross_validation', None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)]),

    # prediction
    ('predict_values', c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]),
    ('predict', c_double, [POINTER(model), POINTER(feature_node)]),
    ('predict_probability', c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]),

    # model files
    ('save_model', c_int, [c_char_p, POINTER(model)]),
    ('load_model', POINTER(model), [c_char_p]),

    # model queries
    ('get_nr_feature', c_int, [POINTER(model)]),
    ('get_nr_class', c_int, [POINTER(model)]),
    ('get_labels', None, [POINTER(model), POINTER(c_int)]),
    ('get_decfun_coef', c_double, [POINTER(model), c_int, c_int]),
    ('get_decfun_bias', c_double, [POINTER(model), c_int]),
    ('get_decfun_rho', c_double, [POINTER(model)]),

    # clean-up, checks and print callback
    ('free_model_content', None, [POINTER(model)]),
    ('free_and_destroy_model', None, [POINTER(POINTER(model))]),
    ('destroy_param', None, [POINTER(parameter)]),
    ('check_parameter', c_char_p, [POINTER(problem), POINTER(parameter)]),
    ('check_probability_model', c_int, [POINTER(model)]),
    ('check_regression_model', c_int, [POINTER(model)]),
    ('check_oneclass_model', c_int, [POINTER(model)]),
    ('set_print_string_function', None, [CFUNCTYPE(None, c_char_p)]),
]:
    fillprototype(getattr(liblinear, _name), _restype, _argtypes)
del _name, _restype, _argtypes
453