import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from Orange.classification import Model


class GradientDescent:
    """
    Logistic regression algorithm with custom cost and gradient functions,
    which allow the algorithm to be performed step by step. A usage sketch
    is given at the end of this module.

    Parameters
    ----------
    alpha : float
        Learning rate
    theta : array_like(float, ndim=1)
        Logistic function parameters
    data : Orange.data.Table
        Data
    stochastic : bool
        If True, stochastic (mini-batch) gradient descent is performed
    step_size : int
        Number of examples used in one stochastic step
    intercept : bool
        If True, a column of ones is prepended to the data
    """

    x = None
    y = None
    theta = None
    alpha = None
    domain = None
    step_no = 0
    stochastic_i = 0
    stochastic_step_size = 30  # number of examples used in one stochastic step
    regularization_rate = 0.001
    # very small regularization rate to avoid large parameters

    def __init__(self, alpha=0.1, theta=None, data=None, stochastic=False,
                 step_size=30, intercept=False):
        self.history = []
        self.intercept = intercept
        self.set_alpha(alpha)
        self.set_data(data)
        self.set_theta(theta)
        self.stochastic = stochastic
        self.stochastic_step_size = step_size

    def set_data(self, data):
        """
        Set the data used by the gradient descent.
        """
        if data is not None:
            x = data.X
            self.x = np.c_[np.ones(len(x)), x] if self.intercept else x
            self.y = data.Y
            self.domain = data.domain
        else:
            self.x = None
            self.y = None
            self.domain = None

    def set_theta(self, theta):
        """
        Set theta. Can be called from the constructor or from outside.
        """
        if isinstance(theta, (np.ndarray, np.generic)):
            self.theta = theta
        elif isinstance(theta, list):
            self.theta = np.array(theta)
        else:
            self.theta = None
        self.history = self.set_list(
            self.history, 0, (np.copy(self.theta), 0, None))
        self.step_no = 0

    def set_alpha(self, alpha):
        """
        Set alpha. Can be called from the constructor or from outside.
        """
        self.alpha = alpha

    @property
    def model(self):
        """
        Return a model based on the current parameters.
        """
        raise NotImplementedError("Subclasses should implement this!")

    @property
    def converged(self):
        """
        Return True if gradient descent has already converged.
        """
        if self.step_no == 0:
            return False
        # converged when the mean absolute change of theta in the last step
        # falls below a threshold (stricter for the noisier stochastic case)
        return (np.sum(np.abs(self.theta - self.history[self.step_no - 1][0])) /
                len(self.theta) < (1e-2 if not self.stochastic else 1e-5))

    def step(self):
        """
        Perform one step of gradient descent.
        """
        self.step_no += 1

        seed = None  # seed is stored so that the shuffle can be reverted
        # if we have gone through all the data, reset the index and
        # permute the data
        if self.stochastic_i + self.stochastic_step_size >= len(self.x):
            self.stochastic_i = 0

            # shuffle the data
            seed = np.random.randint(100)  # random seed
            np.random.seed(seed)  # seed of the permutation used to shuffle
            indices = np.random.permutation(len(self.x))
            self.x = self.x[indices]
            self.y = self.y[indices]

        # calculate the gradient and modify theta
        grad = self.dj(self.theta, self.stochastic)
        if self.stochastic:
            # in the stochastic case dj returns one gradient row per example
            # in the mini-batch, so the rows are summed
            self.theta -= np.sum(np.float64(self.alpha) * grad, axis=0)
        else:
            self.theta -= self.alpha * grad

        # advance the index used by stochastic gradient descent
        self.stochastic_i += self.stochastic_step_size

        # save history for step back
        self.history = self.set_list(
            self.history, self.step_no,
            (np.copy(self.theta), self.stochastic_i, seed))

    def step_back(self):
        """
        Revert the last step of gradient descent.
        """
        if self.step_no > 0:
            self.step_no -= 1

            # restore theta
            self.theta = np.copy(self.history[self.step_no][0])

            # restore the index used by stochastic gradient descent
            self.stochastic_i = self.history[self.step_no][1]

            # if necessary, revert the data shuffle
            seed = self.history[self.step_no + 1][2]
            if seed is not None:  # data were permuted in the reverted step
                np.random.seed(seed)  # use the same seed to revert
                indices = np.random.permutation(len(self.x))
                # argsort of the permutation indices gives the inverse
                # permutation, which undoes the shuffle
                indices_reverse = np.argsort(indices)
                self.x = self.x[indices_reverse]
                self.y = self.y[indices_reverse]
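                # e.g. for indices == [2, 0, 1], indices_reverse == [1, 2, 0],
                # so x[indices][indices_reverse] restores the original order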

    def j(self, theta):
        """
        Cost function for logistic regression.
        """
        raise NotImplementedError("Subclasses should implement this!")

    def dj(self, theta, stochastic=False):
        """
        Gradient of the cost function for logistic regression.
        """
        raise NotImplementedError("Subclasses should implement this!")

    def optimized(self):
        """
        Perform the whole model training at once (not step by step),
        using the L-BFGS-B optimizer.
        """
        res = fmin_l_bfgs_b(self.j,
                            np.zeros(self.x.shape[1]),
                            self.dj)
        return res[0]

    @staticmethod
    def set_list(l, i, v):
        """
        Set the i-th value in list l to v. If index i does not yet exist,
        the list is padded with None and v is appended; otherwise the
        existing value is modified.
        """
        try:
            l[i] = v
        except IndexError:
            for _ in range(i - len(l)):
                l.append(None)
            l.append(v)
        return l

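
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the class above): a
# hypothetical logistic-regression subclass trained step by step and then
# with the L-BFGS-B optimizer. The ToyLogisticGD class and the synthetic
# data below are assumptions made purely for demonstration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from Orange.data import (ContinuousVariable, DiscreteVariable, Domain,
                             Table)

    class ToyLogisticGD(GradientDescent):
        """Hypothetical subclass with a sigmoid cost and gradient."""

        @staticmethod
        def g(z):
            return 1.0 / (1.0 + np.exp(-z))

        def j(self, theta):
            h = self.g(self.x.dot(theta))
            eps = 1e-12  # guard against log(0)
            return -np.mean(self.y * np.log(h + eps) +
                            (1 - self.y) * np.log(1 - h + eps))

        def dj(self, theta, stochastic=False):
            if stochastic:
                # one gradient row per example in the current mini-batch
                sl = slice(self.stochastic_i,
                           self.stochastic_i + self.stochastic_step_size)
                h = self.g(self.x[sl].dot(theta))
                return (h - self.y[sl])[:, None] * self.x[sl]
            h = self.g(self.x.dot(theta))
            return self.x.T.dot(h - self.y) / len(self.y)

    # two Gaussian blobs as a toy binary classification problem
    np.random.seed(42)
    X = np.vstack([np.random.randn(20, 2) - 1, np.random.randn(20, 2) + 1])
    y = np.array([0.0] * 20 + [1.0] * 20)
    domain = Domain([ContinuousVariable("x1"), ContinuousVariable("x2")],
                    DiscreteVariable("class", values=("a", "b")))
    data = Table.from_numpy(domain, X, y)

    gd = ToyLogisticGD(alpha=0.5, theta=np.zeros(2), data=data)
    while not gd.converged and gd.step_no < 1000:
        gd.step()
    print("steps taken:", gd.step_no, "theta:", gd.theta)
    print("L-BFGS-B theta:", gd.optimized())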