#===============================================================================
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2014, James Hensman, Max Zwiessele
# Copyright (c) 2015, Max Zwiessele
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================

import numpy as np
from numpy.linalg import LinAlgError

from . import optimization
from .parameterized import Parameterized
from .optimization.verbose_optimization import VerboseOptimization
import multiprocessing as mp
#from functools import reduce

def opt_wrapper(args):
    # Helper for multiprocessing.Pool: unpack a (model, kwargs) pair and run the optimization.
    m = args[0]
    kwargs = args[1]
    return m.optimize(**kwargs)


class Model(Parameterized):
    _fail_count = 0  # Count of failed optimization steps (see objective)
    _allowed_failures = 10  # number of allowed failures before the original exception is re-raised

    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'lbfgsb'
        #from paramz import Tie
        #self.tie = Tie()
        #self.link_parameter(self.tie, -1)
        self.obj_grads = None  # cache of the most recently computed (transformed) gradient
        #self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)

    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
        """
        Optimize the model by minimizing self.objective_function, using
        self.objective_function_gradients for the gradients.

        kwargs are passed to the optimizer. They can be:

        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display progress messages during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
        :type optimizer: string
        :param start: starting point for the optimisation (defaults to the current optimizer_array)
        :param ipython_notebook: whether to use the progress display suited to IPython/Jupyter notebooks
        :type ipython_notebook: bool
        :param clear_after_finish: whether to clear the notebook progress display once the optimisation has finished
        :type clear_after_finish: bool

        Valid optimizers are:
          - 'scg': scaled conjugate gradient method, recommended for stability.
                   See also GPy.inference.optimization.scg
          - 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
          - 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin),
          - 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
          - 'lbfgs': the bfgs method (see scipy.optimize.fmin_bfgs),
          - 'sgd': stochastic gradient descent. For experts only!
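
        A typical call, once a concrete subclass instance ``m`` has been built
        (a minimal sketch; the keyword values are examples only)::

            m.optimize(optimizer='lbfgsb', messages=True, max_iters=500)
            print(m.optimization_runs[-1].f_opt)  # objective value at the optimum found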
        """
        if self.is_fixed or self.size == 0:
            print('nothing to optimize')
            return

        if not self.update_model():
            print("updates were off, setting updates on again")
            self.update_model(True)

        if start is None:
            start = self.optimizer_array

        if optimizer is None:
            optimizer = self.preferred_optimizer

        if isinstance(optimizer, optimization.Optimizer):
            opt = optimizer
            opt.model = self
        else:
            optimizer = optimization.get_optimizer(optimizer)
            opt = optimizer(max_iters=max_iters, **kwargs)

        with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
            opt.run(start, f_fp=self._objective_grads, f=self._objective, fp=self._grads)

        self.optimizer_array = opt.x_opt

        self.optimization_runs.append(opt)
        return opt

    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        seen solution.

        If the robust flag is set, exceptions raised during optimizations will
        be handled silently.  If _all_ runs fail, the model is reset to the
        existing parameter values.

        \*\*kwargs are passed to the optimizer.

        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
        :type robust: bool
        :param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type num_processes: int
        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display progress messages during optimisation
        :type messages: bool

        .. note::

            If num_processes is None, the number of workers in the
            multiprocessing pool is automatically set to the number of processors
            on the current machine.
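
        A typical call (a minimal sketch; the keyword values are examples only)::

            m.optimize_restarts(num_restarts=5, robust=True, messages=False)
            # the model is left at the best solution found across the restarts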
        """
        initial_length = len(self.optimization_runs)
        initial_parameters = self.optimizer_array.copy()

        if parallel: #pragma: no cover
            try:
                pool = mp.Pool(processes=num_processes)
                obs = [self.copy() for i in range(num_restarts)]
                [obs[i].randomize() for i in range(num_restarts-1)]
                jobs = pool.map(opt_wrapper, [(o,kwargs) for o in obs])
                pool.close()
                pool.join()
            except KeyboardInterrupt:
                print("Ctrl+c received, terminating and joining pool.")
                pool.terminate()
                pool.join()

        for i in range(num_restarts):
            try:
                if not parallel:
                    if i > 0:
                        self.randomize()
                    self.optimize(**kwargs)
                else:#pragma: no cover
                    self.optimization_runs.append(jobs[i])

                if verbose:
                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
                else:
                    raise e

        if len(self.optimization_runs) > initial_length:
            # This works, since failed jobs don't get added to the optimization_runs.
            i = np.argmin([o.f_opt for o in self.optimization_runs[initial_length:]])
            self.optimizer_array = self.optimization_runs[initial_length + i].x_opt
        else:
            self.optimizer_array = initial_parameters
        return self.optimization_runs

    def objective_function(self):
        """
        The objective function for the given algorithm.

        This is the true objective function, which is to be minimized.
        Note that all parameters are already set and in place, so you only need
        to return the value of the objective function here.

        For probabilistic models this is the negative log likelihood
        (including the MAP prior), so we return that here. If your model is not
        probabilistic, just return the objective to minimize here!
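
        A minimal sketch of a concrete model (this assumes ``paramz.Param`` and
        the ``parameters_changed`` hook; the quadratic objective is only an
        illustration, not part of this module)::

            import numpy as np
            from paramz import Model, Param

            class Quadratic(Model):
                def __init__(self, x0, name='quadratic'):
                    super(Quadratic, self).__init__(name=name)
                    self.x = Param('x', np.asarray(x0, dtype=float))
                    self.link_parameter(self.x)

                def objective_function(self):
                    # the quantity to be minimized
                    return float(np.sum(np.square(self.x)))

                def parameters_changed(self):
                    # gradient of the objective, stored on the parameter
                    self.x.gradient = 2. * self.x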
        """
        raise NotImplementedError("Implement the result of the objective function here")

    def objective_function_gradients(self):
        """
        The gradients of the objective function for the given algorithm.
        The gradients returned here are those of the objective that is being
        minimized; this framework works with *negative* log likelihoods by default.

        You can find the gradients for the parameters in self.gradient at all times.
        This is where the gradients get stored for the parameters.

        These are the gradients of the true objective, which is to be minimized.
        Note that all parameters are already set and in place, so you only need
        to return the gradient here.

        For probabilistic models this is the gradient of the negative log likelihood
        (including the MAP prior), so we return that here. If your model is not
        probabilistic, just return the gradient of your objective here!
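
        In a concrete model the gradients are usually written onto the
        parameters (for instance inside ``parameters_changed``) and are then
        collected here via self.gradient. A sketch, assuming a parameter
        ``self.x`` linked to the model and a quadratic objective::

            def parameters_changed(self):
                # d(objective)/d(x), stored on the parameter itself
                self.x.gradient = 2. * self.x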
        """
        return self.gradient

    def _grads(self, x):
        """
        Gets the gradients from the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the gradients, and will raise the original exception if
        the gradients cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            # self._set_params_transformed(x)
            self.optimizer_array = x
            self.obj_grads = self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError): #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            # clip the gradients to a large finite range so the optimizer can keep going
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
        return self.obj_grads

    def _objective(self, x):
        """
        The objective function passed to the optimizer. It combines
        the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the objective, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):#pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            return np.inf
        return obj

    def _objective_grads(self, x):
        """Return the objective and its gradients at x in a single call, for optimizers that evaluate both together."""
        try:
            self.optimizer_array = x
            obj_f, self.obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):#pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            obj_f = np.inf
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
        return obj_f, self.obj_grads

    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
        """
        Check the gradient of the model by comparing it to a numerical
        estimate. If the verbose flag is passed, individual
        components are tested (and printed).

        :param verbose: If True, print a "full" check of each parameter
        :type verbose: bool
        :param step: The size of the step around which to linearise the objective
        :type step: float (default 1e-6)
        :param tolerance: the tolerance allowed (see note)
        :type tolerance: float (default 1e-3)
        :param df_tolerance: the threshold below which the numerical estimate is flagged as unstable (see note)
        :type df_tolerance: float (default 1e-12)

        .. note::
           The gradient is considered correct if the ratio of the analytical
           and numerical gradients is within <tolerance> of unity.

           The *dF_ratio* indicates the limit of numerical accuracy of the numerical gradients.
           If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
           not accurate enough for the tests (shown in blue).
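
        Typically invoked through the public ``checkgrad`` method (a sketch,
        assuming a concrete model instance ``m``)::

            m.checkgrad()              # single overall ratio check
            m.checkgrad(verbose=True)  # per-parameter table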
        """
        if not self._model_initialized_:
            import warnings
            warnings.warn("This model has not been initialized, try model.initialize_model()", RuntimeWarning)
            return False

        x = self.optimizer_array.copy()

        if not verbose:
            # make sure only to test the selected parameters
            if target_param is None:
                transformed_index = np.arange(len(x))
            else:
                transformed_index = self._raveled_index_for_transformed(target_param)

                if transformed_index.size == 0:
                    print("No free parameters to check")
                    return True

            # just check the global ratio
            dx = np.zeros(x.shape)
            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

            # evaluate the objective around the point x
            f1 = self._objective(x + dx)
            f2 = self._objective(x - dx)
            gradient = self._grads(x)

            dx = dx[transformed_index]
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if np.isnan(global_ratio): # pragma: no cover
                global_ratio = 0
            return np.abs(1. - global_ratio) < tolerance or global_diff
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
                names = self.parameter_names_flat()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]
            # Prepare for pretty-printing
            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical', 'dF_ratio']
            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
            float_len = 10
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
            header_string = ['|'.join(header_string)]
            separator = '-' * len(header_string[0])
            print('\n'.join([header_string[0], separator]))

            if target_param is None:
                target_param = self
            transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            gradient = self._grads(x).copy()
            np.where(gradient == 0, 1e-312, gradient)
            ret = True
            for xind in zip(transformed_index):
                xx = x.copy()
                xx[xind] += step
                f1 = float(self._objective(xx))
                xx[xind] -= 2.*step
                f2 = float(self._objective(xx))
                # Avoid division by zero: only compute the ratio if either value is larger
                # than 1e-15 in magnitude, otherwise both values are essentially the same.
                if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
                    df_ratio = np.abs((f1 - f2) / min(f1, f2))
                else: # pragma: no cover
                    df_ratio = 1.0
                df_unstable = df_ratio < df_tolerance
                numerical_gradient = (f1 - f2) / (2. * step)
                if np.all(gradient[xind] == 0): # pragma: no cover
                    ratio = (f1 - f2) == gradient[xind]
                else:
                    ratio = (f1 - f2) / (2. * step * gradient[xind])
                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[xind])
                    ret &= True
                else:  # pragma: no cover
                    formatted_name = "\033[91m {0} \033[0m".format(names[xind])
                    ret &= False
                if df_unstable:  # pragma: no cover
                    formatted_name = "\033[94m {0} \033[0m".format(names[xind])

                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                df = '%1.e' % float(df_ratio)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
                print(grad_string)

            self.optimizer_array = x
            return ret

    def _repr_html_(self):
        """Representation of the model in html for notebook display."""
        model_details = [['<b>Model</b>', self.name + '<br>'],
                         ['<b>Objective</b>', '{}<br>'.format(float(self.objective_function()))],
                         ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                         ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                         ]
        to_print = ["""<style type="text/css">
.pd{
    font-family: "Courier New", Courier, monospace !important;
    width: 100%;
    padding: 3px;
}
</style>\n"""] + ["<p class=pd>"] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["</p>"]
        to_print.append(super(Model, self)._repr_html_())
        return "\n".join(to_print)

    def __str__(self, VT100=True):
        model_details = [['Name', self.name],
                         ['Objective', '{}'.format(float(self.objective_function()))],
                         ["Number of Parameters", '{}'.format(self.size)],
                         ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                         ["Updates", '{}'.format(self._update_on)],
                         ]
        max_len = max(len(name) for name, _ in model_details)  # align the ':' separators on the longest name
        to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"]
        to_print.append(super(Model, self).__str__(VT100=VT100))
        return "\n".join(to_print)