#===============================================================================
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2014, James Hensman, Max Zwiessele
# Copyright (c) 2015, Max Zwiessele
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================


import numpy as np
from numpy.linalg.linalg import LinAlgError

from . import optimization
from .parameterized import Parameterized
from .optimization.verbose_optimization import VerboseOptimization
import multiprocessing as mp
#from functools import reduce

def opt_wrapper(args):
    m = args[0]
    kwargs = args[1]
    return m.optimize(**kwargs)


class Model(Parameterized):
    _fail_count = 0  # Count of failed optimization steps (see objective)
    _allowed_failures = 10  # number of allowed failures

    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'lbfgsb'
        #from paramz import Tie
        #self.tie = Tie()
        #self.link_parameter(self.tie, -1)
        self.obj_grads = None
        #self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)

    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

        kwargs are passed to the optimizer. They can be:

        :param max_iters: maximum number of function evaluations
        :type max_iters: int
        :param messages: True to display messages during optimisation; may also be the string "ipython_notebook"
        :type messages: bool or string
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
        :type optimizer: string

        Valid optimizers are:
          - 'scg': scaled conjugate gradient method, recommended for stability.
            See also GPy.inference.optimization.scg
          - 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
          - 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin),
          - 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
          - 'lbfgs': the bfgs method (see scipy.optimize.fmin_bfgs),
          - 'sgd': stochastic gradient descent (see scipy.optimize.sgd). For experts only!
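
        A minimal usage sketch (illustrative only; ``m`` stands for any
        constructed and initialized subclass of this Model)::

            opt = m.optimize('lbfgsb', messages=True, max_iters=500)
            print(opt.f_opt)            # objective value at the optimum found
            print(m.optimization_runs)  # all runs performed on this model
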
        """
        if self.is_fixed or self.size == 0:
            print('nothing to optimize')
            return

        if not self.update_model():
            print("updates were off, setting updates on again")
            self.update_model(True)

        if start is None:
            start = self.optimizer_array

        if optimizer is None:
            optimizer = self.preferred_optimizer

        if isinstance(optimizer, optimization.Optimizer):
            opt = optimizer
            opt.model = self
        else:
            optimizer = optimization.get_optimizer(optimizer)
            opt = optimizer(max_iters=max_iters, **kwargs)

        with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
            opt.run(start, f_fp=self._objective_grads, f=self._objective, fp=self._grads)

        self.optimizer_array = opt.x_opt

        self.optimization_runs.append(opt)
        return opt

    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        seen solution.

        If the robust flag is set, exceptions raised during optimizations will
        be handled silently. If _all_ runs fail, the model is reset to the
        existing parameter values.

        \*\*kwargs are passed to the optimizer.

        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
        :type robust: bool
        :param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type num_processes: int
        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool

        .. note::

            If num_processes is None, the number of workers in the
            multiprocessing pool is automatically set to the number of processors
            on the current machine.
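
        For illustration (hypothetical usage; ``m`` is any constructed model
        instance)::

            m.optimize_restarts(num_restarts=5, robust=True, messages=False)
            # the model now holds the parameters of the best of the five runs
            print(min(o.f_opt for o in m.optimization_runs))
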

        """
        initial_length = len(self.optimization_runs)
        initial_parameters = self.optimizer_array.copy()

        if parallel:  #pragma: no cover
            try:
                pool = mp.Pool(processes=num_processes)
                obs = [self.copy() for i in range(num_restarts)]
                [obs[i].randomize() for i in range(num_restarts-1)]
                jobs = pool.map(opt_wrapper, [(o, kwargs) for o in obs])
                pool.close()
                pool.join()
            except KeyboardInterrupt:
                print("Ctrl+c received, terminating and joining pool.")
                pool.terminate()
                pool.join()

        for i in range(num_restarts):
            try:
                if not parallel:
                    if i > 0:
                        self.randomize()
                    self.optimize(**kwargs)
                else:  #pragma: no cover
                    self.optimization_runs.append(jobs[i])

                if verbose:
                    print(("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)))
            except Exception as e:
                if robust:
                    print(("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)))
                else:
                    raise e

        if len(self.optimization_runs) > initial_length:
            # This works, since failed jobs don't get added to the optimization_runs.
            i = np.argmin([o.f_opt for o in self.optimization_runs[initial_length:]])
            self.optimizer_array = self.optimization_runs[initial_length + i].x_opt
        else:
            self.optimizer_array = initial_parameters
        return self.optimization_runs

    def objective_function(self):
        """
        The objective function for the given algorithm.

        This is the true objective, which is to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the objective function here.

        For probabilistic models this is the negative log_likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your objective to minimize here!
        """
        raise NotImplementedError("Implement the result of the objective function here")

    def objective_function_gradients(self):
        """
        The gradients of the objective function for the given algorithm.
        The gradients are w.r.t. the *negative* objective function, as
        this framework works with *negative* log-likelihoods as a default.

        You can find the gradient for the parameters in self.gradient at all times.
        This is where the gradients for the parameters are stored.

        These are the gradients of the true objective, which is to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the gradient here.

        For probabilistic models this is the gradient of the negative log_likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your *negative* gradient here! See the sketch
        below.
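
        A minimal sketch of a non-probabilistic model (illustrative; it assumes
        ``Model`` and ``Param`` are importable from this package)::

            import numpy as np
            from paramz import Model, Param

            class Quadratic(Model):
                # minimize ||x - target||^2 over the free parameter x
                def __init__(self, target, name='quadratic'):
                    super(Quadratic, self).__init__(name=name)
                    self.target = np.asarray(target, dtype=float)
                    self.x = Param('x', np.zeros_like(self.target))
                    self.link_parameter(self.x)

                def objective_function(self):
                    # the value to be minimized
                    return float(np.sum((self.x.values - self.target) ** 2))

                def parameters_changed(self):
                    # store the gradient of the objective w.r.t. x; the default
                    # objective_function_gradients() then returns self.gradient
                    self.x.gradient = 2.0 * (self.x.values - self.target)

            m = Quadratic([1., 2., 3.])
            m.optimize()
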
        """
        return self.gradient

    def _grads(self, x):
        """
        Gets the gradients from the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the gradients, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            # self._set_params_transformed(x)
            self.optimizer_array = x
            self.obj_grads = self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):  #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
        return self.obj_grads

    def _objective(self, x):
        """
        The objective function passed to the optimizer. It combines
        the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the objective, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):  #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            return np.inf
        return obj

    def _objective_grads(self, x):
        """
        Return the objective and its gradients for the parameters x in one call,
        with the same robust failure handling as _objective and _grads.
        """
        try:
            self.optimizer_array = x
            obj_f, self.obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):  #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            obj_f = np.inf
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
        return obj_f, self.obj_grads

    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
        """
        Check the gradient of the model by comparing to a numerical
        estimate. If the verbose flag is passed, individual
        components are tested (and printed)

        :param verbose: If True, print a "full" checking of each parameter
        :type verbose: bool
        :param step: The size of the step around which to linearise the objective
        :type step: float (default 1e-6)
        :param tolerance: the tolerance allowed (see note)
        :type tolerance: float (default 1e-3)

        Note:-
            The gradient is considered correct if the ratio of the analytical
            and numerical gradients is within <tolerance> of unity.

            The *dF_ratio* indicates the limit of numerical accuracy of numerical gradients.
            If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
            not accurate enough for the tests (shown with blue).
        """
        if not self._model_initialized_:
            import warnings
            warnings.warn("This model has not been initialized, try model.initialize_parameter()", RuntimeWarning)
            return False

        x = self.optimizer_array.copy()

        if not verbose:
            # make sure only to test the selected parameters
            if target_param is None:
                transformed_index = np.arange(len(x))
            else:
                transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            # just check the global ratio
            dx = np.zeros(x.shape)
            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)
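
            # The global check below compares the symmetric finite difference
            # f(x + dx) - f(x - dx) with the directional derivative
            # 2 * dx . grad f(x); their ratio should be close to 1 whenever
            # the analytical gradient is correct.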
            # evaluate around the point x
            f1 = self._objective(x + dx)
            f2 = self._objective(x - dx)
            gradient = self._grads(x)

            dx = dx[transformed_index]
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if np.isnan(global_ratio):  # pragma: no cover
                global_ratio = 0
            return np.abs(1. - global_ratio) < tolerance or global_diff
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
                names = self.parameter_names_flat()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]
            # Prepare for pretty-printing
            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical', 'dF_ratio']
            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
            float_len = 10
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
            header_string = list(map(lambda x: '|'.join(x), [header_string]))
            separator = '-' * len(header_string[0])
            print('\n'.join([header_string[0], separator]))

            if target_param is None:
                target_param = self
            transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            gradient = self._grads(x).copy()
            np.where(gradient == 0, 1e-312, gradient)
            ret = True
            for xind in zip(transformed_index):
                xx = x.copy()
                xx[xind] += step
                f1 = float(self._objective(xx))
                xx[xind] -= 2. * step
                f2 = float(self._objective(xx))
                # Avoid division by zero: if either value exceeds 1e-15 in magnitude
                # use the ratio, otherwise both values are essentially the same
                if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
                    df_ratio = np.abs((f1 - f2) / min(f1, f2))
                else:  # pragma: no cover
                    df_ratio = 1.0
                df_unstable = df_ratio < df_tolerance
                numerical_gradient = (f1 - f2) / (2. * step)
                if np.all(gradient[xind] == 0):  # pragma: no cover
                    ratio = (f1 - f2) == gradient[xind]
                else:
                    ratio = (f1 - f2) / (2. * step * gradient[xind])
                difference = np.abs(numerical_gradient - gradient[xind])
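                # Colour-code the printed parameter name: green = gradients agree,
                # red = mismatch, blue = the finite difference itself is numerically
                # unstable (df_ratio below df_tolerance).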
                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[xind])
                    ret &= True
                else:  # pragma: no cover
                    formatted_name = "\033[91m {0} \033[0m".format(names[xind])
                    ret &= False
                if df_unstable:  # pragma: no cover
                    formatted_name = "\033[94m {0} \033[0m".format(names[xind])

                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                df = '%1.e' % float(df_ratio)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
                print(grad_string)

            self.optimizer_array = x
            return ret

    def _repr_html_(self):
        """Representation of the model in html for notebook display."""
        model_details = [['<b>Model</b>', self.name + '<br>'],
                         ['<b>Objective</b>', '{}<br>'.format(float(self.objective_function()))],
                         ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                         ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                         ]
        from operator import itemgetter
        to_print = ["""<style type="text/css">
.pd{
    font-family: "Courier New", Courier, monospace !important;
    width: 100%;
    padding: 3px;
}
</style>\n"""] + ["<p class=pd>"] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["</p>"]
        to_print.append(super(Model, self)._repr_html_())
        return "\n".join(to_print)

    def __str__(self, VT100=True):
        model_details = [['Name', self.name],
                         ['Objective', '{}'.format(float(self.objective_function()))],
                         ["Number of Parameters", '{}'.format(self.size)],
                         ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                         ["Updates", '{}'.format(self._update_on)],
                         ]
        max_len = max(len(name) for name, _ in model_details)
        to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"]
        to_print.append(super(Model, self).__str__(VT100=VT100))
        return "\n".join(to_print)