1 //
2 // Created by friedrich on 16.08.16.
3 //
4 
5 #include <SubsetOfRegressors.hpp>
6 #include <assert.h>
7 #include <random>
8 #include <algorithm>
9 #include <iostream>
10 #include <cmath>
11 
12 //--------------------------------------------------------------------------------
SubsetOfRegressors(int n,double & delta_input)13 SubsetOfRegressors::SubsetOfRegressors(int n, double &delta_input) :
14 		FullyIndependentTrainingConditional(n, delta_input) {
15 }
16 //--------------------------------------------------------------------------------
SubsetOfRegressors(int n,double & delta_input,std::vector<double> gp_parameters_input)17 SubsetOfRegressors::SubsetOfRegressors(int n, double &delta_input, std::vector<double> gp_parameters_input) :
18 		FullyIndependentTrainingConditional(n, delta_input, gp_parameters_input) {
19 }
20 //--------------------------------------------------------------------------------
build(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)21 void SubsetOfRegressors::build(std::vector<std::vector<double> > const &nodes,
22 		std::vector<double> const &values, std::vector<double> const &noise) {
23 
24 	int nb_u_nodes = u.rows();
25 
26 	if (nb_u_nodes > 0) {
27 		std::cout << "SoR build with [" << nodes.size() << "," << nb_u_nodes
28 			<< "]" << std::endl;
29 		std::cout << "With Parameters: " << std::endl;
30 	    for ( int i = 0; i < dim+1; ++i )
31 	      std::cout << "gp_param = " << gp_parameters[i] << std::endl;
32 	    std::cout << std::endl;
33 		//nb_u_nodes++;
34 
35 		nb_gp_nodes = nodes.size();
36 		gp_nodes.clear();
37 		gp_noise.clear();
38 		gp_nodes_eigen.resize(nb_gp_nodes, dim);
39 		gp_noise_eigen.resize(nb_gp_nodes);
40 		for (int i = 0; i < nb_gp_nodes; ++i) {
41 			gp_nodes.push_back(nodes.at(i));
42 			gp_noise.push_back(noise.at(i));
43 			for(int j = 0; j < dim; ++j){
44 				gp_nodes_eigen(i,j) = nodes[i][j];
45 			}
46 			gp_noise_eigen(i) = noise[i];
47 		}
48 		/*
49 		if (resample_u){
50 			this->sample_u(nb_u_nodes);
51 			if (do_hp_estimation)
52 				this->estimate_hyper_parameters(nodes, values, noise);
53   			resample_u = false;
54 		}
55 		*/
56 
57 		//Set up matrix K_u_f and K_f_u
58 		compute_Kuf_and_Kuu();
59 		MatrixXd K_f_u = K_u_f.transpose();
60 
61 		VectorXd diag_Q_f_f;
62 		compute_Qff(K_f_u, diag_Q_f_f);
63 
64 		VectorXd diag_K_f_f;
65 		compute_Kff(diag_K_f_f);
66 
67 		VectorXd diff_Kff_Qff(nb_gp_nodes);
68 		diff_Kff_Qff.setZero();
69 
70 		compute_Lambda(diff_Kff_Qff, noise);
71 
72 		MatrixXd Lambda_K_f_u;
73 		compute_Lambda_times_Kfu(K_f_u, Lambda_K_f_u);
74 
75 		MatrixXd K_u_f_Lambda_f_u;
76 		compute_KufLambdaKfu(Lambda_K_f_u, K_u_f_Lambda_f_u);
77 
78 		L_eigen.compute(K_u_u + K_u_f_Lambda_f_u);
79 
80 		scaled_function_values.clear();
81 		scaled_function_values.resize(nb_gp_nodes);
82 		for (int i = 0; i < nb_gp_nodes; i++) {
83 			scaled_function_values.at(i) = values.at(i);
84 		}
85 
86 		VectorXd LambdaInv_f;
87 		compute_LambdaInvF(LambdaInv_f);
88 
89 		VectorXd alpha_eigen_rhs = K_u_f * LambdaInv_f;
90 
91 		//Solve Sigma_not_inv^(-1)*alpha
92 		alpha_eigen = L_eigen.solve(alpha_eigen_rhs);
93 
94 	} else {
95 		GaussianProcess::build(nodes, values, noise);
96 	}
97 	return;
98 }
99 //--------------------------------------------------------------------------------
evaluate(std::vector<double> const & x,double & mean,double & variance)100 void SubsetOfRegressors::evaluate(std::vector<double> const &x, double &mean,
101 		double &variance) {
102 	int nb_u_nodes = u.rows();
103 	if (nb_u_nodes > 0) {
104 		int nb_u_nodes = u.rows();
105 		VectorXd x_eigen;
106 		x_eigen.resize(x.size());
107 		for(int i = 0; i < x.size(); ++i){
108 			x_eigen(i) = x[i];
109 		}
110 		K0_eigen.resize(nb_u_nodes);
111 		for (int i = 0; i < nb_u_nodes; i++) {
112 			K0_eigen(i) = evaluate_kernel(x_eigen, u.row(i));
113 		}
114 
115 //		std::cout << "K0" << std::endl;
116 //		VectorOperations::print_vector(K0);
117 //		std::cout << "alpha=(K_u_u + K_u_f*sigmaI*K_f_u)*K_u_f*noise*y:" << std::endl;
118 //		VectorOperations::print_vector(alpha);
119 		mean = K0_eigen.dot(alpha_eigen);
120 //		std::cout << "mean:" << mean << std::endl;
121 
122 		variance = K0_eigen.dot(L_eigen.solve(K0_eigen));
123 		/*
124 		 std::cout << "Variance: " << variance << std::endl;
125 		 std::cout << "######################################" << std::endl;
126 		 evaluate_counter++;
127 		 assert(evaluate_counter<20*3);
128 		 */
129 	} else {
130 		GaussianProcess::evaluate(x, mean, variance);
131 	}
132 	return;
133 
134 }
135 
run_optimizer(std::vector<double> const & values)136 void SubsetOfRegressors::run_optimizer(std::vector<double> const &values){
137 	double optval;
138 
139   int exitflag;
140 
141   nlopt::opt* local_opt;
142   nlopt::opt* global_opt;
143   int dimp1 = gp_parameters_hp.size();
144   set_optimizer(values, local_opt, global_opt);
145   std::vector<double> tol(dimp1);
146   for(int i = 0; i < dimp1; ++i){
147   	tol[i] = 0.0;
148   }
149   if (optimize_global){
150   	  print = 0;
151  	  std::cout << "Global optimization" << std::endl;
152 	  exitflag=-20;
153 	  global_opt->add_inequality_mconstraint(trust_region_constraint, gp_pointer, tol);
154   	  global_opt->set_min_objective( parameter_estimation_objective, gp_pointer);
155 	  exitflag = global_opt->optimize(gp_parameters_hp, optval);
156 
157 	  std::cout << "exitflag = "<< exitflag<<std::endl;
158   	  std::cout << "Function calls: " << print << std::endl;
159 	  std::cout << "OPTVAL .... " << optval << std::endl;
160 	  //for ( int i = 0; i < 1+dim; ++i )
161 	  //  std::cout << "gp_param = " << gp_parameters_hp[i] << std::endl;
162 	  //std::cout << std::endl;
163   }
164   if (optimize_local){
165   	  print = 0;
166   	  std::cout << "Local optimization" << std::endl;
167 	  exitflag=-20;
168 	  //try {
169 	  local_opt->add_inequality_mconstraint(trust_region_constraint, gp_pointer, tol);
170 	  local_opt->set_min_objective( parameter_estimation_objective_w_gradients, gp_pointer);
171 	  exitflag = local_opt->optimize(gp_parameters_hp, optval);
172 
173 	  std::cout << "exitflag = "<< exitflag<<std::endl;
174   	  std::cout << "Function calls: " << print << std::endl;
175 	  std::cout << "OPTVAL .... " << optval << std::endl;
176 	  //for ( int i = 0; i < 1+dim; ++i )
177 	    //std::cout << "gp_param = " << gp_parameters_hp[i] << std::endl;
178 	  //std::cout << std::endl;
179   }
180 
181   delete local_opt;
182   delete global_opt;
183 
184   return;
185 }
186 
estimate_hyper_parameters_all(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)187 void SubsetOfRegressors::estimate_hyper_parameters_all ( std::vector< std::vector<double> > const &nodes,
188                                                   std::vector<double> const &values,
189                                                   std::vector<double> const &noise )
190 {
191   //std::cout << "in sor1" << std::endl;
192   if(u.rows() > 0){
193 	  copy_data_to_members(nodes, values, noise);
194 
195 	  gp_pointer = this;
196 
197 	  sample_u(u.rows());
198 
199 	  set_hyperparameters();
200 
201 	  run_optimizer(values);
202 
203 	  copy_hyperparameters();
204 
205 	  //this->build(nodes, values, noise);
206 	  for ( int i = 0; i < 1+dim; ++i )
207 	    std::cout << "gp_param = " << gp_parameters[i] << std::endl;
208   }else{
209   	GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
210   }
211 
212   return;
213 }
214 
estimate_hyper_parameters_induced_only(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)215 void SubsetOfRegressors::estimate_hyper_parameters_induced_only ( std::vector< std::vector<double> > const &nodes,
216                                                   std::vector<double> const &values,
217                                                   std::vector<double> const &noise )
218 {
219   //std::cout << "in sor1" << "u size:" << u.rows() << std::endl;
220   if(u.rows() > 0){
221 	  copy_data_to_members(nodes, values, noise);
222 
223 	  gp_pointer = this;
224 
225 	  sample_u(u.rows());
226 
227 	  set_hyperparameters_induced_only();
228 
229 	  run_optimizer(values);
230 
231 	  copy_hyperparameters();
232 
233 	  //this->build(nodes, values, noise);
234 	  for ( int i = 0; i < 1+dim; ++i )
235 	    std::cout << "gp_param = " << gp_parameters[i] << std::endl;
236   }else{
237   	GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
238   }
239 
240   return;
241 }
242 
estimate_hyper_parameters_ls_only(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)243 void SubsetOfRegressors::estimate_hyper_parameters_ls_only ( std::vector< std::vector<double> > const &nodes,
244                                                   std::vector<double> const &values,
245                                                   std::vector<double> const &noise )
246 {
247   if(u.rows() > 0){
248 	  copy_data_to_members(nodes, values, noise);
249 
250 	  gp_pointer = this;
251 
252 	  sample_u(u.rows());
253 
254 	  set_hyperparameters_ls_only();
255 
256 	  run_optimizer(values);
257 
258 	  copy_hyperparameters();
259 
260 	  //this->build(nodes, values, noise);
261 	  for ( int i = 0; i < 1+dim; ++i )
262 	    std::cout << "gp_param = " << gp_parameters[i] << std::endl;
263   }else{
264   	GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
265   }
266 
267   return;
268 }
269 
270 //--------------------------------------------------------------------------------
parameter_estimation_objective(std::vector<double> const & x,std::vector<double> & grad,void * data)271 double SubsetOfRegressors::parameter_estimation_objective(std::vector<double> const &x,
272                                                        std::vector<double> &grad,
273                                                        void *data)
274 {
275 
276   SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);
277   int offset;
278   std::vector<double> local_params(d->dim+1);
279   if(x.size()==1+d->dim){
280   	  offset = 0;
281   	  for(int i = 0; i < local_params.size(); ++i){
282 	  		local_params[i] = x[i];
283   	  }
284   }else{
285 	  if(x.size() > d->u.rows()*d->dim){
286 	  	offset = 1+d->dim;
287 	  	for(int i = 0; i < local_params.size(); ++i){
288 	  		local_params[i] = x[i];
289 	  	}
290 	  }else{
291 	  	offset = 0;
292 	  	for(int i = 0; i < local_params.size(); ++i){
293 	  		local_params[i] = d->gp_parameters[i];
294 	  	}
295 	  }
296 	  int u_counter;
297 	  for (int i = 0; i < d->dim; ++i) {
298 	  	  u_counter = 0;
299 	  	  for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
300 	            d->u(u_counter,i) = x[j];
301 	            u_counter++;
302 	  	  }
303 	  }
304   }
305 
306   //Compute Kuf, Kuu
307   //Set up matrix K_u_f and K_f_u
308 	int nb_gp_nodes = d->gp_nodes.size();
309 	int nb_u_nodes = d->u.rows();
310 	d->K_u_f.resize(nb_u_nodes, nb_gp_nodes);
311 	for (int i = 0; i < nb_u_nodes; ++i) {
312 		for (int j = 0; j < nb_gp_nodes; ++j) {
313 			d->K_u_f(i,j) = d->evaluate_kernel(d->u.row(i), d->gp_nodes_eigen.row(j), local_params);
314 		}
315 	}
316 	MatrixXd K_f_u = d->K_u_f.transpose();
317 	//std::cout << "Kuf\n" << d->K_u_f << std::endl;
318 
319 	//Set up matrix K_u_u
320 	d->K_u_u.resize(nb_u_nodes, nb_u_nodes);
321 	for (int i = 0; i < nb_u_nodes; ++i) {
322 		for (int j = 0; j < nb_u_nodes; ++j) {
323 			d->K_u_u(i,j) = d->evaluate_kernel(d->u.row(i), d->u.row(j), local_params);
324 			if(i==j)
325 				d->K_u_u(i,j) += d->Kuu_opt_nugget;
326 		}
327 	}
328 	//std::cout << "Kuu\n" << d->K_u_u << std::endl;
329 	d->LLTofK_u_u.compute(d->K_u_u);
330 	MatrixXd K_u_u_u_f = d->LLTofK_u_u.solve(d->K_u_f);
331 
332 	VectorXd diag_Q_f_f;
333 	diag_Q_f_f.resize(nb_gp_nodes);
334 
335 	for (int i = 0; i < nb_gp_nodes; ++i) {
336 		diag_Q_f_f(i) = 0.0;
337 		for (int j = 0; j < nb_u_nodes; ++j) {
338 			diag_Q_f_f(i) += (K_f_u(i,j) * K_u_u_u_f(j,i));
339 		}
340 	}
341 	VectorXd diag_K_f_f;
342 	diag_K_f_f.resize(nb_gp_nodes);
343 	for (int i = 0; i < nb_gp_nodes; ++i) {
344 		diag_K_f_f(i) = d->evaluate_kernel(d->gp_nodes_eigen.row(i),
345 									 d->gp_nodes_eigen.row(i), local_params);
346 	}
347 	/*std::cout << "diag_K_f_f\n" << diag_K_f_f << std::endl;
348 	std::cout << "diag_Q_f_f\n" << diag_Q_f_f << std::endl;*/
349 
350 	d->Lambda.resize(nb_gp_nodes);
351 	//std::cout << "noise\n" << std::endl;
352 	for (int i = 0; i < nb_gp_nodes; ++i) {
353 		d->Lambda(i) = (pow( d->gp_noise.at(i) / 2e0 + d->noise_regularization, 2e0 ));
354 	}
355 	//std::cout << std::endl;
356 
357 	MatrixXd Lambda_K_f_u;
358 	Lambda_K_f_u.resize(nb_gp_nodes, nb_u_nodes);
359 	for(int i = 0; i < nb_gp_nodes; i++){
360 		for(int j = 0; j < nb_u_nodes; j++){
361 			Lambda_K_f_u(i,j) = ((1.0/(d->Lambda(i) + d->Lambda_opt_nugget)) * K_f_u(i,j));
362 		}
363 	}
364 	MatrixXd K_u_f_Lambda_f_u;
365 	K_u_f_Lambda_f_u.resize(nb_u_nodes, nb_u_nodes);
366 	K_u_f_Lambda_f_u = d->K_u_f*Lambda_K_f_u;
367 
368 	MatrixXd Sigma = d->K_u_u + K_u_f_Lambda_f_u;
369 	for (int i = 0; i < nb_u_nodes; ++i) {
370 		//Sigma(i,i) += nugget;
371 	}
372 	d->L_eigen.compute(Sigma);
373 	double L12 = 0.0;//-log(d->K_u_u.determinant());
374 	for (int i = 0; i < d->u.rows(); ++i){
375 		L12 += log(d->LLTofK_u_u.vectorD()(i));
376     	//std::cout << L1 << "L11-" << i << std::endl;
377 	}
378 
379 	double det_Leigen = 0.0;
380 	for (int i = 0; i < d->u.rows(); ++i){
381 		det_Leigen += log(d->L_eigen.vectorD()(i));
382     	//std::cout << d->L_eigen.matrixL()(i,i) << " " << log(d->L_eigen.matrixL()(i,i)) << "det_Leigen-" << i << std::endl;
383 	}
384 
385 	double L11 = 0.0;
386     //std::cout << L1 << "L12 " << std::endl;
387 	for (int i = 0; i < d->nb_gp_nodes; ++i){
388 		L11 += log(d->Lambda(i));
389 		//std::cout << "Lambda: " << i << " " << d->Lambda(i) <<" "<<log(d->Lambda(i)) << std::endl;
390 	}
391     double L1 = 0.0;
392 	//L1 = 0.5*L11 + (2*0.5)*det_Leigen + 0.5*det_LeigenD - (2*0.5)*L12 - 0.5*L12D;
393 	L1 = 0.5*L11 + 0.5*det_Leigen - 0.5*L12;
394 
395 	MatrixXd Q_f_f = K_f_u*K_u_u_u_f;
396 	for(int i = 0; i < nb_gp_nodes; i++){
397 		Q_f_f(i,i) += d->Lambda(i) + d->Qff_opt_nugget;
398 	}
399 	LLT<MatrixXd> LLTofQ_f_f(Q_f_f);
400 	double L2 = 0.5*d->scaled_function_values_eigen.dot(LLTofQ_f_f.solve(d->scaled_function_values_eigen));
401 
402   	//std::cout << d->scaled_function_values_eigen << std::endl;
403   double result = L1 + L2;
404 
405   if (std::isinf(result) || std::isnan(result)){
406   	std::cout << "Result is inf or nan" << std::endl;
407   	std::cout << "L11 " << L11 << " " << 'x' << std::endl;
408     std::cout << "L12 " << L12 << std::endl; //<< " " << log(d->K_u_u.determinant()) << std::endl;
409     std::cout << "L13 " << det_Leigen << std::endl;// << " " << log(Sigma.determinant()) << std::endl;
410     std::cout << L1 << ' ' << L2 << std::endl;
411   	result = std::numeric_limits<double>::infinity();
412   	if(d->Kuu_opt_nugget < d->nugget_max){
413   		d->Kuu_opt_nugget *= 10;
414   		d->Lambda_opt_nugget *= 10;
415   		d->Qff_opt_nugget *= 10;
416   	}
417   }else{
418   	if(d->Kuu_opt_nugget > d->nugget_min){
419   		d->Kuu_opt_nugget *= 0.1;
420   		d->Lambda_opt_nugget *= 0.1;
421   		d->Qff_opt_nugget *= 0.1;
422   	}
423   }
424   if ((d->print%1000)==0){
425 	  /*for ( int i = 0; i < d->dim + 1; ++i )
426 	    std::cout << "gp_param = " << x[i] << std::endl;
427 	  for(int j = offset; j < offset + d->u.rows(); ++j)
428 			std::cout << "gp_param = " << x[j] <<","<<x[j+ d->u.rows()]<< std::endl;*/
429 
430   	//std::cout << d->print <<" Objective: " << L1 << " " << L2 << " "<< result<< std::endl;
431   }
432 
433   d->print++;
434 
435   return result;
436 
437 }
438 
parameter_estimation_objective_w_gradients(std::vector<double> const & x,std::vector<double> & grad,void * data)439 double SubsetOfRegressors::parameter_estimation_objective_w_gradients(std::vector<double> const &x,
440                                                        std::vector<double> &grad,
441                                                        void *data)
442 {
443 
444   //std::cout << "in sor2" << std::endl;
445 
446   SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);
447   int offset;
448   std::vector<double> local_params(d->dim+1);
449   if(x.size()==1+d->dim){
450   	  offset = 0;
451   	  for(int i = 0; i < local_params.size(); ++i){
452 	  		local_params[i] = x[i];
453   	  }
454   }else{
455 	  if(x.size() > d->u.rows()*d->dim){
456 	  	offset = 1+d->dim;
457 	  	for(int i = 0; i < local_params.size(); ++i){
458 	  		local_params[i] = x[i];
459 	  	}
460 	  }else{
461 	  	offset = 0;
462 	  	for(int i = 0; i < local_params.size(); ++i){
463 	  		local_params[i] = d->gp_parameters[i];
464 	  	}
465 	  }
466 	  int u_counter;
467 	  for (int i = 0; i < d->dim; ++i) {
468 	  	  u_counter = 0;
469 	  	  for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
470 	            d->u(u_counter,i) = x[j];
471 	            u_counter++;
472 	  	  }
473 	  }
474   }
475 
476   //Compute Kuf, Kuu
477   //Set up matrix K_u_f and K_f_u
478 	int nb_gp_nodes = d->gp_nodes.size();
479 	int nb_u_nodes = d->u.rows();
480 	d->K_u_f.resize(nb_u_nodes, nb_gp_nodes);
481 	for (int i = 0; i < nb_u_nodes; ++i) {
482 		for (int j = 0; j < nb_gp_nodes; ++j) {
483 			d->K_u_f(i,j) = d->evaluate_kernel(d->u.row(i), d->gp_nodes_eigen.row(j), local_params);
484 		}
485 	}
486 	MatrixXd K_f_u = d->K_u_f.transpose();
487 	//std::cout << "Kuf\n" << d->K_u_f << std::endl;
488 
489 	//Set up matrix K_u_u
490 	d->K_u_u.resize(nb_u_nodes, nb_u_nodes);
491 	for (int i = 0; i < nb_u_nodes; ++i) {
492 		for (int j = 0; j < nb_u_nodes; ++j) {
493 			d->K_u_u(i,j) = d->evaluate_kernel(d->u.row(i), d->u.row(j), local_params);
494 			if(i==j)
495 				d->K_u_u(i,j) += d->Kuu_opt_nugget;
496 		}
497 	}
498 	//std::cout << "Kuu\n" << d->K_u_u << std::endl;
499 	d->LLTofK_u_u.compute(d->K_u_u);
500 	MatrixXd K_u_u_u_f = d->LLTofK_u_u.solve(d->K_u_f);
501 
502 	VectorXd diag_Q_f_f;
503 	diag_Q_f_f.resize(nb_gp_nodes);
504 
505 	for (int i = 0; i < nb_gp_nodes; ++i) {
506 		diag_Q_f_f(i) = 0.0;
507 		for (int j = 0; j < nb_u_nodes; ++j) {
508 			diag_Q_f_f(i) += (K_f_u(i,j) * K_u_u_u_f(j,i));
509 		}
510 	}
511 	VectorXd diag_K_f_f;
512 	diag_K_f_f.resize(nb_gp_nodes);
513 	for (int i = 0; i < nb_gp_nodes; ++i) {
514 		diag_K_f_f(i) = d->evaluate_kernel(d->gp_nodes_eigen.row(i),
515 									 d->gp_nodes_eigen.row(i), local_params);
516 	}
517 	/*std::cout << "diag_K_f_f\n" << diag_K_f_f << std::endl;
518 	std::cout << "diag_Q_f_f\n" << diag_Q_f_f << std::endl;*/
519 
520 	d->Lambda.resize(nb_gp_nodes);
521 	//std::cout << "noise\n" << std::endl;
522 	for (int i = 0; i < nb_gp_nodes; ++i) {
523 		d->Lambda(i) = (pow( d->gp_noise.at(i) / 2e0 + d->noise_regularization, 2e0 ));
524 	}
525 	//std::cout << std::endl;
526 
527 	MatrixXd Lambda_K_f_u;
528 	Lambda_K_f_u.resize(nb_gp_nodes, nb_u_nodes);
529 	for(int i = 0; i < nb_gp_nodes; i++){
530 		for(int j = 0; j < nb_u_nodes; j++){
531 			Lambda_K_f_u(i,j) = ((1.0/(d->Lambda(i) + d->Lambda_opt_nugget)) * K_f_u(i,j));
532 		}
533 	}
534 	MatrixXd K_u_f_Lambda_f_u;
535 	K_u_f_Lambda_f_u.resize(nb_u_nodes, nb_u_nodes);
536 	K_u_f_Lambda_f_u = d->K_u_f*Lambda_K_f_u;
537 
538 	MatrixXd Sigma = d->K_u_u + K_u_f_Lambda_f_u;
539 	for (int i = 0; i < nb_u_nodes; ++i) {
540 		//Sigma(i,i) += nugget;
541 	}
542 	d->L_eigen.compute(Sigma);
543 	double L12 = 0.0;//-log(d->K_u_u.determinant());
544 	for (int i = 0; i < d->u.rows(); ++i){
545 		L12 += log(d->LLTofK_u_u.vectorD()(i));
546     	//std::cout << L1 << "L11-" << i << std::endl;
547 	}
548 
549 	double det_Leigen = 0.0;
550 	for (int i = 0; i < d->u.rows(); ++i){
551 		det_Leigen += log(d->L_eigen.vectorD()(i));
552     	//std::cout << d->L_eigen.matrixL()(i,i) << " " << log(d->L_eigen.matrixL()(i,i)) << "det_Leigen-" << i << std::endl;
553 	}
554 
555 	double L11 = 0.0;
556     //std::cout << L1 << "L12 " << std::endl;
557 	for (int i = 0; i < d->nb_gp_nodes; ++i){
558 		L11 += log(d->Lambda(i));
559 		//std::cout << "Lambda: " << i << " " << d->Lambda(i) <<" "<<log(d->Lambda(i)) << std::endl;
560 	}
561     double L1 = 0.0;
562 	//L1 = 0.5*L11 + (2*0.5)*det_Leigen + 0.5*det_LeigenD - (2*0.5)*L12 - 0.5*L12D;
563 	L1 = 0.5*L11 + 0.5*det_Leigen - 0.5*L12;
564 
565 	MatrixXd Q_f_f = K_f_u*K_u_u_u_f;
566 	for(int i = 0; i < nb_gp_nodes; i++){
567 		Q_f_f(i,i) += d->Lambda(i) + d->Qff_opt_nugget;
568 	}
569 	LLT<MatrixXd> LLTofQ_f_f(Q_f_f);
570 	double L2 = 0.5*d->scaled_function_values_eigen.dot(LLTofQ_f_f.solve(d->scaled_function_values_eigen));
571 
572   	//std::cout << d->scaled_function_values_eigen << std::endl;
573   double result = L1 + L2;
574 
575   if (std::isinf(result) || std::isnan(result)){
576   	std::cout << "Result is inf or nan" << std::endl;
577   	std::cout << "L11 " << L11 << " " << 'x' << std::endl;
578     std::cout << "L12 " << L12 << std::endl; //<< " " << log(d->K_u_u.determinant()) << std::endl;
579     std::cout << "L13 " << det_Leigen << std::endl;// << " " << log(Sigma.determinant()) << std::endl;
580     std::cout << L1 << ' ' << L2 << std::endl;
581   	result = std::numeric_limits<double>::infinity();
582   	if(d->Kuu_opt_nugget < d->nugget_max){
583   		d->Kuu_opt_nugget *= 10;
584   		d->Lambda_opt_nugget *= 10;
585   		d->Qff_opt_nugget *= 10;
586   	}
587   }else{
588   	if(d->Kuu_opt_nugget > d->nugget_min){
589   		d->Kuu_opt_nugget *= 0.1;
590   		d->Lambda_opt_nugget *= 0.1;
591   		d->Qff_opt_nugget *= 0.1;
592   	}
593   }
594   if ((d->print%1000)==0){
595 	  /*for ( int i = 0; i < d->dim + 1; ++i )
596 	    std::cout << "gp_param = " << x[i] << std::endl;
597 	  for(int j = offset; j < offset + d->u.rows(); ++j)
598 			std::cout << "gp_param = " << x[j] <<","<<x[j+ d->u.rows()]<< std::endl;*/
599 
600   	//std::cout << d->print <<" Objective: " << L1 << " " << L2 << " "<< result<< std::endl;
601   }
602 
603 
604 	//Gradient computation:
605 	int dim_grad = x.size();
606 	grad.resize(dim_grad);
607 
608 	for(int i = 0; i < grad.size(); i++){
609 
610 		MatrixXd Kffdot;
611 		MatrixXd Kufdot;
612 		MatrixXd Kfudot;
613 		MatrixXd Kuudot;
614 		VectorXd LambdaDot;
615 		if(dim_grad == d->dim + 1){
616 			if(i == 0){//grad sigma_f
617 				d->derivate_K_u_u_wrt_sigmaf(local_params, Kuudot);
618 				d->derivate_K_f_f_wrt_sigmaf(local_params, Kffdot);
619 				d->derivate_K_u_f_wrt_sigmaf(local_params, Kufdot);
620 				Kfudot = Kufdot.transpose();
621 			}else{//grad length parameter
622 				d->derivate_K_u_u_wrt_l(local_params, i-1, Kuudot);
623 				d->derivate_K_f_f_wrt_l(local_params, i-1, Kffdot);
624 				d->derivate_K_u_f_wrt_l(local_params, i-1, Kufdot);
625 				Kfudot = Kufdot.transpose();
626 			}
627 		}else{
628 			if( dim_grad > d->u.rows()*d->dim){//if we optimize also for sigma_f and lengthscales
629 				if(i == 0){//grad sigma_f
630 					d->derivate_K_u_u_wrt_sigmaf(local_params, Kuudot);
631 					d->derivate_K_f_f_wrt_sigmaf(local_params, Kffdot);
632 					d->derivate_K_u_f_wrt_sigmaf(local_params, Kufdot);
633 					Kfudot = Kufdot.transpose();
634 				}else if(i > 0 && i < 1 + d->dim){//grad length parameter
635 					d->derivate_K_u_u_wrt_l(local_params, i-1, Kuudot);
636 					d->derivate_K_f_f_wrt_l(local_params, i-1, Kffdot);
637 					d->derivate_K_u_f_wrt_l(local_params, i-1, Kufdot);
638 					Kfudot = Kufdot.transpose();
639 				}else{//grad u
640 					int uidx = (i-offset)%d->u.rows();
641 					int udim = (int) ((i-offset)/d->u.rows());
642 					d->derivate_K_u_u_wrt_uik(local_params, uidx, udim, Kuudot);
643 					d->derivate_K_u_f_wrt_uik(local_params, uidx, udim, Kufdot);
644 					Kffdot.resize(nb_gp_nodes, nb_gp_nodes);
645 					Kffdot.setZero();
646 					Kfudot = Kufdot.transpose();
647 				}
648 		    }else{
649 		    	int uidx = (i-offset)%d->u.rows();
650 				int udim = (int) ((i-offset)/d->u.rows());
651 				d->derivate_K_u_u_wrt_uik(local_params, uidx, udim, Kuudot);
652 				d->derivate_K_u_f_wrt_uik(local_params, uidx, udim, Kufdot);
653 				Kffdot.resize(nb_gp_nodes, nb_gp_nodes);
654 				Kffdot.setZero();
655 				Kfudot = Kufdot.transpose();
656 		    }
657 	    }
658 		VectorXd LambdaInv(nb_gp_nodes);
659 		for(int j = 0; j < nb_gp_nodes; ++j){
660 			LambdaInv(j) = 1.0/d->Lambda(j);
661 		}
662 
663 		/*
664 		IOFormat HeavyFmt(FullPrecision, 0, ", ", ";\n", "", "", "[", "]");
665 		std::cout << "Kuudot\n" << Kuudot.format(HeavyFmt) << std::endl;
666 		std::cout << "Kffdot\n" << Kffdot.format(HeavyFmt) << std::endl;
667 		std::cout << "Kufdot\n" << Kufdot.format(HeavyFmt) << std::endl;
668 		std::cout << "Kuu\n" << d->K_u_u.format(HeavyFmt) << std::endl;
669 		std::cout << "Kuf\n" << d->K_u_f.format(HeavyFmt) << std::endl;
670 		std::cout << "Qff\n" << Q_f_f.format(HeavyFmt) << std::endl;
671 		std::cout << "noise:\n" << std::endl;
672 		for (int i = 0; i < nb_gp_nodes; ++i) {
673 			std::cout << (pow( d->gp_noise.at(i) / 2e0 + d->noise_regularization, 2e0 )) << std::endl;
674 		}
675 		std::cout << std::endl;
676 		std::cout << "y:\n" << d->scaled_function_values_eigen << std::endl;
677 		*/
678 
679 		//L1-term: dL1/dtheta= tr(Kuu^(-1) * Kuudot)
680 		double dL1 = (d->LLTofK_u_u.solve(Kuudot)).trace();
681 
682 		//L2-term: d(log(det(Kuu+Kuf*(noise^2)^(-1)*Kfu)))/dtheta
683 		MatrixXd LambdaInvDiag = LambdaInv.asDiagonal();
684 		MatrixXd dSigma = Kuudot
685 							+ 2*Kufdot*LambdaInvDiag*K_f_u;
686 							//+ d->K_u_f*LambdaInvDiag*Kfudot;
687 		//std::cout << "LambdaInv:\n" << LambdaInv.format(HeavyFmt) << std::endl;
688 		//std::cout << "LambdaInvDiag:\n" << LambdaInvDiag.format(HeavyFmt) << std::endl;
689 		//std::cout << "dSigma:\n" << dSigma.format(HeavyFmt) << std::endl;
690 		double dL2 = (d->L_eigen.solve(dSigma)).trace();
691 
692 		//L3-term: d(yT*Q*y)/dtheta
693 		MatrixXd KfudotKuinvKuf = Kfudot * d->LLTofK_u_u.solve(d->K_u_f);
694 		MatrixXd KfuKuinvKufdot = K_f_u * d->LLTofK_u_u.solve(Kufdot);
695 		MatrixXd KfuKuinfKudotKuinvKuf = K_f_u*(d->LLTofK_u_u.solve(Kuudot*(d->LLTofK_u_u.solve(d->K_u_f))));
696 		MatrixXd dQ = KfudotKuinvKuf - KfuKuinfKudotKuinvKuf + KfuKuinvKufdot;
697 		//std::cout << "dQ:\n" << dQ.format(HeavyFmt) << std::endl;
698 		double dL3 = (-1)*d->scaled_function_values_eigen.dot(
699 						LLTofQ_f_f.solve(dQ*LLTofQ_f_f.solve(
700 							d->scaled_function_values_eigen)));
701 
702 		grad[i] = 0.5*(-dL1 + dL2 + dL3);
703 
704 		if ((d->print%1)==0){
705 	 		//std::cout << "Grad: " << i << "|" << 0.5*dL1 << " + "<< 0.5*dL2 << " + "<< 0.5*dL3 << " = " << grad[i] << std::endl;
706 		}
707 		//exit(-1);
708 
709 	}
710 
711   d->print++;
712 
713   return result;
714 
715 }
716 
trust_region_constraint(unsigned int m,double * c,unsigned int n,const double * x,double * grad,void * data)717 void SubsetOfRegressors::trust_region_constraint(unsigned int m, double* c, unsigned int n, const double* x, double* grad,
718                                                      			void *data){
719 	SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);
720 
721 	int offset;
722 	if(m == d->dim + 1){
723 		for (int i = 0; i < d->dim+1; ++i) {
724 			c[i] = -1;
725 		}
726 	}else{
727 		if(m > d->u.rows()*d->dim){
728 		    offset = 1+d->dim;
729 		}else{
730 			offset = 0;
731 		}
732 		int u_counter;
733 
734 		for (int i = 0; i < offset; ++i) {
735 			c[i] = -1;
736 		}
737 		MatrixXd u_intern(d->u.rows(), d->u.cols());
738 		for (int i = 0; i < d->dim; ++i) {
739 		  u_counter = 0;
740 		  for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
741 	        u_intern(u_counter, i) = x[j];
742 	        u_counter++;
743 		  }
744 		}
745 		VectorXd c_intern(u_intern.rows());
746 		VectorXd dist(d->dim);
747 		for (int i = 0; i < u_intern.rows(); ++i) {
748 			for (int j = 0; j < d->dim; ++j) {
749 				dist(j) = (u_intern(i, j)-d->constraint_ball_center(j));
750 			}
751 	    	c_intern(i) = sqrt( dist.dot(dist) ) - d->constraint_ball_radius;
752 		}
753 		for (int i = 0; i < d->dim; ++i) {
754 		    u_counter = 0;
755 		  	for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
756 		    c[j] = c_intern(u_counter);
757 	        u_counter++;
758 		  	}
759 		}
760 	}
761   	return;
762 }