1 //
2 // Created by friedrich on 16.08.16.
3 //
4
#include <SubsetOfRegressors.hpp>

#include <algorithm>
#include <assert.h>
#include <cmath>
#include <iostream>
#include <memory>
#include <random>
11
12 //--------------------------------------------------------------------------------
// Constructs a Subset-of-Regressors (SoR) Gaussian-process approximation.
// All initialization is delegated to the FullyIndependentTrainingConditional
// base class.
// @param n            problem dimension forwarded to the base class
// @param delta_input  held by reference by the base class, so the referenced
//                     double must outlive this object
SubsetOfRegressors::SubsetOfRegressors(int n, double &delta_input) :
    FullyIndependentTrainingConditional(n, delta_input) {
}
16 //--------------------------------------------------------------------------------
// Constructs a Subset-of-Regressors approximation with explicitly given
// kernel hyperparameters; delegates to the base class.
// @param n                    problem dimension forwarded to the base class
// @param delta_input          held by reference by the base class; must
//                             outlive this object
// @param gp_parameters_input  initial hyperparameters (passed by value and
//                             forwarded to the base class)
SubsetOfRegressors::SubsetOfRegressors(int n, double &delta_input, std::vector<double> gp_parameters_input) :
    FullyIndependentTrainingConditional(n, delta_input, gp_parameters_input) {
}
20 //--------------------------------------------------------------------------------
build(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)21 void SubsetOfRegressors::build(std::vector<std::vector<double> > const &nodes,
22 std::vector<double> const &values, std::vector<double> const &noise) {
23
24 int nb_u_nodes = u.rows();
25
26 if (nb_u_nodes > 0) {
27 std::cout << "SoR build with [" << nodes.size() << "," << nb_u_nodes
28 << "]" << std::endl;
29 std::cout << "With Parameters: " << std::endl;
30 for ( int i = 0; i < dim+1; ++i )
31 std::cout << "gp_param = " << gp_parameters[i] << std::endl;
32 std::cout << std::endl;
33 //nb_u_nodes++;
34
35 nb_gp_nodes = nodes.size();
36 gp_nodes.clear();
37 gp_noise.clear();
38 gp_nodes_eigen.resize(nb_gp_nodes, dim);
39 gp_noise_eigen.resize(nb_gp_nodes);
40 for (int i = 0; i < nb_gp_nodes; ++i) {
41 gp_nodes.push_back(nodes.at(i));
42 gp_noise.push_back(noise.at(i));
43 for(int j = 0; j < dim; ++j){
44 gp_nodes_eigen(i,j) = nodes[i][j];
45 }
46 gp_noise_eigen(i) = noise[i];
47 }
48 /*
49 if (resample_u){
50 this->sample_u(nb_u_nodes);
51 if (do_hp_estimation)
52 this->estimate_hyper_parameters(nodes, values, noise);
53 resample_u = false;
54 }
55 */
56
57 //Set up matrix K_u_f and K_f_u
58 compute_Kuf_and_Kuu();
59 MatrixXd K_f_u = K_u_f.transpose();
60
61 VectorXd diag_Q_f_f;
62 compute_Qff(K_f_u, diag_Q_f_f);
63
64 VectorXd diag_K_f_f;
65 compute_Kff(diag_K_f_f);
66
67 VectorXd diff_Kff_Qff(nb_gp_nodes);
68 diff_Kff_Qff.setZero();
69
70 compute_Lambda(diff_Kff_Qff, noise);
71
72 MatrixXd Lambda_K_f_u;
73 compute_Lambda_times_Kfu(K_f_u, Lambda_K_f_u);
74
75 MatrixXd K_u_f_Lambda_f_u;
76 compute_KufLambdaKfu(Lambda_K_f_u, K_u_f_Lambda_f_u);
77
78 L_eigen.compute(K_u_u + K_u_f_Lambda_f_u);
79
80 scaled_function_values.clear();
81 scaled_function_values.resize(nb_gp_nodes);
82 for (int i = 0; i < nb_gp_nodes; i++) {
83 scaled_function_values.at(i) = values.at(i);
84 }
85
86 VectorXd LambdaInv_f;
87 compute_LambdaInvF(LambdaInv_f);
88
89 VectorXd alpha_eigen_rhs = K_u_f * LambdaInv_f;
90
91 //Solve Sigma_not_inv^(-1)*alpha
92 alpha_eigen = L_eigen.solve(alpha_eigen_rhs);
93
94 } else {
95 GaussianProcess::build(nodes, values, noise);
96 }
97 return;
98 }
99 //--------------------------------------------------------------------------------
evaluate(std::vector<double> const & x,double & mean,double & variance)100 void SubsetOfRegressors::evaluate(std::vector<double> const &x, double &mean,
101 double &variance) {
102 int nb_u_nodes = u.rows();
103 if (nb_u_nodes > 0) {
104 int nb_u_nodes = u.rows();
105 VectorXd x_eigen;
106 x_eigen.resize(x.size());
107 for(int i = 0; i < x.size(); ++i){
108 x_eigen(i) = x[i];
109 }
110 K0_eigen.resize(nb_u_nodes);
111 for (int i = 0; i < nb_u_nodes; i++) {
112 K0_eigen(i) = evaluate_kernel(x_eigen, u.row(i));
113 }
114
115 // std::cout << "K0" << std::endl;
116 // VectorOperations::print_vector(K0);
117 // std::cout << "alpha=(K_u_u + K_u_f*sigmaI*K_f_u)*K_u_f*noise*y:" << std::endl;
118 // VectorOperations::print_vector(alpha);
119 mean = K0_eigen.dot(alpha_eigen);
120 // std::cout << "mean:" << mean << std::endl;
121
122 variance = K0_eigen.dot(L_eigen.solve(K0_eigen));
123 /*
124 std::cout << "Variance: " << variance << std::endl;
125 std::cout << "######################################" << std::endl;
126 evaluate_counter++;
127 assert(evaluate_counter<20*3);
128 */
129 } else {
130 GaussianProcess::evaluate(x, mean, variance);
131 }
132 return;
133
134 }
135
run_optimizer(std::vector<double> const & values)136 void SubsetOfRegressors::run_optimizer(std::vector<double> const &values){
137 double optval;
138
139 int exitflag;
140
141 nlopt::opt* local_opt;
142 nlopt::opt* global_opt;
143 int dimp1 = gp_parameters_hp.size();
144 set_optimizer(values, local_opt, global_opt);
145 std::vector<double> tol(dimp1);
146 for(int i = 0; i < dimp1; ++i){
147 tol[i] = 0.0;
148 }
149 if (optimize_global){
150 print = 0;
151 std::cout << "Global optimization" << std::endl;
152 exitflag=-20;
153 global_opt->add_inequality_mconstraint(trust_region_constraint, gp_pointer, tol);
154 global_opt->set_min_objective( parameter_estimation_objective, gp_pointer);
155 exitflag = global_opt->optimize(gp_parameters_hp, optval);
156
157 std::cout << "exitflag = "<< exitflag<<std::endl;
158 std::cout << "Function calls: " << print << std::endl;
159 std::cout << "OPTVAL .... " << optval << std::endl;
160 //for ( int i = 0; i < 1+dim; ++i )
161 // std::cout << "gp_param = " << gp_parameters_hp[i] << std::endl;
162 //std::cout << std::endl;
163 }
164 if (optimize_local){
165 print = 0;
166 std::cout << "Local optimization" << std::endl;
167 exitflag=-20;
168 //try {
169 local_opt->add_inequality_mconstraint(trust_region_constraint, gp_pointer, tol);
170 local_opt->set_min_objective( parameter_estimation_objective_w_gradients, gp_pointer);
171 exitflag = local_opt->optimize(gp_parameters_hp, optval);
172
173 std::cout << "exitflag = "<< exitflag<<std::endl;
174 std::cout << "Function calls: " << print << std::endl;
175 std::cout << "OPTVAL .... " << optval << std::endl;
176 //for ( int i = 0; i < 1+dim; ++i )
177 //std::cout << "gp_param = " << gp_parameters_hp[i] << std::endl;
178 //std::cout << std::endl;
179 }
180
181 delete local_opt;
182 delete global_opt;
183
184 return;
185 }
186
estimate_hyper_parameters_all(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)187 void SubsetOfRegressors::estimate_hyper_parameters_all ( std::vector< std::vector<double> > const &nodes,
188 std::vector<double> const &values,
189 std::vector<double> const &noise )
190 {
191 //std::cout << "in sor1" << std::endl;
192 if(u.rows() > 0){
193 copy_data_to_members(nodes, values, noise);
194
195 gp_pointer = this;
196
197 sample_u(u.rows());
198
199 set_hyperparameters();
200
201 run_optimizer(values);
202
203 copy_hyperparameters();
204
205 //this->build(nodes, values, noise);
206 for ( int i = 0; i < 1+dim; ++i )
207 std::cout << "gp_param = " << gp_parameters[i] << std::endl;
208 }else{
209 GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
210 }
211
212 return;
213 }
214
estimate_hyper_parameters_induced_only(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)215 void SubsetOfRegressors::estimate_hyper_parameters_induced_only ( std::vector< std::vector<double> > const &nodes,
216 std::vector<double> const &values,
217 std::vector<double> const &noise )
218 {
219 //std::cout << "in sor1" << "u size:" << u.rows() << std::endl;
220 if(u.rows() > 0){
221 copy_data_to_members(nodes, values, noise);
222
223 gp_pointer = this;
224
225 sample_u(u.rows());
226
227 set_hyperparameters_induced_only();
228
229 run_optimizer(values);
230
231 copy_hyperparameters();
232
233 //this->build(nodes, values, noise);
234 for ( int i = 0; i < 1+dim; ++i )
235 std::cout << "gp_param = " << gp_parameters[i] << std::endl;
236 }else{
237 GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
238 }
239
240 return;
241 }
242
estimate_hyper_parameters_ls_only(std::vector<std::vector<double>> const & nodes,std::vector<double> const & values,std::vector<double> const & noise)243 void SubsetOfRegressors::estimate_hyper_parameters_ls_only ( std::vector< std::vector<double> > const &nodes,
244 std::vector<double> const &values,
245 std::vector<double> const &noise )
246 {
247 if(u.rows() > 0){
248 copy_data_to_members(nodes, values, noise);
249
250 gp_pointer = this;
251
252 sample_u(u.rows());
253
254 set_hyperparameters_ls_only();
255
256 run_optimizer(values);
257
258 copy_hyperparameters();
259
260 //this->build(nodes, values, noise);
261 for ( int i = 0; i < 1+dim; ++i )
262 std::cout << "gp_param = " << gp_parameters[i] << std::endl;
263 }else{
264 GaussianProcess::estimate_hyper_parameters(nodes, values, noise);
265 }
266
267 return;
268 }
269
270 //--------------------------------------------------------------------------------
// Derivative-free NLopt objective: the negative log marginal likelihood of
// the SoR model (up to an additive constant) evaluated at the parameter
// vector x. `data` must point to the SubsetOfRegressors instance (it is
// passed as gp_pointer in run_optimizer). `grad` is ignored here; see
// parameter_estimation_objective_w_gradients for the gradient variant.
//
// The layout of x encodes what is optimized:
//   * x.size() == 1+dim        : kernel parameters [sigma_f, l_1..l_dim] only,
//   * x.size() >  u.rows()*dim : kernel parameters followed by the induced
//                                nodes, flattened dimension-by-dimension,
//   * otherwise                : induced nodes only; the kernel parameters
//                                are taken from d->gp_parameters.
double SubsetOfRegressors::parameter_estimation_objective(std::vector<double> const &x,
                                                          std::vector<double> &grad,
                                                          void *data)
{

  SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);
  int offset;
  std::vector<double> local_params(d->dim+1);
  if(x.size()==1+d->dim){
    // Only kernel hyperparameters are optimized.
    offset = 0;
    for(int i = 0; i < local_params.size(); ++i){
      local_params[i] = x[i];
    }
  }else{
    if(x.size() > d->u.rows()*d->dim){
      // Kernel parameters come first, induced nodes after them.
      offset = 1+d->dim;
      for(int i = 0; i < local_params.size(); ++i){
        local_params[i] = x[i];
      }
    }else{
      // Induced nodes only: reuse the stored kernel parameters.
      offset = 0;
      for(int i = 0; i < local_params.size(); ++i){
        local_params[i] = d->gp_parameters[i];
      }
    }
    // Unpack the induced nodes from x (stored column-wise, i.e. all
    // coordinates of dimension i are contiguous).
    int u_counter;
    for (int i = 0; i < d->dim; ++i) {
      u_counter = 0;
      for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
        d->u(u_counter,i) = x[j];
        u_counter++;
      }
    }
  }

  // Assemble the cross-covariance K_u_f between induced and training nodes.
  int nb_gp_nodes = d->gp_nodes.size();
  int nb_u_nodes = d->u.rows();
  d->K_u_f.resize(nb_u_nodes, nb_gp_nodes);
  for (int i = 0; i < nb_u_nodes; ++i) {
    for (int j = 0; j < nb_gp_nodes; ++j) {
      d->K_u_f(i,j) = d->evaluate_kernel(d->u.row(i), d->gp_nodes_eigen.row(j), local_params);
    }
  }
  MatrixXd K_f_u = d->K_u_f.transpose();

  // Assemble K_u_u with an adaptive nugget on the diagonal (see the
  // inf/nan handling at the bottom for how the nugget is adjusted).
  d->K_u_u.resize(nb_u_nodes, nb_u_nodes);
  for (int i = 0; i < nb_u_nodes; ++i) {
    for (int j = 0; j < nb_u_nodes; ++j) {
      d->K_u_u(i,j) = d->evaluate_kernel(d->u.row(i), d->u.row(j), local_params);
      if(i==j)
        d->K_u_u(i,j) += d->Kuu_opt_nugget;
    }
  }
  // Factorize K_u_u; vectorD() below implies an LDLT-style factorization
  // despite the "LLT" in the member name.
  d->LLTofK_u_u.compute(d->K_u_u);
  MatrixXd K_u_u_u_f = d->LLTofK_u_u.solve(d->K_u_f);

  // NOTE(review): diag_Q_f_f and diag_K_f_f are computed here but never used
  // in this function (the SoR Lambda below is built from the noise only);
  // candidate for removal after confirming no caller relies on them.
  VectorXd diag_Q_f_f;
  diag_Q_f_f.resize(nb_gp_nodes);

  for (int i = 0; i < nb_gp_nodes; ++i) {
    diag_Q_f_f(i) = 0.0;
    for (int j = 0; j < nb_u_nodes; ++j) {
      diag_Q_f_f(i) += (K_f_u(i,j) * K_u_u_u_f(j,i));
    }
  }
  VectorXd diag_K_f_f;
  diag_K_f_f.resize(nb_gp_nodes);
  for (int i = 0; i < nb_gp_nodes; ++i) {
    diag_K_f_f(i) = d->evaluate_kernel(d->gp_nodes_eigen.row(i),
                                       d->gp_nodes_eigen.row(i), local_params);
  }

  // Lambda(i) = (noise_i/2 + noise_regularization)^2: per-node noise variances.
  d->Lambda.resize(nb_gp_nodes);
  for (int i = 0; i < nb_gp_nodes; ++i) {
    d->Lambda(i) = (pow( d->gp_noise.at(i) / 2e0 + d->noise_regularization, 2e0 ));
  }

  // Lambda^{-1} * K_f_u (with a stabilizing nugget in the inverse).
  MatrixXd Lambda_K_f_u;
  Lambda_K_f_u.resize(nb_gp_nodes, nb_u_nodes);
  for(int i = 0; i < nb_gp_nodes; i++){
    for(int j = 0; j < nb_u_nodes; j++){
      Lambda_K_f_u(i,j) = ((1.0/(d->Lambda(i) + d->Lambda_opt_nugget)) * K_f_u(i,j));
    }
  }
  MatrixXd K_u_f_Lambda_f_u;
  K_u_f_Lambda_f_u.resize(nb_u_nodes, nb_u_nodes);
  K_u_f_Lambda_f_u = d->K_u_f*Lambda_K_f_u;

  // Sigma = K_u_u + K_u_f * Lambda^{-1} * K_f_u.
  MatrixXd Sigma = d->K_u_u + K_u_f_Lambda_f_u;
  for (int i = 0; i < nb_u_nodes; ++i) {
    //Sigma(i,i) += nugget;
  }
  d->L_eigen.compute(Sigma);
  // L12 = log det K_u_u, via the sum of the log pivots of the factorization.
  double L12 = 0.0;//-log(d->K_u_u.determinant());
  for (int i = 0; i < d->u.rows(); ++i){
    L12 += log(d->LLTofK_u_u.vectorD()(i));
  }

  // det_Leigen = log det Sigma.
  double det_Leigen = 0.0;
  for (int i = 0; i < d->u.rows(); ++i){
    det_Leigen += log(d->L_eigen.vectorD()(i));
  }

  // L11 = log det Lambda (diagonal matrix).
  double L11 = 0.0;
  for (int i = 0; i < d->nb_gp_nodes; ++i){
    L11 += log(d->Lambda(i));
  }
  // Log-determinant part of the negative log likelihood:
  // 0.5 * log( det(Lambda) * det(Sigma) / det(K_u_u) ).
  double L1 = 0.0;
  //L1 = 0.5*L11 + (2*0.5)*det_Leigen + 0.5*det_LeigenD - (2*0.5)*L12 - 0.5*L12D;
  L1 = 0.5*L11 + 0.5*det_Leigen - 0.5*L12;

  // Data-fit part: L2 = 0.5 * y^T * (Q_f_f + Lambda)^{-1} * y, where
  // Q_f_f = K_f_u * K_u_u^{-1} * K_u_f (plus a stabilizing nugget).
  MatrixXd Q_f_f = K_f_u*K_u_u_u_f;
  for(int i = 0; i < nb_gp_nodes; i++){
    Q_f_f(i,i) += d->Lambda(i) + d->Qff_opt_nugget;
  }
  LLT<MatrixXd> LLTofQ_f_f(Q_f_f);
  double L2 = 0.5*d->scaled_function_values_eigen.dot(LLTofQ_f_f.solve(d->scaled_function_values_eigen));

  double result = L1 + L2;

  // Adaptive regularization: on numerical breakdown return +inf (so the
  // optimizer rejects the point) and inflate the nuggets; when healthy,
  // deflate them back toward nugget_min.
  if (std::isinf(result) || std::isnan(result)){
    std::cout << "Result is inf or nan" << std::endl;
    std::cout << "L11 " << L11 << " " << 'x' << std::endl;
    std::cout << "L12 " << L12 << std::endl;
    std::cout << "L13 " << det_Leigen << std::endl;
    std::cout << L1 << ' ' << L2 << std::endl;
    result = std::numeric_limits<double>::infinity();
    if(d->Kuu_opt_nugget < d->nugget_max){
      d->Kuu_opt_nugget *= 10;
      d->Lambda_opt_nugget *= 10;
      d->Qff_opt_nugget *= 10;
    }
  }else{
    if(d->Kuu_opt_nugget > d->nugget_min){
      d->Kuu_opt_nugget *= 0.1;
      d->Lambda_opt_nugget *= 0.1;
      d->Qff_opt_nugget *= 0.1;
    }
  }
  // Debug hook, currently disabled (prints every 1000th call when enabled).
  if ((d->print%1000)==0){
    //std::cout << d->print <<" Objective: " << L1 << " " << L2 << " "<< result<< std::endl;
  }

  d->print++;  // count objective evaluations (reported by run_optimizer)

  return result;

}
438
// Gradient-providing NLopt objective: same negative log marginal likelihood
// as parameter_estimation_objective (the value computation below is a copy
// of it — keep the two in sync), plus the analytic gradient with respect to
// every entry of x, written into `grad`. `data` must point to the
// SubsetOfRegressors instance. The layout of x is identical to the
// derivative-free variant:
//   * x.size() == 1+dim        : [sigma_f, l_1..l_dim],
//   * x.size() >  u.rows()*dim : [sigma_f, l_1..l_dim, u flattened],
//   * otherwise                : induced nodes only (kernel parameters come
//                                from d->gp_parameters).
double SubsetOfRegressors::parameter_estimation_objective_w_gradients(std::vector<double> const &x,
                                                                      std::vector<double> &grad,
                                                                      void *data)
{

  SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);
  int offset;
  std::vector<double> local_params(d->dim+1);
  if(x.size()==1+d->dim){
    // Only kernel hyperparameters are optimized.
    offset = 0;
    for(int i = 0; i < local_params.size(); ++i){
      local_params[i] = x[i];
    }
  }else{
    if(x.size() > d->u.rows()*d->dim){
      // Kernel parameters come first, induced nodes after them.
      offset = 1+d->dim;
      for(int i = 0; i < local_params.size(); ++i){
        local_params[i] = x[i];
      }
    }else{
      // Induced nodes only: reuse the stored kernel parameters.
      offset = 0;
      for(int i = 0; i < local_params.size(); ++i){
        local_params[i] = d->gp_parameters[i];
      }
    }
    // Unpack the induced nodes from x (all coordinates of dimension i are
    // contiguous).
    int u_counter;
    for (int i = 0; i < d->dim; ++i) {
      u_counter = 0;
      for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
        d->u(u_counter,i) = x[j];
        u_counter++;
      }
    }
  }

  // Assemble the cross-covariance K_u_f between induced and training nodes.
  int nb_gp_nodes = d->gp_nodes.size();
  int nb_u_nodes = d->u.rows();
  d->K_u_f.resize(nb_u_nodes, nb_gp_nodes);
  for (int i = 0; i < nb_u_nodes; ++i) {
    for (int j = 0; j < nb_gp_nodes; ++j) {
      d->K_u_f(i,j) = d->evaluate_kernel(d->u.row(i), d->gp_nodes_eigen.row(j), local_params);
    }
  }
  MatrixXd K_f_u = d->K_u_f.transpose();

  // Assemble K_u_u with an adaptive nugget on the diagonal.
  d->K_u_u.resize(nb_u_nodes, nb_u_nodes);
  for (int i = 0; i < nb_u_nodes; ++i) {
    for (int j = 0; j < nb_u_nodes; ++j) {
      d->K_u_u(i,j) = d->evaluate_kernel(d->u.row(i), d->u.row(j), local_params);
      if(i==j)
        d->K_u_u(i,j) += d->Kuu_opt_nugget;
    }
  }
  // Factorize K_u_u; vectorD() below implies an LDLT-style factorization
  // despite the "LLT" in the member name.
  d->LLTofK_u_u.compute(d->K_u_u);
  MatrixXd K_u_u_u_f = d->LLTofK_u_u.solve(d->K_u_f);

  // NOTE(review): diag_Q_f_f and diag_K_f_f are computed but unused here,
  // same as in the derivative-free variant.
  VectorXd diag_Q_f_f;
  diag_Q_f_f.resize(nb_gp_nodes);

  for (int i = 0; i < nb_gp_nodes; ++i) {
    diag_Q_f_f(i) = 0.0;
    for (int j = 0; j < nb_u_nodes; ++j) {
      diag_Q_f_f(i) += (K_f_u(i,j) * K_u_u_u_f(j,i));
    }
  }
  VectorXd diag_K_f_f;
  diag_K_f_f.resize(nb_gp_nodes);
  for (int i = 0; i < nb_gp_nodes; ++i) {
    diag_K_f_f(i) = d->evaluate_kernel(d->gp_nodes_eigen.row(i),
                                       d->gp_nodes_eigen.row(i), local_params);
  }

  // Lambda(i) = (noise_i/2 + noise_regularization)^2: per-node noise variances.
  d->Lambda.resize(nb_gp_nodes);
  for (int i = 0; i < nb_gp_nodes; ++i) {
    d->Lambda(i) = (pow( d->gp_noise.at(i) / 2e0 + d->noise_regularization, 2e0 ));
  }

  // Lambda^{-1} * K_f_u (with a stabilizing nugget in the inverse).
  MatrixXd Lambda_K_f_u;
  Lambda_K_f_u.resize(nb_gp_nodes, nb_u_nodes);
  for(int i = 0; i < nb_gp_nodes; i++){
    for(int j = 0; j < nb_u_nodes; j++){
      Lambda_K_f_u(i,j) = ((1.0/(d->Lambda(i) + d->Lambda_opt_nugget)) * K_f_u(i,j));
    }
  }
  MatrixXd K_u_f_Lambda_f_u;
  K_u_f_Lambda_f_u.resize(nb_u_nodes, nb_u_nodes);
  K_u_f_Lambda_f_u = d->K_u_f*Lambda_K_f_u;

  // Sigma = K_u_u + K_u_f * Lambda^{-1} * K_f_u.
  MatrixXd Sigma = d->K_u_u + K_u_f_Lambda_f_u;
  for (int i = 0; i < nb_u_nodes; ++i) {
    //Sigma(i,i) += nugget;
  }
  d->L_eigen.compute(Sigma);
  // L12 = log det K_u_u, via the sum of the log pivots of the factorization.
  double L12 = 0.0;//-log(d->K_u_u.determinant());
  for (int i = 0; i < d->u.rows(); ++i){
    L12 += log(d->LLTofK_u_u.vectorD()(i));
  }

  // det_Leigen = log det Sigma.
  double det_Leigen = 0.0;
  for (int i = 0; i < d->u.rows(); ++i){
    det_Leigen += log(d->L_eigen.vectorD()(i));
  }

  // L11 = log det Lambda (diagonal matrix).
  double L11 = 0.0;
  for (int i = 0; i < d->nb_gp_nodes; ++i){
    L11 += log(d->Lambda(i));
  }
  // Log-determinant part of the negative log likelihood:
  // 0.5 * log( det(Lambda) * det(Sigma) / det(K_u_u) ).
  double L1 = 0.0;
  //L1 = 0.5*L11 + (2*0.5)*det_Leigen + 0.5*det_LeigenD - (2*0.5)*L12 - 0.5*L12D;
  L1 = 0.5*L11 + 0.5*det_Leigen - 0.5*L12;

  // Data-fit part: L2 = 0.5 * y^T * (Q_f_f + Lambda)^{-1} * y, where
  // Q_f_f = K_f_u * K_u_u^{-1} * K_u_f (plus a stabilizing nugget).
  MatrixXd Q_f_f = K_f_u*K_u_u_u_f;
  for(int i = 0; i < nb_gp_nodes; i++){
    Q_f_f(i,i) += d->Lambda(i) + d->Qff_opt_nugget;
  }
  LLT<MatrixXd> LLTofQ_f_f(Q_f_f);
  double L2 = 0.5*d->scaled_function_values_eigen.dot(LLTofQ_f_f.solve(d->scaled_function_values_eigen));

  double result = L1 + L2;

  // Adaptive regularization: on numerical breakdown return +inf and inflate
  // the nuggets; when healthy, deflate them back toward nugget_min.
  if (std::isinf(result) || std::isnan(result)){
    std::cout << "Result is inf or nan" << std::endl;
    std::cout << "L11 " << L11 << " " << 'x' << std::endl;
    std::cout << "L12 " << L12 << std::endl;
    std::cout << "L13 " << det_Leigen << std::endl;
    std::cout << L1 << ' ' << L2 << std::endl;
    result = std::numeric_limits<double>::infinity();
    if(d->Kuu_opt_nugget < d->nugget_max){
      d->Kuu_opt_nugget *= 10;
      d->Lambda_opt_nugget *= 10;
      d->Qff_opt_nugget *= 10;
    }
  }else{
    if(d->Kuu_opt_nugget > d->nugget_min){
      d->Kuu_opt_nugget *= 0.1;
      d->Lambda_opt_nugget *= 0.1;
      d->Qff_opt_nugget *= 0.1;
    }
  }
  // Debug hook, currently disabled.
  if ((d->print%1000)==0){
    //std::cout << d->print <<" Objective: " << L1 << " " << L2 << " "<< result<< std::endl;
  }


  // Gradient computation: one pass per entry of x. For each parameter the
  // derivatives of the covariance blocks (Kuudot, Kufdot, Kffdot) are
  // assembled and combined into d(result)/dx_i.
  int dim_grad = x.size();
  grad.resize(dim_grad);

  for(int i = 0; i < grad.size(); i++){

    MatrixXd Kffdot;
    MatrixXd Kufdot;
    MatrixXd Kfudot;
    MatrixXd Kuudot;
    VectorXd LambdaDot;
    if(dim_grad == d->dim + 1){
      // Kernel-parameters-only case.
      if(i == 0){//grad sigma_f
        d->derivate_K_u_u_wrt_sigmaf(local_params, Kuudot);
        d->derivate_K_f_f_wrt_sigmaf(local_params, Kffdot);
        d->derivate_K_u_f_wrt_sigmaf(local_params, Kufdot);
        Kfudot = Kufdot.transpose();
      }else{//grad length parameter
        d->derivate_K_u_u_wrt_l(local_params, i-1, Kuudot);
        d->derivate_K_f_f_wrt_l(local_params, i-1, Kffdot);
        d->derivate_K_u_f_wrt_l(local_params, i-1, Kufdot);
        Kfudot = Kufdot.transpose();
      }
    }else{
      if( dim_grad > d->u.rows()*d->dim){//if we optimize also for sigma_f and lengthscales
        if(i == 0){//grad sigma_f
          d->derivate_K_u_u_wrt_sigmaf(local_params, Kuudot);
          d->derivate_K_f_f_wrt_sigmaf(local_params, Kffdot);
          d->derivate_K_u_f_wrt_sigmaf(local_params, Kufdot);
          Kfudot = Kufdot.transpose();
        }else if(i > 0 && i < 1 + d->dim){//grad length parameter
          d->derivate_K_u_u_wrt_l(local_params, i-1, Kuudot);
          d->derivate_K_f_f_wrt_l(local_params, i-1, Kffdot);
          d->derivate_K_u_f_wrt_l(local_params, i-1, Kufdot);
          Kfudot = Kufdot.transpose();
        }else{//grad u
          // Recover which induced node (uidx) and which coordinate (udim)
          // this x-entry corresponds to; K_f_f does not depend on u, so its
          // derivative is zero.
          int uidx = (i-offset)%d->u.rows();
          int udim = (int) ((i-offset)/d->u.rows());
          d->derivate_K_u_u_wrt_uik(local_params, uidx, udim, Kuudot);
          d->derivate_K_u_f_wrt_uik(local_params, uidx, udim, Kufdot);
          Kffdot.resize(nb_gp_nodes, nb_gp_nodes);
          Kffdot.setZero();
          Kfudot = Kufdot.transpose();
        }
      }else{
        // Induced-nodes-only case (offset == 0).
        int uidx = (i-offset)%d->u.rows();
        int udim = (int) ((i-offset)/d->u.rows());
        d->derivate_K_u_u_wrt_uik(local_params, uidx, udim, Kuudot);
        d->derivate_K_u_f_wrt_uik(local_params, uidx, udim, Kufdot);
        Kffdot.resize(nb_gp_nodes, nb_gp_nodes);
        Kffdot.setZero();
        Kfudot = Kufdot.transpose();
      }
    }
    VectorXd LambdaInv(nb_gp_nodes);
    for(int j = 0; j < nb_gp_nodes; ++j){
      LambdaInv(j) = 1.0/d->Lambda(j);
    }

    // dL1: derivative of -0.5*log det K_u_u -> trace(K_u_u^{-1} * Kuudot).
    double dL1 = (d->LLTofK_u_u.solve(Kuudot)).trace();

    // dL2: derivative of log det Sigma -> trace(Sigma^{-1} * dSigma).
    // The two cross terms Kufdot*Lambda^{-1}*Kfu and Kuf*Lambda^{-1}*Kfudot
    // (the latter shown commented out) are folded into a factor of 2; dSigma
    // itself is then not symmetric, but only its trace against Sigma^{-1}
    // is used, for which the shortcut is valid.
    MatrixXd LambdaInvDiag = LambdaInv.asDiagonal();
    MatrixXd dSigma = Kuudot
                      + 2*Kufdot*LambdaInvDiag*K_f_u;
                      //+ d->K_u_f*LambdaInvDiag*Kfudot;
    double dL2 = (d->L_eigen.solve(dSigma)).trace();

    // dL3: derivative of the data-fit term y^T Q^{-1} y,
    // dQ = d(K_f_u * K_u_u^{-1} * K_u_f) by the product rule, and
    // dL3 = - y^T Q^{-1} dQ Q^{-1} y.
    MatrixXd KfudotKuinvKuf = Kfudot * d->LLTofK_u_u.solve(d->K_u_f);
    MatrixXd KfuKuinvKufdot = K_f_u * d->LLTofK_u_u.solve(Kufdot);
    MatrixXd KfuKuinfKudotKuinvKuf = K_f_u*(d->LLTofK_u_u.solve(Kuudot*(d->LLTofK_u_u.solve(d->K_u_f))));
    MatrixXd dQ = KfudotKuinvKuf - KfuKuinfKudotKuinvKuf + KfuKuinvKufdot;
    double dL3 = (-1)*d->scaled_function_values_eigen.dot(
                 LLTofQ_f_f.solve(dQ*LLTofQ_f_f.solve(
                 d->scaled_function_values_eigen)));

    grad[i] = 0.5*(-dL1 + dL2 + dL3);

    // Debug hook, currently disabled.
    if ((d->print%1)==0){
      //std::cout << "Grad: " << i << "|" << 0.5*dL1 << " + "<< 0.5*dL2 << " + "<< 0.5*dL3 << " = " << grad[i] << std::endl;
    }

  }

  d->print++;  // count objective evaluations (reported by run_optimizer)

  return result;

}
716
// NLopt vector-valued inequality constraint (feasible iff c_i <= 0): keeps
// every induced node inside the ball of radius d->constraint_ball_radius
// around d->constraint_ball_center. Entries of c that correspond to kernel
// hyperparameters are set to -1, i.e. always feasible. `grad` is never
// filled in, so this constraint must only be used with derivative-free
// handling of constraints. The layout of x mirrors
// parameter_estimation_objective:
//   m == dim+1        : hyperparameters only -> all constraints inactive,
//   m >  u.rows()*dim : [hyperparameters, u flattened] -> offset = 1+dim,
//   otherwise         : induced nodes only -> offset = 0.
void SubsetOfRegressors::trust_region_constraint(unsigned int m, double* c, unsigned int n, const double* x, double* grad,
                                                 void *data){
  SubsetOfRegressors *d = reinterpret_cast<SubsetOfRegressors*>(data);

  int offset;
  if(m == d->dim + 1){
    // Only kernel hyperparameters in x: nothing to constrain geometrically.
    for (int i = 0; i < d->dim+1; ++i) {
      c[i] = -1;
    }
  }else{
    if(m > d->u.rows()*d->dim){
      offset = 1+d->dim;
    }else{
      offset = 0;
    }
    int u_counter;

    // Hyperparameter slots (if any) are always feasible.
    for (int i = 0; i < offset; ++i) {
      c[i] = -1;
    }
    // Unpack the induced nodes from x (all coordinates of dimension i are
    // contiguous, same layout as in the objective functions).
    MatrixXd u_intern(d->u.rows(), d->u.cols());
    for (int i = 0; i < d->dim; ++i) {
      u_counter = 0;
      for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
        u_intern(u_counter, i) = x[j];
        u_counter++;
      }
    }
    // One constraint value per induced node:
    // c_i = ||u_i - center||_2 - radius (<= 0 means inside the ball).
    VectorXd c_intern(u_intern.rows());
    VectorXd dist(d->dim);
    for (int i = 0; i < u_intern.rows(); ++i) {
      for (int j = 0; j < d->dim; ++j) {
        dist(j) = (u_intern(i, j)-d->constraint_ball_center(j));
      }
      c_intern(i) = sqrt( dist.dot(dist) ) - d->constraint_ball_radius;
    }
    // Scatter the per-node values back into the flattened constraint vector:
    // the same value c_intern(k) is written once per coordinate of node k.
    for (int i = 0; i < d->dim; ++i) {
      u_counter = 0;
      for(int j = offset + i*d->u.rows(); j < offset + (i+1)*d->u.rows(); ++j){
        c[j] = c_intern(u_counter);
        u_counter++;
      }
    }
  }
  return;
}