1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 //- Class:	 NonDQUESOBayesCalibration
10 //- Description: Derived class for Bayesian inference using QUESO
11 //- Owner:       Laura Swiler, Brian Adams
12 //- Checked by:
13 //- Version:
14 
15 // place Dakota headers first to minimize influence of QUESO defines
16 #include "NonDQUESOBayesCalibration.hpp"
17 #include "NonDExpansion.hpp"
18 #include "ProblemDescDB.hpp"
19 #include "ParallelLibrary.hpp"
20 #include "DakotaModel.hpp"
21 #include "PRPMultiIndex.hpp"
22 // Dakota/QUESO interfaces
23 #include "QUESOImpl.hpp"
24 // finally list additional QUESO headers
25 #include "queso/StatisticalInverseProblem.h"
26 #include "queso/EnvironmentOptions.h"
27 #include "queso/GenericScalarFunction.h"
28 
29 static const char rcsId[]="@(#) $Id$";
30 
31 namespace Dakota {
32 
33 extern PRPCache data_pairs; // global container
34 
35 /// Static registration of RW TK with the QUESO TK factory
36 TKFactoryDIPC tk_factory_dipc("dakota_dipc_tk");
37 /// Static registration of Logit RW TK with the QUESO TK factory
38 TKFactoryDIPCLogit tk_factory_dipclogit("dakota_dipc_logit_tk");
39 
40 // initialization of statics
41 NonDQUESOBayesCalibration* NonDQUESOBayesCalibration::nonDQUESOInstance(NULL);
42 
43 
44 /** This constructor is called for a standard letter-envelope iterator
45     instantiation.  In this case, set_db_list_nodes has been called and
46     probDescDB can be queried for settings from the method specification. */
47 NonDQUESOBayesCalibration::
NonDQUESOBayesCalibration(ProblemDescDB & problem_db,Model & model)48 NonDQUESOBayesCalibration(ProblemDescDB& problem_db, Model& model):
49   NonDBayesCalibration(problem_db, model),
50   mcmcType(probDescDB.get_string("method.nond.mcmc_type")),
51   propCovUpdatePeriod(probDescDB.get_int("method.nond.prop_cov_update_period")),
52   batchSize(1), precondRequestValue(0),
53   logitTransform(probDescDB.get_bool("method.nond.logit_transform")),
54   priorPropCovMult(probDescDB.get_real("method.prior_prop_cov_mult")),
55   advancedOptionsFile(probDescDB.get_string("method.advanced_options_file"))
56 {
57   bool found_error = false;
58 
59   // Assign default proposalCovarType (Only QUESO supports proposal
60   // covariance updates and posterior adaptive surrogate updates for
61   // now, hence this override is in this class)
62 
63   // BMA TODO: Consider unconditional default for simplicity
64   if (proposalCovarType.empty()) {
65     if (emulatorType) proposalCovarType = "derivatives"; // misfit Hessian
66     else              proposalCovarType = "prior";       // prior covariance
67   }
68 
69   if (priorPropCovMult < std::numeric_limits<double>::min() ||
70       priorPropCovMult >= std::numeric_limits<double>::infinity()) {
71     Cerr << "\nError: QUESO proposal covariance multiplier  = "
72 	 << priorPropCovMult << " not in [DBL_MIN, Inf).\n";
73     found_error = true;
74   }
75 
76   if (propCovUpdatePeriod < std::numeric_limits<int>::max() &&
77       propCovUpdatePeriod >= chainSamples) {
78     Cout << "\nWarning: QUESO proposal covariance update_period >= chain_samples;"
79 	 << "\n         no updates will occur." << std::endl;
80   }
81 
82   // assign default maxIterations (DataMethod default is -1)
83   if (adaptPosteriorRefine) {
84     // BMA --> MSE: Why 5? Fix magic constant
85     batchSize = 5;
86     if (maxIterations < 0)
87       maxIterations = 25;
88   }
89 
90   if (!advancedOptionsFile.empty()) {
91     if (boost::filesystem::exists(advancedOptionsFile)) {
92       if (outputLevel >= NORMAL_OUTPUT)
93 	Cout << "Any QUESO options in file '" << advancedOptionsFile
94 	     << "' will override Dakota options." << std::endl;
95     } else {
96       Cerr << "\nError: QUESO options_file '" << advancedOptionsFile
97 	   << "' specified, but file not found.\n";
98       found_error = true;
99     }
100   }
101 
102   // BMA TODO: Want to support these options independently
103   if (obsErrorMultiplierMode > 0 && !calibrationData) {
104     Cerr << "\nError: you are attempting to calibrate the measurement error "
105          << "but have not provided experimental data information." << std::endl;
106     found_error = true;
107   }
108 
109   if (found_error)
110     abort_handler(METHOD_ERROR);
111 
112   init_queso_environment();
113 }
114 
115 
~NonDQUESOBayesCalibration()116 NonDQUESOBayesCalibration::~NonDQUESOBayesCalibration()
117 { }
118 
119 
120 /** Perform the uncertainty quantification */
calibrate()121 void NonDQUESOBayesCalibration::calibrate()
122 {
123   // instantiate QUESO objects and execute
124   nonDQUESOInstance = this;
125   tk_factory_dipc.set_callback(nonDQUESOInstance);
126   tk_factory_dipclogit.set_callback(nonDQUESOInstance);
127 
128   // initialize the ASV request value for preconditioning and
129   // construct a Response for use in residual computation
130   if (proposalCovarType == "derivatives")
131     init_precond_request_value();
132 
133   // BMA TODO: make sure Recast is setup properly to have the right request val
134   // likelihood needs fn_vals; preconditioning may need derivs
135   //  short request_value_needed = 1 | precondRequestValue;
136   //  init_residual_response(request_value_needed);
137 
138   // pull vars data prior to emulator build
139   init_parameter_domain();
140 
141   // build the emulator and initialize transformations, as needed
142   initialize_model();
143 
144   // may pull Hessian data from emulator
145   init_proposal_covariance();
146 
147   // init likelihoodFunctionObj, prior/posterior random vectors, inverse problem
148   init_queso_solver();
149 
150   // generate the sample chain that defines the joint posterior distribution
151   if (adaptPosteriorRefine) {
152     if (!emulatorType) { // current spec prevents this
153       Cerr << "Error: adaptive posterior refinement requires emulator model."
154 	   << std::endl;
155       abort_handler(METHOD_ERROR);
156     }
157     compactMode = true; // update_model() uses all{Samples,Responses}
158     Real adapt_metric = DBL_MAX; unsigned short int num_mcmc = 0;
159     while (adapt_metric > convergenceTol && num_mcmc <= maxIterations) {
160 
161       // TO DO: treat this like cross-validation as there is likely a sweet
162       // spot prior to degradation of conditioning (too much refinement data)
163 
164       // place update block here so that chain is always run for initial or
165       // updated emulator; placing block at loop end could result in emulator
166       // convergence w/o final chain.
167       if (num_mcmc) {
168 	// update the emulator surrogate data with new truth evals and
169 	// reconstruct surrogate (e.g., via PCE sparse recovery)
170 	update_model();
171 	// assess posterior convergence via convergence of the emulator coeffs
172 	adapt_metric = assess_emulator_convergence();
173       }
174 
175       map_pre_solve();
176       run_chain();
177       ++num_mcmc;
178 
179       // assess convergence of the posterior via sample-based K-L divergence:
180       //adapt_metric = assess_posterior_convergence();
181     }
182   }
183   else {
184     map_pre_solve();
185     run_chain();
186   }
187 
188   // Generate useful stats from the posterior samples
189   compute_statistics();
190 }
191 
192 
map_pre_solve()193 void NonDQUESOBayesCalibration::map_pre_solve()
194 {
195   // Management of pre_solve spec options occurs in NonDBayesCalibration ctor,
196   // manifesting here as a valid mapOptimizer instance.
197   if (mapOptimizer.is_null()) return;
198 
199   // Pre-solve for MAP point using optimization prior to MCMC.
200 
201   Cout << "\nInitiating pre-solve for maximum a posteriori probability (MAP)."
202        << std::endl;
203   // set initial point pulled from mcmcModel at construct time or
204   // warm start from previous map soln computed from previous emulator
205   negLogPostModel.current_variables().continuous_variables(mapSoln);
206 
207   // Perform optimization
208   mapOptimizer.run();
209   //negLogPostModel.print_evaluation_summary(Cout);
210   //mapOptimizer.print_results(Cout); // needs xform if standardizedSpace
211   Cout << "Maximum a posteriori probability (MAP) point from pre-solve"
212        << "\n(will be used as initial point for MCMC chain):\n";
213   const RealVector& map_c_vars
214     = mapOptimizer.variables_results().continuous_variables();
215   print_variables(Cout, map_c_vars);
216   Cout << std::endl;
217 
218   // propagate MAP to paramInitials for starting point of MCMC chain.
219   // Note: init_parameter_domain() happens once per calibrate(),
220   // upstream of map_pre_solve()
221   copy_gsl(map_c_vars, *paramInitials);
222   // if multiple pre-solves, propagate MAP as initial guess for next pre-solve
223   if (adaptPosteriorRefine || adaptExpDesign)
224     copy_data(map_c_vars, mapSoln); // deep copy of view
225 }
226 
227 
run_chain()228 void NonDQUESOBayesCalibration::run_chain()
229 {
230   // define initial proposal covariance from misfit Hessian
231   if (proposalCovarType == "derivatives")
232     precondition_proposal(0);
233 
234   if (outputLevel >= NORMAL_OUTPUT) {
235     Cout << "QUESO: Running chain with " << chainSamples << " samples."
236 	 << std::endl;
237     if (propCovUpdatePeriod < std::numeric_limits<int>::max())
238       Cout << "QUESO: Updating proposal covariance every "
239 	   << propCovUpdatePeriod << " samples." << std::endl;
240   }
241   run_queso_solver(); // solve inverse problem with MCMC
242 
243   // always update bestSamples with highest posterior probability points
244   log_best();
245   if (adaptPosteriorRefine) {
246     // populate allSamples for surrogate updating
247     if (emulatorType == PCE_EMULATOR)
248       filter_chain_by_conditioning();
249     else
250       best_to_all();
251   }
252   // populate acceptanceChain, acceptedFnVals
253   cache_chain();
254 }
255 
256 
init_queso_environment()257 void NonDQUESOBayesCalibration::init_queso_environment()
258 {
259   // NOTE:  for now we are assuming that DAKOTA will be run with
260   // mpiexec to call MPI_Init.  Eventually we need to generalize this
261   // and send QUESO the proper MPI subenvironments.
262 
263   // NOTE: To make this function re-entrant, have to free quesoEnv
264   // first, then reset envOptionsValues, since destructor of quesoEnv
265   // needs envOptionsValues:
266   quesoEnv.reset();
267 
268   // Construct with default options
269   // TODO: see if this can be a local, or if the env retains a pointer
270   envOptionsValues = std::make_shared<QUESO::EnvOptionsValues>();
271 
272   // C++ API options may override defaults
273   envOptionsValues->m_subDisplayFileName = "QuesoDiagnostics/display";
274   envOptionsValues->m_subDisplayAllowedSet.insert(0);
275   envOptionsValues->m_subDisplayAllowedSet.insert(1);
276   envOptionsValues->m_displayVerbosity = 2;
277   // From GPMSA: envOptionsValues->m_displayVerbosity     = 3;
278   envOptionsValues->m_seed = randomSeed;
279   // From GPMSA: envOptionsValues->m_identifyingString="dakota_foo.in"
280 
281   // File-based power user parameters have the final say
282   //
283   // Unfortunately, there's a chicken-and-egg problem where parsing
284   // EnvOptionsValues requires an Environment, so we defer this parse
285   // to the ctor...
286   //
287   // if (!advancedOptionsFile.empty())
288   //   envOptionsValues->parse(*quesoEnv, "");
289   const char* aof_cstr =
290     advancedOptionsFile.empty() ? NULL : advancedOptionsFile.c_str();
291 
292   // BMA TODO: Remove ML-specific code now that we have advanced options parsing
293 #ifdef DAKOTA_HAVE_MPI
294   // this prototype and MPI_COMM_SELF only available if Dakota/QUESO have MPI
295   if (parallelLib.mpirun_flag()) {
296     if (mcmcType == "multilevel")
297       quesoEnv = std::make_shared<QUESO::FullEnvironment>
298 	(MPI_COMM_SELF, "ml.inp", "", nullptr);
299     else // dram, dr, am, or mh
300       quesoEnv = std::make_shared<QUESO::FullEnvironment>
301 	(MPI_COMM_SELF, aof_cstr, "", envOptionsValues.get());
302   }
303   else {
304     if (mcmcType == "multilevel")
305       quesoEnv = std::make_shared<QUESO::FullEnvironment>("ml.inp", "", nullptr);
306     else // dram, dr, am, or mh
307       quesoEnv = std::make_shared<QUESO::FullEnvironment>
308 	(aof_cstr, "", envOptionsValues.get());
309   }
310 #else
311   if (mcmcType == "multilevel")
312     quesoEnv = std::make_shared<QUESO::FullEnvironment>("ml.inp", "", nullptr);
313   else // dram, dr, am, or mh
314     quesoEnv = std::make_shared<QUESO::FullEnvironment>
315       (aof_cstr, "", envOptionsValues.get());
316 #endif
317 }
318 
319 
init_precond_request_value()320 void NonDQUESOBayesCalibration::init_precond_request_value()
321 {
322   // Gauss-Newton approximate Hessian requires gradients of residuals (rv=2)
323   // full Hessian requires values/gradients/Hessians of residuals (rv=7)
324   precondRequestValue = 0;
325   switch (emulatorType) {
326   case PCE_EMULATOR: case ML_PCE_EMULATOR: case MF_PCE_EMULATOR:
327   case KRIGING_EMULATOR:
328     precondRequestValue = 7; break; // emulator Hessian support
329   case SC_EMULATOR: case MF_SC_EMULATOR: case GP_EMULATOR:
330     precondRequestValue = 2; break; // emulator Hessians not supported yet
331   case  NO_EMULATOR:
332     // Note: mixed gradients/Hessians are still globally "on", regardless of
333     // mixed computation approach
334     if (mcmcModel.gradient_type() != "none")
335       precondRequestValue |= 2; // gradients
336     if (mcmcModel.hessian_type()  != "none")
337       precondRequestValue |= 5; // values & Hessians
338     break;
339   }
340 }
341 
342 
init_queso_solver()343 void NonDQUESOBayesCalibration::init_queso_solver()
344 {
345   // Instantiate the likelihood function object that computes [ln(function)]
346   likelihoodFunctionObj = std::make_shared
347     <QUESO::GenericScalarFunction<QUESO::GslVector,QUESO::GslMatrix>>
348     ("like_", *paramDomain, &dakotaLogLikelihood, (void *)nullptr, true);
349 
350   // Instantiate the inverse problem
351 
352   postRv =
353     std::make_shared<QUESO::GenericVectorRV<QUESO::GslVector,QUESO::GslMatrix>>
354     ("post_", *paramSpace);
355   // Q: are Prior/posterior RVs copied by StatInvProblem, such that these
356   //    instances can be local and allowed to go out of scope?
357 
358   // define calIpOptionsValues
359   set_ip_options();
360   // set options specific to MH algorithm
361   set_mh_options();
362   // Inverse problem: instantiate it (posterior rv is instantiated internally)
363   inverseProb = std::make_shared
364     <QUESO::StatisticalInverseProblem<QUESO::GslVector,QUESO::GslMatrix>>
365     ("", calIpOptionsValues.get(), *priorRv, *likelihoodFunctionObj, *postRv);
366 }
367 
368 
precondition_proposal(unsigned int chain_index)369 void NonDQUESOBayesCalibration::precondition_proposal(unsigned int chain_index)
370 {
371   if (!precondRequestValue) {
372     Cerr << "Error: response derivative specification required for proposal "
373          << "preconditioning." << std::endl;
374     abort_handler(METHOD_ERROR);
375   }
376 
377   // extract GSL sample vector from QUESO vector sequence:
378   QUESO::GslVector curr_params(paramSpace->zeroVector());
379 
380   if (chain_index == 0)
381     curr_params = *paramInitials;
382   else {
383     const QUESO::BaseVectorSequence<QUESO::GslVector,QUESO::GslMatrix>&
384       mcmc_chain = inverseProb->chain();
385 
386     if (chain_index >= mcmc_chain.subSequenceSize()) {
387       Cerr << "\nError: QUESO precondition_proposal index out of bounds\n";
388       abort_handler(METHOD_ERROR);
389     }
390 
391     mcmc_chain.getPositionValues(chain_index, curr_params);
392     if (outputLevel > NORMAL_OUTPUT)
393       Cout << "New center:\n" << curr_params << "Log likelihood = "
394 	   << inverseProb->logLikelihoodValues()[chain_index] << std::endl;
395   }
396 
397   // update mcmcModel's continuous variables from curr_params (current MCMC
398   // chain position)
399   copy_gsl_partial
400     (curr_params, 0,
401      residualModel.current_variables().continuous_variables_view());
402   // update request vector values
403   const Response& residual_resp = residualModel.current_response();
404   ActiveSet set = residual_resp.active_set(); // copy
405   set.request_values(precondRequestValue);
406   // compute response (emulator case echoed to Cout if outputLevel > NORMAL)
407   residualModel.evaluate(set);
408 
409   // compute Hessian of log-likelihood misfit r^T Gamma^{-1} r
410   RealSymMatrix log_hess;//(numContinuousVars); // init to 0
411   RealSymMatrix prop_covar;
412 
413   expData.build_hessian_of_sum_square_residuals
414     (residualModel.current_response(), log_hess);
415   //bool fallback =
416     get_positive_definite_covariance_from_hessian(log_hess, prop_covar);
417 
418   /*
419   if ( fallback && (precondRequestValue & 4) ) { // fall back if indefinite
420     if (outputLevel >= NORMAL_OUTPUT)
421       Cout << "Falling back from full misfit Hessian to Gauss-Newton misfit "
422 	   << "Hessian.\n";
423     // BMA @JDJ, @MSE: I think this asv needs to be length of the
424     // total number of residuals (residualResponse.num_functions or
425     // expData.num_total_exppoints())
426     ShortArray asrv_override(numFunctions, 2); // override asrv in response
427     expData.build_hessian_of_sum_square_residuals(residualResponse,
428 						  asrv_override, log_hess);
429     get_positive_definite_covariance_from_hessian(log_hess, prop_covar);
430   }
431   */
432 
433   // pack GSL proposalCovMatrix
434   int i, j, nv = log_hess.numRows();
435   if (!proposalCovMatrix) {
436     proposalCovMatrix = std::make_shared<QUESO::GslMatrix>
437       (paramSpace->zeroVector());
438     if (paramSpace->dimGlobal() != nv ||
439 	paramSpace->dimGlobal() != nv+numFunctions) {
440       Cerr << "Error: Queso vector space is not consistent with proposal "
441 	   << "covariance dimension." << std::endl;
442       abort_handler(METHOD_ERROR);
443     }
444   }
445   for (i=0; i<nv; ++i )
446     for (j=0; j<nv; ++j )
447       (*proposalCovMatrix)(i,j) = prop_covar(i,j);
448 }
449 
450 
run_queso_solver()451 void NonDQUESOBayesCalibration::run_queso_solver()
452 {
453   if (outputLevel >= DEBUG_OUTPUT) {
454     //  Cout << "QUESO final ENV options:\n" << *envOptionsValues << std::endl;
455     Cout << "QUESO final SIP options:\n" << *calIpOptionsValues << std::endl;
456     Cout << "QUESO final MH options:\n" << *calIpMhOptionsValues << std::endl;
457   }
458 
459   Cout << "Running Bayesian Calibration with QUESO " << mcmcType << " using "
460        << calIpMhOptionsValues->m_rawChainSize << " MCMC samples." << std::endl;
461   if (outputLevel > NORMAL_OUTPUT && numHyperparams > 0)
462     Cout << "\n  Calibrating " << numHyperparams << " error hyperparameters."
463 	 << std::endl;
464 
465   ////////////////////////////////////////////////////////
466   // Step 5 of 5: Solve the inverse problem
467   ////////////////////////////////////////////////////////
468   if (mcmcType == "multilevel")
469     inverseProb->solveWithBayesMLSampling();
470   else
471     inverseProb->solveWithBayesMetropolisHastings(calIpMhOptionsValues.get(),
472 						  *paramInitials,
473 						  proposalCovMatrix.get());
474 
475   Cout << "QUESO MCMC chain completed.  MCMC details are concatenated within "
476        << "the QuesoDiagnostics directory:\n"
477        << "  display_sub0.txt contains MCMC diagnostics.\n";
478   if (standardizedSpace)
479     Cout << "  Caution: Matlab files contain the chain values in "
480 	 << "standardized probability space.\n";
481   else
482     Cout << "  Matlab files contain the chain values.\n";
483   //   << "The files to load in Matlab are\nfile_cal_ip_raw.m (the actual "
484   //   << "chain) or file_cal_ip_filt.m (the filtered chain,\nwhich contains "
485   //   << "every 20th step in the chain.\n"
486   //   << "NOTE:  the chain values in these Matlab files are currently "
487   //   << "in scaled space. \n  You will have to transform them back to "
488   //   << "original space by:\n"
489   //   << "lower_bounds + chain_values * (upper_bounds - lower_bounds)\n"
490   //   << "The rejection rate is in the tgaCalOutput file.\n"
491   //   << "We hope to improve the postprocessing of the chains by the "
492   //   << "next Dakota release.\n";
493 }
494 
495 
496 /** Populate all of acceptanceChain(num_params, chainSamples)
497     acceptedFnVals(numFunctions, chainSamples) */
cache_chain()498 void NonDQUESOBayesCalibration::cache_chain()
499 {
500   acceptanceChain.shapeUninitialized(numContinuousVars + numHyperparams,
501 				     chainSamples);
502   acceptedFnVals.shapeUninitialized(numFunctions, chainSamples);
503 
504   // temporaries for evals/lookups
505   // the MCMC model omits the hyper params and residual transformations...
506   Variables lookup_vars = mcmcModel.current_variables().copy();
507   String   interface_id = mcmcModel.interface_id();
508   Response  lookup_resp = mcmcModel.current_response().copy();
509   ActiveSet   lookup_as = lookup_resp.active_set();
510   lookup_as.request_values(1);
511   lookup_resp.active_set(lookup_as);
512   ParamResponsePair lookup_pr(lookup_vars, interface_id, lookup_resp);
513 
514   const QUESO::BaseVectorSequence<QUESO::GslVector,QUESO::GslMatrix>&
515     mcmc_chain = inverseProb->chain();
516   unsigned int num_mcmc = mcmc_chain.subSequenceSize();
517   if (num_mcmc != chainSamples) {
518     Cerr << "\nError: QUESO cache_chain(): chain length is " << num_mcmc
519 	 << "; expected " << chainSamples << '\n';
520     abort_handler(METHOD_ERROR);
521   }
522 
523   // The posterior may include GPMSA hyper-parameters, so use the postRv space
524   //  QUESO::GslVector qv(paramSpace->zeroVector());
525   QUESO::GslVector qv(postRv->imageSet().vectorSpace().zeroVector());
526 
527   unsigned int lookup_failures = 0;
528   unsigned int num_params = numContinuousVars + numHyperparams;
529   for (int i=0; i<num_mcmc; ++i) {
530 
531     // translate the QUESO vector into x- or u-space lookup vars and
532     // x-space acceptanceChain
533     mcmc_chain.getPositionValues(i, qv); // extract GSLVector from sequence
534     if (standardizedSpace) {
535       // u_rv and x_rv omit any hyper-parameters
536       RealVector u_rv(numContinuousVars, false);
537       copy_gsl_partial(qv, 0, u_rv);
538       Real* acc_chain_i = acceptanceChain[i];
539       RealVector x_rv(Teuchos::View, acc_chain_i, numContinuousVars);
540       mcmcModel.probability_transformation().trans_U_to_X(u_rv, x_rv);
541       for (int j=numContinuousVars; j<num_params; ++j)
542 	acc_chain_i[j] = qv[j]; // trailing hyperparams are not transformed
543 
544       // surrogate needs u-space variables for eval
545       if (mcmcModel.model_type() == "surrogate")
546 	lookup_vars.continuous_variables(u_rv);
547       else
548 	lookup_vars.continuous_variables(x_rv);
549     }
550     else {
551       // A view that includes calibration params and Dakota-managed
552       // hyper-parameters, to facilitate copying from the longer qv
553       // into acceptanceChain:
554       RealVector theta_hp(Teuchos::View, acceptanceChain[i],
555 			  numContinuousVars + numHyperparams);
556       copy_gsl_partial(qv, 0, theta_hp);
557       // lookup vars only need the calibration parameters
558       RealVector x_rv(Teuchos::View, acceptanceChain[i],
559 		      numContinuousVars);
560       lookup_vars.continuous_variables(x_rv);
561     }
562 
563     // now retreive function values
564 
565     // NOTE: The MCMC may be PCE/SC model, DataFitSurrModel, or raw
566     // model, but it's type may be a ProbabilityTransform wrapper if
567     // standardizedSpace is active.  mcmcModelHasSurrogate controls
568     // model re-evals.  This is not sufficiently general, e.g., if the
569     // mcmcModel is a HierarchSurrModel, could perform costly re-eval.
570 
571     // TODO: Consider doing lookup first, then surrogate re-eval, or
572     // querying a more complete eval database when available...
573 
574     if (mcmcModelHasSurrogate) {
575       mcmcModel.active_variables(lookup_vars);
576       mcmcModel.evaluate(lookup_resp.active_set());
577       const RealVector& fn_vals = mcmcModel.current_response().function_values();
578       Teuchos::setCol(fn_vals, i, acceptedFnVals);
579     }
580     else {
581       lookup_pr.variables(lookup_vars);
582       PRPCacheHIter cache_it = lookup_by_val(data_pairs, lookup_pr);
583       if (cache_it == data_pairs.get<hashed>().end()) {
584 	++lookup_failures;
585 	// Set NaN in the chain points to avoid misleading the user
586 	RealVector nan_fn_vals(mcmcModel.current_response().function_values().length());
587 	nan_fn_vals = std::numeric_limits<double>::quiet_NaN();
588 	Teuchos::setCol(nan_fn_vals, i, acceptedFnVals);
589       }
590       else {
591 	const RealVector& fn_vals = cache_it->response().function_values();
592 	Teuchos::setCol(fn_vals, i, acceptedFnVals);
593       }
594     }
595 
596   }
597   if (lookup_failures > 0 && outputLevel > SILENT_OUTPUT)
598     Cout << "Warning: could not retrieve function values for "
599 	 << lookup_failures << " MCMC chain points." << std::endl;
600 }
601 
602 
603 
log_best()604 void NonDQUESOBayesCalibration::log_best()
605 {
606   bestSamples.clear();
607   // BMA TODO: Need to transform chain back to unscaled space for reporting
608   // to user; possibly also in auxilliary data files for user consumption.
609 
610   // to get the full acceptance chain, m_filteredChainGenerate is set to
611   // false in set_invpb_mh_options()
612 
613   // Note: the QUESO VectorSequence class has a number of helpful filtering
614   // and statistics functions.
615 
616   const QUESO::BaseVectorSequence<QUESO::GslVector,QUESO::GslMatrix>& mcmc_chain
617     = inverseProb->chain();
618   const QUESO::ScalarSequence<double>& loglike_vals
619     = inverseProb->logLikelihoodValues();
620   unsigned int num_mcmc = mcmc_chain.subSequenceSize();
621   if (num_mcmc != loglike_vals.subSequenceSize()) {
622     Cerr << "Error (NonDQUESO): final mcmc chain has length " << num_mcmc
623 	 << "\n                 but likelihood set has length"
624 	 << loglike_vals.subSequenceSize() << std::endl;
625     abort_handler(METHOD_ERROR);
626   }
627 
628   // TO DO: want to keep different samples with same likelihood, but not
629   // replicate samples with same likelihood (from rejection); for now, use
630   // a std::map since the latter is unlikely.
631   QUESO::GslVector mcmc_sample(paramSpace->zeroVector());
632   RealVector mcmc_rv;
633   for (size_t chain_pos = 0; chain_pos < num_mcmc; ++chain_pos) {
634     // extract GSL sample vector from QUESO vector sequence:
635     mcmc_chain.getPositionValues(chain_pos, mcmc_sample);
636     // evaluate log of posterior from log likelihood and log prior:
637     Real log_prior     = log_prior_density(mcmc_sample),
638          log_posterior = loglike_vals[chain_pos] + log_prior;
639     if (outputLevel > NORMAL_OUTPUT)
640       Cout << "MCMC sample: " << mcmc_sample << " log prior = " << log_prior
641 	   << " log posterior = " << log_posterior << std::endl;
642     // sort ascending by log posterior (highest prob are last) and retain
643     // batch_size samples
644     copy_gsl(mcmc_sample, mcmc_rv);
645     bestSamples.insert(std::make_pair(log_posterior, mcmc_rv));
646     if (bestSamples.size() > batchSize)
647       bestSamples.erase(bestSamples.begin()); // pop front (lowest prob)
648   }
649   if (outputLevel > NORMAL_OUTPUT)
650     Cout << "bestSamples map:\n" << bestSamples << std::endl;
651 }
652 
653 
filter_chain_by_conditioning()654 void NonDQUESOBayesCalibration::filter_chain_by_conditioning()
655 {
656   const QUESO::BaseVectorSequence<QUESO::GslVector,QUESO::GslMatrix>&
657     mcmc_chain = inverseProb->chain();
658   unsigned int num_mcmc = mcmc_chain.subSequenceSize();
659 
660 
661   // filter chain -or- extract full chain and sort on likelihood values
662   if (outputLevel >= NORMAL_OUTPUT)
663     Cout << "Extracting unique samples from MCMC chain containing "
664 	 << num_mcmc << " samples.\n";
665 
666   // BMA TODO: determine if the conditioning alg can handle duplicates
667   // Note this isn't an array of unique samples, rather has no consecutive repeats
668   // May not be needed any longer...
669   RealVectorArray unique_samples;
670 
671   QUESO::GslVector q_sample(paramSpace->zeroVector()),
672               prev_q_sample(paramSpace->zeroVector());
673 
674   RealVector empty_rv;
675   mcmc_chain.getPositionValues(0, prev_q_sample);// extract vector from sequence
676   unique_samples.push_back(empty_rv);             // copy empty vector
677   copy_gsl(prev_q_sample, unique_samples.back()); // update in place
678 
679   // else first sample is same as last sample from previous chain
680   //  for (size_t s=1; s<num_mcmc; ++s) {
681   for (size_t s=1; s<num_mcmc; ++s) {
682     mcmc_chain.getPositionValues(s, q_sample);  // extract vector from sequence
683     if (!equal_gsl(q_sample, prev_q_sample)) {
684       unique_samples.push_back(empty_rv);        // copy empty vector
685       copy_gsl(q_sample, unique_samples.back()); // update in place
686       prev_q_sample = q_sample;
687     }
688   }
689 
690   if (outputLevel >= NORMAL_OUTPUT)
691     Cout << "Filtering chain by matrix conditioning: extracting best "
692 	 << batchSize << " from aggregate MCMC chain containing "
693 	 << unique_samples.size() << " samples.\n";
694   std::shared_ptr<NonDExpansion> nond_exp =
695     std::static_pointer_cast<NonDExpansion>(stochExpIterator.iterator_rep());
696   nond_exp->select_refinement_points(unique_samples, batchSize, allSamples);
697 }
698 
699 
best_to_all()700 void NonDQUESOBayesCalibration::best_to_all()
701 {
702   if (outputLevel >= NORMAL_OUTPUT) Cout << "Chain filtering results:\n";
703 
704   int num_best = bestSamples.size();
705   if (allSamples.numCols() != num_best)
706     allSamples.shapeUninitialized(numContinuousVars, num_best);
707 
708   std::/*multi*/map<Real, RealVector>::const_iterator
709     bs_it = bestSamples.begin(), bs_end = bestSamples.end();
710   for (int i=0; bs_it != bs_end; ++bs_it, ++i) {
711     Teuchos::setCol(bs_it->second, i, allSamples);
712     if (outputLevel >= NORMAL_OUTPUT) {
713       Cout << "Best point " << i+1 << ": Log posterior = " << bs_it->first
714 	   << " Sample:";
715       // BMA TODO: vector writer?
716       //      Cout << bs_it->second;
717       write_col_vector_trans(Cout, (int)i, allSamples, false, false, true);
718     }
719   }
720 }
721 
722 
update_model()723 void NonDQUESOBayesCalibration::update_model()
724 {
725   if (!emulatorType) {
726     Cerr << "Error: NonDQUESOBayesCalibration::update_model() requires an "
727 	 << "emulator model." << std::endl;
728     abort_handler(METHOD_ERROR);
729   }
730 
731   // perform truth evals (in parallel) for selected points
732   if (outputLevel >= NORMAL_OUTPUT)
733     Cout << "Updating emulator: evaluating " << allSamples.numCols()
734 	 << " best points." << std::endl;
735   // bypass surrogate but preserve transformations to standardized space
736   short orig_resp_mode = mcmcModel.surrogate_response_mode(); // store mode
737   mcmcModel.surrogate_response_mode(BYPASS_SURROGATE); // actual model evals
738   switch (emulatorType) {
739   case PCE_EMULATOR: case SC_EMULATOR:
740   case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR:
741     nondInstance = (NonD*)stochExpIterator.iterator_rep().get();
742     evaluate_parameter_sets(mcmcModel, true, false); // log allResp, no best
743     nondInstance = this; // restore
744     break;
745   case GP_EMULATOR: case KRIGING_EMULATOR:
746     if (standardizedSpace)
747       nondInstance = (NonD*)mcmcModel.subordinate_iterator().iterator_rep().get();
748     evaluate_parameter_sets(mcmcModel, true, false); // log allResp, no best
749     if (standardizedSpace)
750       nondInstance = this; // restore
751     break;
752   }
753   mcmcModel.surrogate_response_mode(orig_resp_mode); // restore mode
754 
755   // update mcmcModel with new data from iteratedModel
756   if (outputLevel >= NORMAL_OUTPUT)
757     Cout << "Updating emulator: appending " << allResponses.size()
758 	 << " new data sets." << std::endl;
759   switch (emulatorType) {
760   case PCE_EMULATOR: case SC_EMULATOR:
761   case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR: {
762     // Adapt the expansion in sync with the dataset using a top-down design
763     // (more explicit than embedded logic w/i mcmcModel.append_approximation).
764     std::shared_ptr<NonDExpansion> se_iterator =
765       std::static_pointer_cast<NonDExpansion>(stochExpIterator.iterator_rep());
766     se_iterator->append_expansion(allSamples, allResponses);
767     // TO DO: order increment places addtnl reqmts on emulator conv assessment
768     break;
769   }
770   case GP_EMULATOR: case KRIGING_EMULATOR:
771     mcmcModel.append_approximation(allSamples, allResponses, true); // rebuild
772     break;
773   }
774 }
775 
776 
assess_emulator_convergence()777 Real NonDQUESOBayesCalibration::assess_emulator_convergence()
778 {
779   // coeff reference point not yet available; force another iteration rather
780   // than use norm of current coeffs (stopping on small norm is not meaningful)
781   if (prevCoeffs.empty()) {
782     switch (emulatorType) {
783     case PCE_EMULATOR: case ML_PCE_EMULATOR: case MF_PCE_EMULATOR:
784       prevCoeffs = mcmcModel.approximation_coefficients(true);  break;
785     case SC_EMULATOR: case MF_SC_EMULATOR:
786       prevCoeffs = mcmcModel.approximation_coefficients(false); break;
787     case GP_EMULATOR: case KRIGING_EMULATOR:
788       Cerr << "Warning: convergence norm not yet defined for GP emulators in "
789 	   << "NonDQUESOBayesCalibration::assess_emulator_convergence()."
790 	   << std::endl;
791       break;
792     }
793     return DBL_MAX;
794   }
795 
796   Real l2_norm_delta_coeffs = 0., delta_coeff_ij;
797   switch (emulatorType) {
798   case PCE_EMULATOR: case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: {
799     // normalized coeffs:
800     const RealVectorArray& coeffs = mcmcModel.approximation_coefficients(true);
801     size_t i, j, num_qoi = coeffs.size(),
802       num_curr_coeffs, num_prev_coeffs, num_coeffs;
803 
804     // This approach assumes a well-ordered progression in multiIndex, which is
805     // acceptable for regression PCE using consistently incremented (candidate)
806     // expansion definitions.  Sparsity is not a concern as returned coeffs are
807     // inflated to be dense w.r.t. SharedOrthogPolyApproxData::multiIndex.
808     // Could implement as resize (inflat smaller w/ 0's) + vector difference +
809     // Frobenious norm, but current approach should have lower overhead.
810     for (i=0; i<num_qoi; ++i) {
811       const RealVector&      coeffs_i =     coeffs[i];
812       const RealVector& prev_coeffs_i = prevCoeffs[i];
813       num_curr_coeffs = coeffs_i.length();
814       num_prev_coeffs = prev_coeffs_i.length();
815       num_coeffs = std::max(num_curr_coeffs, num_prev_coeffs);
816       for (j=0; j<num_coeffs; ++j) {
817 	delta_coeff_ij = 0.;
818 	if (j<num_curr_coeffs) delta_coeff_ij += coeffs_i[j];
819 	if (j<num_prev_coeffs) delta_coeff_ij -= prev_coeffs_i[j];
820 	l2_norm_delta_coeffs += delta_coeff_ij * delta_coeff_ij;
821       }
822     }
823 
824     prevCoeffs = coeffs;
825     break;
826   }
827   case SC_EMULATOR: case MF_SC_EMULATOR: {
828     // Interpolation could use a similar concept with the expansion coeffs,
829     // although adaptation would imply differences in the grid.
830     const RealVectorArray& coeffs = mcmcModel.approximation_coefficients(false);
831 
832     Cerr << "Warning: convergence norm not yet defined for SC emulator in "
833 	 << "NonDQUESOBayesCalibration::assess_emulator_convergence()."
834 	 << std::endl;
835     //abort_handler(METHOD_ERROR);
836     return DBL_MAX;
837     break;
838   }
839   case GP_EMULATOR: case KRIGING_EMULATOR:
840     // Consider use of correlation lengths.
841     // TO DO: define SurfpackApproximation::approximation_coefficients()...
842     Cerr << "Warning: convergence norm not yet defined for GP emulators in "
843 	 << "NonDQUESOBayesCalibration::assess_emulator_convergence()."
844 	 << std::endl;
845     //abort_handler(METHOD_ERROR);
846     return DBL_MAX;
847     break;
848   }
849 
850   if (outputLevel >= NORMAL_OUTPUT) {
851     Real norm = std::sqrt(l2_norm_delta_coeffs);
852     Cout << "Assessing emulator convergence: l2 norm = " << norm << std::endl;
853     return norm;
854   }
855   else
856     return std::sqrt(l2_norm_delta_coeffs);
857 }
858 
859 
860 /** Initialize the calibration parameter domain (paramSpace,
861     paramMins/paramMaxs, paramDomain, paramInitials, priorRV) */
init_parameter_domain()862 void NonDQUESOBayesCalibration::init_parameter_domain()
863 {
864   // If calibrating error multipliers, the parameter domain is expanded to
865   // estimate hyperparameters sigma^2 that multiply any user-provided covariance
866   paramSpace = std::make_shared
867     <QUESO::VectorSpace<QUESO::GslVector,QUESO::GslMatrix>>
868     (*quesoEnv, "param_", numContinuousVars + numHyperparams, nullptr);
869 
870   QUESO::GslVector paramMins(paramSpace->zeroVector()),
871                    paramMaxs(paramSpace->zeroVector());
872   RealRealPairArray bnds
873     = mcmcModel.multivariate_distribution().distribution_bounds();
874   // SVD index conversion is more general, but not required for current uses
875   //const SharedVariablesData& svd= mcmcModel.current_variables().shared_data();
876   for (size_t i=0; i<numContinuousVars; ++i) {
877     //const RealRealPair& bnds_i = bnds[svd.cv_index_to_active_index(i)];
878     paramMins[i] = bnds[i].first;  paramMaxs[i] = bnds[i].second;
879   }
880   for (size_t i=0; i<numHyperparams; ++i) {
881     // inverse gamma is defined on [0,inf), but we may have to divide
882     // responses by up to mult^{5/2}, so bound away from 0.
883     // real_min()^{2/5} = 8.68836e-124, but we can't anticipate the
884     // problem scale.  Arbitrarily choose 1.0e-100 for now.
885     paramMins[numContinuousVars + i] = 1.0e-100;
886     paramMaxs[numContinuousVars + i] = std::numeric_limits<Real>::infinity();
887   }
888   paramDomain = std::make_shared
889     <QUESO::BoxSubset<QUESO::GslVector,QUESO::GslMatrix>>
890      ("param_", *paramSpace, paramMins, paramMaxs);
891 
892   paramInitials = std::make_shared<QUESO::GslVector>(paramSpace->zeroVector());
893   // paramInitials started from mapSoln, which encompasses either:
894   // > most recent solution from map_pre_solve(), if used, or
895   // > initial guess (user-specified or default initial values)
896   copy_gsl(mapSoln, *paramInitials);
897 
898   if (outputLevel > NORMAL_OUTPUT)
899     Cout << "Initial Parameter values sent to QUESO (may be in scaled)\n"
900 	 << *paramInitials << "\nParameter bounds sent to QUESO (may be scaled)"
901 	 << ":\nparamMins " << paramMins << "\nparamMaxs " << paramMaxs << '\n';
902 
903   // new approach supports arbitrary priors:
904   priorRv = std::make_shared<QuesoVectorRV<QUESO::GslVector,QUESO::GslMatrix>>
905   ("prior_", *paramDomain, nonDQUESOInstance);
906 
907 }
908 
909 
init_proposal_covariance()910 void NonDQUESOBayesCalibration::init_proposal_covariance()
911 {
912   // Size our Queso covariance matrix and initialize trailing diagonal if
913   // calibrating error hyperparams
914   proposalCovMatrix =
915     std::make_shared<QUESO::GslMatrix>(paramSpace->zeroVector());
916   if (numHyperparams > 0) {
917     // all hyperparams utilize inverse gamma priors, which may not
918     // have finite variance; use std_dev = 0.05 * mode
919     Real alpha;
920     for (int i=0; i<numHyperparams; ++i) {
921       invGammaDists[i].pull_parameter(Pecos::IGA_ALPHA, alpha);
922       (*proposalCovMatrix)(numContinuousVars + i, numContinuousVars + i) =
923 	(alpha > 2.) ? invGammaDists[i].variance() :
924 	std::pow(0.05*(*paramInitials)[numContinuousVars + i], 2.);
925     }
926   }
927 
928   // initialize proposal covariance (must follow parameter domain init)
929   // This is the leading sub-matrix in the case of calibrating sigma terms
930   if (proposalCovarType == "user") // either filename OR data values defined
931     user_proposal_covariance(proposalCovarInputType, proposalCovarData,
932 			     proposalCovarFilename);
933   else if (proposalCovarType == "prior")
934     prior_proposal_covariance(); // prior selection or default for no emulator
935   else // misfit Hessian-based proposal with prior preconditioning
936     prior_cholesky_factorization();
937 }
938 
939 
940 /** Must be called after paramMins/paramMaxs set above */
prior_proposal_covariance()941 void NonDQUESOBayesCalibration::prior_proposal_covariance()
942 {
943   //int num_params = paramSpace->dimGlobal();
944   //QUESO::GslVector covDiag(paramSpace->zeroVector());
945 
946   // diagonal covariance from variance of prior marginals
947   RealVector dist_var = mcmcModel.multivariate_distribution().variances();
948   // SVD index conversion is more general, but not required for current uses
949   //const SharedVariablesData& svd= mcmcModel.current_variables().shared_data();
950   for (int i=0; i<numContinuousVars; ++i)
951     (*proposalCovMatrix)(i,i) = priorPropCovMult * dist_var[i];
952       //* dist_var[svd.cv_index_to_active_index(i)];
953   //proposalCovMatrix.reset(new QUESO::GslMatrix(covDiag));
954 
955   if (outputLevel > NORMAL_OUTPUT) {
956     //Cout << "Diagonal elements of the proposal covariance sent to QUESO";
957     //if (standardizedSpace) Cout << " (scaled space)";
958     //Cout << '\n' << covDiag << '\n';
959     Cout << "QUESO ProposalCovMatrix";
960     if (standardizedSpace) Cout << " (scaled space)";
961     Cout << '\n';
962     for (size_t i=0; i<numContinuousVars; ++i) {
963       for (size_t j=0; j<numContinuousVars; ++j)
964 	Cout <<  (*proposalCovMatrix)(i,j) << "  ";
965       Cout << '\n';
966     }
967   }
968 
969   validate_proposal();
970 }
971 
972 
973 /** This function will convert user-specified cov_type = "diagonal" |
974     "matrix" data from either cov_data or cov_filename and populate a
975     full QUESO::GslMatrix* in proposalCovMatrix with the covariance. */
976 void NonDQUESOBayesCalibration::
user_proposal_covariance(const String & input_fmt,const RealVector & cov_data,const String & cov_filename)977 user_proposal_covariance(const String& input_fmt, const RealVector& cov_data,
978 			 const String& cov_filename)
979 {
980   // TODO: transform user covariance for use in standardized probability space
981   if (standardizedSpace)
982     throw std::runtime_error("user-defined proposal covariance is invalid for use in transformed probability spaces.");
983   // Note: if instead a warning, then fallback to prior_proposal_covariance()
984 
985   bool use_file = !cov_filename.empty();
986 
987   // Sanity check
988   if( ("diagonal" != input_fmt) &&
989       ("matrix"   != input_fmt) )
990     throw std::runtime_error("User-specified covariance must have type of either \"diagonal\" of \"matrix\".  You have \""+input_fmt+"\".");
991 
992   // Sanity check
993   if( cov_data.length() && use_file )
994     throw std::runtime_error("You cannot provide both covariance values and a covariance data filename.");
995 
996   // Size our Queso covariance matrix
997   //proposalCovMatrix.reset(new QUESO::GslMatrix(paramSpace->zeroVector()));
998 
999   // Sanity check
1000   /*if( (proposalCovMatrix->numRowsLocal()  != numContinuousVars) ||
1001       (proposalCovMatrix->numRowsGlobal() != numContinuousVars) ||
1002       (proposalCovMatrix->numCols()       != numContinuousVars)   )
1003     throw std::runtime_error("Queso vector space is not consistent with parameter dimension.");
1004   */
1005   // Read in a general way and then check that the data is consistent
1006   RealVectorArray values_from_file;
1007   if( use_file )
1008   {
1009     std::ifstream s;
1010     TabularIO::open_file(s, cov_filename, "read_queso_covariance_data");
1011     bool row_major = false;
1012     read_unsized_data(s, values_from_file, row_major);
1013   }
1014 
1015   if( "diagonal" == input_fmt )
1016   {
1017     if( use_file ) {
1018       // Allow either row or column data layout
1019       bool row_data = false;
1020       // Sanity checks
1021       if( values_from_file.size() != 1 ) {
1022         if( values_from_file.size() == numContinuousVars )
1023           row_data = true;
1024         else
1025           throw std::runtime_error("\"diagonal\" Queso covariance file data should have either 1 column (or row) and "
1026               +convert_to_string(numContinuousVars)+" rows (or columns).");
1027       }
1028       if( row_data ) {
1029         for( int i=0; i<numContinuousVars; ++i )
1030           (*proposalCovMatrix)(i,i) = values_from_file[i](0);
1031       }
1032       else {
1033         if( values_from_file[0].length() != numContinuousVars )
1034           throw std::runtime_error("\"diagonal\" Queso covariance file data should have "
1035               +convert_to_string(numContinuousVars)+" rows.  Found "
1036               +convert_to_string(values_from_file[0].length())+" rows.");
1037         for( int i=0; i<numContinuousVars; ++i )
1038           (*proposalCovMatrix)(i,i) = values_from_file[0](i);
1039       }
1040     }
1041     else {
1042       // Sanity check
1043       if( numContinuousVars != cov_data.length() )
1044         throw std::runtime_error("Expected num covariance values is "+convert_to_string(numContinuousVars)
1045                                  +" but incoming vector provides "+convert_to_string(cov_data.length())+".");
1046       for( int i=0; i<numContinuousVars; ++i )
1047         (*proposalCovMatrix)(i,i) = cov_data(i);
1048     }
1049   }
1050   else // "matrix" == input_fmt
1051   {
1052     if( use_file ) {
1053       // Sanity checks
1054       if( values_from_file.size() != numContinuousVars )
1055         throw std::runtime_error("\"matrix\" Queso covariance file data should have "
1056                                  +convert_to_string(numContinuousVars)+" columns.  Found "
1057                                  +convert_to_string(values_from_file.size())+" columns.");
1058       if( values_from_file[0].length() != numContinuousVars )
1059         throw std::runtime_error("\"matrix\" Queso covariance file data should have "
1060                                  +convert_to_string(numContinuousVars)+" rows.  Found "
1061                                  +convert_to_string(values_from_file[0].length())+" rows.");
1062       for( int i=0; i<numContinuousVars; ++i )
1063         for( int j=0; j<numContinuousVars; ++j )
1064           (*proposalCovMatrix)(i,j) = values_from_file[i](j);
1065     }
1066     else {
1067       // Sanity check
1068       if( numContinuousVars*numContinuousVars != cov_data.length() )
1069         throw std::runtime_error("Expected num covariance values is "+convert_to_string(numContinuousVars*numContinuousVars)
1070             +" but incoming vector provides "+convert_to_string(cov_data.length())+".");
1071       int count = 0;
1072       for( int i=0; i<numContinuousVars; ++i )
1073         for( int j=0; j<numContinuousVars; ++j )
1074           (*proposalCovMatrix)(i,j) = cov_data[count++];
1075     }
1076   }
1077 
1078   // validate that provided data is a valid covariance matrix
1079   validate_proposal();
1080 }
1081 
1082 
validate_proposal()1083 void NonDQUESOBayesCalibration::validate_proposal()
1084 {
1085   // validate that provided data is a valid covariance matrix
1086 
1087   if (outputLevel > NORMAL_OUTPUT ) {
1088     Cout << "Proposal Covariance " << '\n';
1089     proposalCovMatrix->print(Cout);
1090     Cout << std::endl;
1091   }
1092 
1093   // test symmetry
1094   QUESO::GslMatrix test_mat = proposalCovMatrix->transpose();
1095   test_mat -= *proposalCovMatrix;
1096   if( test_mat.normMax() > 1.e-14 )
1097     throw std::runtime_error("Queso covariance matrix is not symmetric.");
1098 
1099   // test PD part of SPD
1100   test_mat = *proposalCovMatrix;
1101   int ierr = test_mat.chol();
1102   if( ierr == QUESO::UQ_MATRIX_IS_NOT_POS_DEFINITE_RC)
1103     throw std::runtime_error("Queso covariance data is not SPD.");
1104 }
1105 
1106 
1107 /// set inverse problem options common to all solvers
set_ip_options()1108 void NonDQUESOBayesCalibration::set_ip_options()
1109 {
1110   // Construct with default options
1111   calIpOptionsValues = std::make_shared<QUESO::SipOptionsValues>();
1112 
1113   // C++ API options may override defaults
1114   //definitely want to retain computeSolution
1115   calIpOptionsValues->m_computeSolution    = true;
1116   calIpOptionsValues->m_dataOutputFileName = "QuesoDiagnostics/invpb_output";
1117   calIpOptionsValues->m_dataOutputAllowedSet.insert(0);
1118   calIpOptionsValues->m_dataOutputAllowedSet.insert(1);
1119 
1120   // File-based power user parameters have the final say
1121   if (!advancedOptionsFile.empty())
1122     calIpOptionsValues->parse(*quesoEnv, "");
1123 
1124  if (outputLevel >= DEBUG_OUTPUT)
1125     Cout << "\nIP Final Options:" << *calIpOptionsValues << std::endl;
1126 }
1127 
1128 
set_mh_options()1129 void NonDQUESOBayesCalibration::set_mh_options()
1130 {
1131   // Construct with default options
1132   calIpMhOptionsValues = std::make_shared<QUESO::MhOptionsValues>();
1133 
1134   // C++ API options may override defaults
1135   calIpMhOptionsValues->m_dataOutputFileName = "QuesoDiagnostics/mh_output";
1136   calIpMhOptionsValues->m_dataOutputAllowedSet.insert(0);
1137   calIpMhOptionsValues->m_dataOutputAllowedSet.insert(1);
1138 
1139   calIpMhOptionsValues->m_rawChainDataInputFileName   = ".";
1140   calIpMhOptionsValues->m_rawChainSize = (chainSamples > 0) ? chainSamples : 1000;
1141   //calIpMhOptionsValues->m_rawChainGenerateExtra     = false;
1142   //calIpMhOptionsValues->m_rawChainDisplayPeriod     = 20000;
1143   //calIpMhOptionsValues->m_rawChainMeasureRunTimes   = true;
1144   calIpMhOptionsValues->m_rawChainDataOutputFileName  = "QuesoDiagnostics/raw_chain";
1145   calIpMhOptionsValues->m_rawChainDataOutputAllowedSet.insert(0);
1146   calIpMhOptionsValues->m_rawChainDataOutputAllowedSet.insert(1);
1147   // NO LONGER SUPPORTED.  calIpMhOptionsValues->m_rawChainComputeStats = true;
1148 
1149   //calIpMhOptionsValues->m_displayCandidates         = false;
1150   calIpMhOptionsValues->m_putOutOfBoundsInChain       = false;
1151   //calIpMhOptionsValues->m_tkUseLocalHessian         = false;
1152   //calIpMhOptionsValues->m_tkUseNewtonComponent      = true;
1153 
1154   // delayed rejection option:
1155   calIpMhOptionsValues->m_drMaxNumExtraStages
1156     = (mcmcType == "delayed_rejection" || mcmcType == "dram") ? 1 : 0;
1157   calIpMhOptionsValues->m_drScalesForExtraStages.resize(1);
1158   calIpMhOptionsValues->m_drScalesForExtraStages[0] = 5;
1159   //calIpMhOptionsValues->m_drScalesForExtraStages[1] = 10;
1160   //calIpMhOptionsValues->m_drScalesForExtraStages[2] = 20;
1161 
1162   // adaptive metropolis option:
1163   calIpMhOptionsValues->m_amInitialNonAdaptInterval
1164     = (mcmcType == "adaptive_metropolis" || mcmcType == "dram") ? 100 : 0;
1165   calIpMhOptionsValues->m_amAdaptInterval           = 100;
1166   calIpMhOptionsValues->m_amEta                     = 2.88;
1167   calIpMhOptionsValues->m_amEpsilon                 = 1.e-8;
1168 
1169   // chain management options:
1170   if (false) { // TO DO: add user spec to support chain filtering?
1171     // filtering the chain for final output affects the inverseProb->chain()
1172     // as well as the QuesoDiagnostics directory.  For now, we turn this off
1173     // to promote reporting the best MAP estimate.
1174     calIpMhOptionsValues->m_filteredChainGenerate         = true;
1175     calIpMhOptionsValues->m_filteredChainDiscardedPortion = 0.;
1176     calIpMhOptionsValues->m_filteredChainLag              = 20;
1177     calIpMhOptionsValues->
1178       m_filteredChainDataOutputFileName = "QuesoDiagnostics/filtered_chain";
1179     calIpMhOptionsValues->m_filteredChainDataOutputAllowedSet.insert(0);
1180     calIpMhOptionsValues->m_filteredChainDataOutputAllowedSet.insert(1);
1181     //calIpMhOptionsValues->m_filteredChainComputeStats   = true;
1182   }
1183   else //if (adaptPosteriorRefine || chainCycles > 1)
1184     // In this case, we process the full chain for maximizing posterior
1185     // probability or point spacing / linear system conditioning
1186     calIpMhOptionsValues->m_filteredChainGenerate         = false;
1187 
1188   // Logit transform addresses high rejection rates in corners of
1189   // bounded domains.  It is potentially redundant in some cases,
1190   // e.g., WIENER u-space type.
1191   if (logitTransform) {
1192     calIpMhOptionsValues->m_algorithm = "logit_random_walk";
1193     calIpMhOptionsValues->m_tk = "logit_random_walk";
1194     calIpMhOptionsValues->m_doLogitTransform = true;  // deprecated
1195   }
1196   else {
1197     calIpMhOptionsValues->m_algorithm = "random_walk";
1198     calIpMhOptionsValues->m_tk = "random_walk";
1199     calIpMhOptionsValues->m_doLogitTransform = false;  // deprecated
1200   }
1201 
1202   // Use custom TK for derivative-based proposal updates
1203   if (proposalCovarType == "derivatives" &&
1204       propCovUpdatePeriod < std::numeric_limits<int>::max()) {
1205     if (logitTransform)
1206       calIpMhOptionsValues->m_tk = "dakota_dipc_logit_tk";
1207     else
1208       calIpMhOptionsValues->m_tk = "dakota_dipc_tk";
1209     calIpMhOptionsValues->m_updateInterval = propCovUpdatePeriod;
1210   }
1211 
1212   // File-based power user parameters have the final say
1213   // The options are typically prefixed with ip_mh, so prepend an "ip_" prefix
1214   // from the IP options:
1215   if (!advancedOptionsFile.empty())
1216     calIpMhOptionsValues->parse(*quesoEnv, calIpOptionsValues->m_prefix);
1217 
1218   if (outputLevel >= DEBUG_OUTPUT)
1219     Cout << "\nMH Final Options:" << *calIpMhOptionsValues << std::endl;
1220  }
1221 
1222 
1223 void NonDQUESOBayesCalibration::
print_results(std::ostream & s,short results_state)1224 print_results(std::ostream& s, short results_state)
1225 {
1226   if (bestSamples.empty()) return;
1227 
1228   // ----------------------------------------
1229   // Output best sample which appoximates MAP
1230   // ----------------------------------------
1231   std::/*multi*/map<Real, RealVector>::const_iterator it = --bestSamples.end();
1232   //std::pair<Real, RealVector>& best = bestSamples.back();
1233   const RealVector& best_sample = it->second;
1234 
1235   size_t wpp7 = write_precision+7;
1236   s << "<<<<< Best parameters          =\n";
1237   print_variables(s, best_sample);
1238 
1239   // print corresponding response data; here we recover the misfit
1240   // instead of re-computing it
1241   QUESO::GslVector qv(paramSpace->zeroVector());
1242   copy_gsl(best_sample, qv);
1243   Real log_prior = log_prior_density(qv), log_post = it->first;
1244   size_t num_total_calib_terms = residualModel.num_primary_fns();
1245   Real half_nr_log2pi = num_total_calib_terms * HALF_LOG_2PI;
1246   RealVector hyper_params(numHyperparams);
1247   copy_gsl_partial(qv, numContinuousVars, hyper_params);
1248   Real half_log_det =
1249     expData.half_log_cov_determinant(hyper_params, obsErrorMultiplierMode);
1250   // misfit = -log(L) - 1/2*Nr*log(2*pi) - 1/2*log(det(Cov))
1251   Real misfit = (log_prior - log_post) - half_nr_log2pi - half_log_det;
1252 
1253   s <<   "<<<<< Best misfit              ="
1254     << "\n                     " << std::setw(wpp7) << misfit
1255     << "\n<<<<< Best log prior           ="
1256     << "\n                     " << std::setw(wpp7) << log_prior
1257     << "\n<<<<< Best log posterior       ="
1258     << "\n                     " << std::setw(wpp7) << log_post << std::endl;
1259 
1260   /*
1261   // --------------------------
1262   // Multipoint results summary
1263   // --------------------------
1264   std::map<Real, RealVector>::const_iterator it;
1265   size_t i, j, num_best = bestSamples.size(), wpp7 = write_precision+7;
1266   for (it=bestSamples.begin(), i=1; it!=bestSamples.end(); ++it, ++i) {
1267     s << "<<<<< Best parameters          ";
1268     if (num_best > 1) s << "(set " << i << ") ";
1269     s << "=\n";
1270     RealVector best_sample = it->second;
1271     for (j=0; j<numContinuousVars; ++j)
1272       s << "                     " << std::setw(wpp7) << best_sample[j] << '\n';
1273     s << "<<<<< Best log posterior       ";
1274     if (num_best > 1) s << "(set " << i << ") ";
1275     s << "=\n                     " << std::setw(wpp7) << it->first << '\n';
1276   }
1277   */
1278 
1279   // Print final stats for variables and responses
1280   NonDBayesCalibration::print_results(s, results_state);
1281 }
1282 
1283 
1284 void NonDQUESOBayesCalibration::
print_variables(std::ostream & s,const RealVector & c_vars)1285 print_variables(std::ostream& s, const RealVector& c_vars)
1286 {
1287   StringMultiArrayConstView cv_labels =
1288     iteratedModel.continuous_variable_labels();
1289   // the residualModel includes any hyper-parameters
1290   StringArray combined_labels;
1291   copy_data(residualModel.continuous_variable_labels(), combined_labels);
1292 
1293   size_t wpp7 = write_precision+7;
1294 
1295   // print MAP for continuous random variables
1296   if (standardizedSpace) {
1297     RealVector u_rv(Teuchos::View, c_vars.values(), numContinuousVars);
1298     RealVector x_rv;
1299     mcmcModel.probability_transformation().trans_U_to_X(u_rv, x_rv);
1300     write_data(Cout, x_rv, cv_labels);
1301   }
1302   else
1303     for (size_t j=0; j<numContinuousVars; ++j)
1304       s << "                     " << std::setw(wpp7) << c_vars[j]
1305 	<< ' ' << cv_labels[j] << '\n';
1306   // print MAP for hyper-parameters (e.g., observation error params)
1307   for (size_t j=0; j<numHyperparams; ++j)
1308     s << "                     " << std::setw(wpp7)
1309       << c_vars[numContinuousVars+j] << ' '
1310       << combined_labels[numContinuousVars + j] << '\n';
1311 }
1312 
1313 
dakotaLogLikelihood(const QUESO::GslVector & paramValues,const QUESO::GslVector * paramDirection,const void * functionDataPtr,QUESO::GslVector * gradVector,QUESO::GslMatrix * hessianMatrix,QUESO::GslVector * hessianEffect)1314 double NonDQUESOBayesCalibration::dakotaLogLikelihood(
1315   const QUESO::GslVector& paramValues, const QUESO::GslVector* paramDirection,
1316   const void*         functionDataPtr, QUESO::GslVector*       gradVector,
1317   QUESO::GslMatrix*     hessianMatrix, QUESO::GslVector*       hessianEffect)
1318 {
1319   // The GP/KRIGING/NO EMULATOR may use an unstandardized space (original)
1320   // and the PCE or SC cases always use standardized space.
1321   //
1322   // We had discussed having QUESO search in the original space:  this may
1323   // difficult for high dimensional spaces depending on the scaling,
1324   // because QUESO calculates the volume of the hypercube in which it is
1325   // searching and will stop if it is too small (e.g. if one input is
1326   // of small magnitude, searching in the original space will not be viable).
1327 
1328   // Set the calibration variables and hyperparams in the outer
1329   // residualModel: note that this won't update the Variables object
1330   // in any inner models.
1331   RealVector& all_params = nonDQUESOInstance->
1332     residualModel.current_variables().continuous_variables_view();
1333   nonDQUESOInstance->copy_gsl(paramValues, all_params);
1334 
1335   nonDQUESOInstance->residualModel.evaluate();
1336 
1337   const RealVector& residuals =
1338     nonDQUESOInstance->residualModel.current_response().function_values();
1339   double log_like = nonDQUESOInstance->log_likelihood(residuals, all_params);
1340 
1341   if (nonDQUESOInstance->outputLevel >= DEBUG_OUTPUT) {
1342     Cout << "Log likelihood is " << log_like << " Likelihood is "
1343          << std::exp(log_like) << '\n';
1344 
1345     std::ofstream LogLikeOutput;
1346     LogLikeOutput.open("NonDQUESOLogLike.txt", std::ios::out | std::ios::app);
1347     // Note: parameter values are in scaled space, if scaling is
1348     // active; residuals may be scaled by covariance
1349     size_t num_total_params =
1350       nonDQUESOInstance->numContinuousVars + nonDQUESOInstance->numHyperparams;
1351     for (size_t i=0; i<num_total_params; ++i)
1352       LogLikeOutput << paramValues[i] << ' ' ;
1353     for (size_t i=0; i<residuals.length(); ++i)
1354       LogLikeOutput << residuals[i] << ' ' ;
1355     LogLikeOutput << log_like << '\n';
1356     LogLikeOutput.close();
1357   }
1358 
1359   return log_like;
1360 }
1361 
1362 
1363 void NonDQUESOBayesCalibration::
copy_gsl(const QUESO::GslVector & qv,RealVector & rv)1364 copy_gsl(const QUESO::GslVector& qv, RealVector& rv)
1365 {
1366   size_t i, size_qv = qv.sizeLocal();
1367   if (size_qv != rv.length())
1368     rv.sizeUninitialized(size_qv);
1369   for (i=0; i<size_qv; ++i)
1370     rv[i] = qv[i];
1371 }
1372 
1373 
1374 void NonDQUESOBayesCalibration::
copy_gsl(const RealVector & rv,QUESO::GslVector & qv)1375 copy_gsl(const RealVector& rv, QUESO::GslVector& qv)
1376 {
1377   size_t i, size_rv = rv.length();
1378   // this resize is not general, but is adequate for current uses:
1379   if (size_rv != qv.sizeLocal())
1380     qv = paramSpace->zeroVector();//(size_rv);
1381   for (i=0; i<size_rv; ++i)
1382     qv[i] = rv[i];
1383 }
1384 
1385 
1386 void NonDQUESOBayesCalibration::
copy_gsl_partial(const QUESO::GslVector & qv,size_t start_qv,RealVector & rv)1387 copy_gsl_partial(const QUESO::GslVector& qv, size_t start_qv, RealVector& rv)
1388 {
1389   // copy part of qv into all of rv
1390   size_t ri, qi, size_rv = rv.length();
1391   for (ri=0, qi=start_qv; ri<size_rv; ++ri, ++qi)
1392     rv[ri] = qv[qi];
1393 }
1394 
1395 
1396 void NonDQUESOBayesCalibration::
copy_gsl_partial(const RealVector & rv,QUESO::GslVector & qv,size_t start_qv)1397 copy_gsl_partial(const RealVector& rv, QUESO::GslVector& qv, size_t start_qv)
1398 {
1399   // copy all of rv into part of qv
1400   size_t ri, qi, size_rv = rv.length();
1401   for (ri=0, qi=start_qv; ri<size_rv; ++ri, ++qi)
1402     qv[qi] = rv[ri];
1403 }
1404 
1405 
1406 void NonDQUESOBayesCalibration::
copy_gsl(const QUESO::GslVector & qv,RealMatrix & rm,int col)1407 copy_gsl(const QUESO::GslVector& qv, RealMatrix& rm, int col)
1408 {
1409   size_t i, size_qv = qv.sizeLocal();
1410   if (col < 0 || col >= rm.numCols() || size_qv != rm.numRows()) {
1411     Cerr << "Error: inconsistent matrix access in copy_gsl()." << std::endl;
1412     abort_handler(METHOD_ERROR);
1413   }
1414   Real* rm_c = rm[col];
1415   for (i=0; i<size_qv; ++i)
1416     rm_c[i] = qv[i];
1417 }
1418 
1419 
1420 bool NonDQUESOBayesCalibration::
equal_gsl(const QUESO::GslVector & qv1,const QUESO::GslVector & qv2)1421 equal_gsl(const QUESO::GslVector& qv1, const QUESO::GslVector& qv2)
1422 {
1423   size_t size_qv1 = qv1.sizeLocal();
1424   if (size_qv1 != qv2.sizeLocal())
1425     return false;
1426   for (size_t i=0; i<size_qv1; ++i)
1427     if (qv1[i] != qv2[i])
1428       return false;
1429   return true;
1430 }
1431 
1432 
1433 } // namespace Dakota
1434