1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 //- Class:	 NonDGPMSABayesCalibration
10 //- Description: Derived class for Bayesian inference using QUESO/GPMSA
11 //- Owner:       Laura Swiler, Brian Adams
12 //- Checked by:
13 //- Version:
14 
15 // place Dakota headers first to minimize influence of QUESO defines
16 #include "NonDGPMSABayesCalibration.hpp"
17 #include "NonDLHSSampling.hpp"
18 #include "ProblemDescDB.hpp"
19 #include "ParallelLibrary.hpp"
20 #include "DakotaModel.hpp"
21 
22 // then system-related headers
23 #include <boost/filesystem/path.hpp>
24 namespace bfs = boost::filesystem;
25 
26 // then list QUESO headers
27 #include "queso/GPMSA.h"
28 #include "queso/StatisticalInverseProblem.h"
29 #include "queso/GslVector.h"
30 #include "queso/GslMatrix.h"
31 
32 using QUESO::GslVector;
33 using QUESO::GslMatrix;
34 
35 static const char rcsId[]="@(#) $Id$";
36 
37 
38 /* TODO / QUESTIONS:
39 
40  * Write tests that use DOE vs import
41 
42     - Notify users what data we're trying to read from tabular
43 
44  * A number of improvements in DACE / imported build data
45 
46     - Decide on meaning of buildSamples for read vs. DACE iterator control
47 
48     - Read build data in base class?
49 
50     - Read using residualModel's DataTransformModel vars/resp?
51 
52     - Handle string variables (convert to indices), probably be
53       reading into a Variables object
54 
55     - Instead of assuming config vars = 0.5, use initial_state?
56 
57     - Read the build data early so we know buildSamples; would be nice
58       to integrate with populating the data below so we can avoid the
59       temporary
60 
61     - Document: theta, x, f ordering; document allVariables on DACE
62       iterator
63 
64  * Implement m_realizer? We implement only what's needed to compute
65    log prior, not realize()
66 
67  * Can one disable inference of the hyper-parameters?  Option to
68    calibrate theta vs. config vars, e.g.,
69    mhVarOptions->m_parameterDisabledSet.insert(0);
70 
71  * Normalization: Is it truly optional?  It's not clear what normalize
72    means in context of functional data?  Do the config vars need to be
73    normalized?  Need to verify that normalization works in single
74    config and single experiment cases.
75 
76  * Functional data omission of scenarios; set to 0.5?
77    Need to be sure that no other code is assuming these params don't exist...
78    Also need to make sure to send a valid domain
79 
80  * BJW: How to handle LHS design generation?  Over all variables or
81         just theta?  Probably all variables; that's being assumed in
82         some places.
83 
84  * Can't use MAP pre-solve since don't have access to the likelihood?
85    Perhaps get access through GPMSAFactory?
86 
87  * Allow basic vs. advanced user options
88    ??? Appears only GPMSA allows default construct with parse override?
89    ??? Passed prefix is ignored by some EnvOptions ctors ???
90    ??? Can't construct QUESO env with C++ options, then override from file...
91 
92  * Terminate handler might be getting called recursively in serial, at
93    least when run in debugger.  Perhaps because old_terminate_handler
94    is NULL and causing a segfault when called.
95 
96  * Emulator mean must be set to something to avoid nan/nan issue.
97    Probably want to be able to omit parameters that aren't being
98    calibrated statistically...
99 
100  * Allow variable Experiment sizes
101 
102  * Experiment covariance: correlation or covariance;
103    normalized by simulation std deviation as in example?  Does this
104    need to be defined if identity?
105 
106  * Export chain to named file
107 
108  */
109 
110 
111 namespace Dakota {
112 
113 // initialization of statics
114 NonDGPMSABayesCalibration* NonDGPMSABayesCalibration::nonDGPMSAInstance(NULL);
115 
116 /** This constructor is called for a standard letter-envelope iterator
117     instantiation.  In this case, set_db_list_nodes has been called and
118     probDescDB can be queried for settings from the method specification. */
119 NonDGPMSABayesCalibration::
NonDGPMSABayesCalibration(ProblemDescDB & problem_db,Model & model)120 NonDGPMSABayesCalibration(ProblemDescDB& problem_db, Model& model):
121   NonDQUESOBayesCalibration(problem_db, model),
122   buildSamples(probDescDB.get_int("method.build_samples")),
123   approxImportFile(probDescDB.get_string("method.import_build_points_file")),
124   approxImportFormat(probDescDB.get_ushort("method.import_build_format")),
125   approxImportActiveOnly(
126     probDescDB.get_bool("method.import_build_active_only")),
127   userConfigVars(expData.num_config_vars()),
128   gpmsaConfigVars(std::max(userConfigVars, (unsigned int) 1)),
129   gpmsaNormalize(probDescDB.get_bool("method.nond.gpmsa_normalize"))
130 {
131   bool found_error = false;
132 
133   // Input spec should prevent this, but be sure.  It's possible
134   // through user input that the inbound model is of type surrogate
135   // and that's okay.
136   if (emulatorType != NO_EMULATOR) {
137     Cerr << "\nError: Dakota emulators not supported with GPMSA\n";
138     found_error = true;
139   }
140 
141   // TODO: Do we want to allow a single field group to allow the full
142   // multi-variate case?
143   const SharedResponseData& srd = model.current_response().shared_data();
144   if (srd.num_field_response_groups() > 0 && outputLevel >= NORMAL_OUTPUT)
145     Cout << "\nWarning: GPMSA does not yet treat field_responses; they will be "
146 	 << "treated as a\n         single multivariate response set."
147 	 << std::endl;
148 
149   // REQUIRE experiment data
150   if (expData.num_experiments() < 1) {
151     Cerr << "\nError: GPMSA requires experimental data\n";
152     found_error = true;
153   }
154 
155   if (userConfigVars > 0 && !approxImportFile.empty() &&
156       approxImportActiveOnly && outputLevel >= NORMAL_OUTPUT)
157     Cout << "\nWarning: Experimental data presented to GPMSA has configuration variables, but\n         simulation data import specifies active_only, so nominal values of\n         configuration variables will be used." << std::endl;
158 
159   if (found_error)
160     abort_handler(METHOD_ERROR);
161 
162   // TODO: use base class to manage any problem transformations and
163   // probably the surrogate build data management
164 
165   // TODO: conditionally enable sampler only if needed to augment
166   // samples and allow both to be specified
167   // if (approxImportFile.empty())
168   //   buildSamples = probDescDB.get_int("method.build_samples");
169   // else buildSamples will get set after reading the file at run-time
170 
171   // BMA TODO: should we always instantiate this or not?  Allow augmentation?
172   int samples = approxImportFile.empty() ? buildSamples : 0;
173   const String& rng = probDescDB.get_string("method.random_number_generator");
174   unsigned short sample_type = SUBMETHOD_DEFAULT;
175   lhsIter.assign_rep(std::make_shared<NonDLHSSampling>
176 		     (mcmcModel, sample_type, samples, randomSeed, rng));
177 }
178 
179 
~NonDGPMSABayesCalibration()180 NonDGPMSABayesCalibration::~NonDGPMSABayesCalibration()
181 { }
182 
183 
derived_init_communicators(ParLevLIter pl_iter)184 void NonDGPMSABayesCalibration::derived_init_communicators(ParLevLIter pl_iter)
185 {
186   // lhsIter uses NoDBBaseConstructor, so no need to manage DB list nodes
187   // at this level
188   lhsIter.init_communicators(pl_iter);
189   NonDBayesCalibration::derived_init_communicators(pl_iter);
190 }
191 
192 
derived_set_communicators(ParLevLIter pl_iter)193 void NonDGPMSABayesCalibration::derived_set_communicators(ParLevLIter pl_iter)
194 {
195   // lhsIter uses NoDBBaseConstructor, so no need to manage DB list nodes
196   // at this level
197   lhsIter.set_communicators(pl_iter);
198   NonDBayesCalibration::derived_set_communicators(pl_iter);
199 }
200 
201 
derived_free_communicators(ParLevLIter pl_iter)202 void NonDGPMSABayesCalibration::derived_free_communicators(ParLevLIter pl_iter)
203 {
204   NonDBayesCalibration::derived_free_communicators(pl_iter);
205   lhsIter.free_communicators(pl_iter);
206 }
207 
208 
209 /** Perform the uncertainty quantification */
calibrate()210 void NonDGPMSABayesCalibration::calibrate()
211 {
212   // TODO: base class needs runtime update as well; decouple these:
213   nonDQUESOInstance = this;
214   nonDGPMSAInstance = this;
215 
216   if (outputLevel >= NORMAL_OUTPUT)
217     Cout << ">>>>> GPMSA: Setting up calibration." << std::endl;
218 
219   // no emulators will be setup, but need to initialize the prob transforms
220   initialize_model();
221 
222   // paramSpace, paramMins/paramMaxs, paramDomain, paramInitials, priorRV
223   // Note: The priorRV is only defined over the calibration parameters
224   // (and any Dakota-managed hyper-parameters)
225   init_parameter_domain();
226 
227   // proposal may depend on the parameter space properties
228   init_proposal_covariance();
229 
230   // GPMSA scenario space = configuration space. GPMSA requires at least 1
231   // configuration variable, set to 0.5 for all scenarios if needed
232   configSpace = std::make_shared<QUESO::VectorSpace<GslVector, GslMatrix>>
233     (*quesoEnv, "scenario_", gpmsaConfigVars, nullptr);
234 
235   // GPMSA output space.  TODO: The simulation output space eta won't
236   // always have same size as experiment space.  Moreover, each
237   // experiment might have different size. Generalize this to
238   // multi-experiment of varying size, and fields.
239   unsigned int num_eta = numFunctions;
240   nEtaSpace = std::make_shared<QUESO::VectorSpace<GslVector, GslMatrix>>
241     (*quesoEnv, "output_", num_eta, nullptr);
242 
243   // BMA TODO: manage experiment size (each experiment need not have
244   // the same size and one sim may inform multiple experiments through
245   // DataTransform...)
246   unsigned int experiment_size = expData.all_data(0).length();
247   experimentSpace = std::make_shared<QUESO::VectorSpace<GslVector, GslMatrix>>
248     (*quesoEnv,"experimentspace_", experiment_size, nullptr);
249 
250   // Construct with default options
251   // default constructed options will have recommended settings, then
252   // we can override via C++ API or input file (parse)
253   gpmsaOptions = std::make_shared<QUESO::GPMSAOptions>();
254 
255   // C++ API options may override defaults
256   // insert Dakota parameters here: gpmsaOptions.m_emulatorPrecisionShape
257 
258   // TODO: user option for min/max vs. mu/sigma?  Also, when user
259   // gives bounds on distro or bounds on configs instead of data,
260   // should we use those?  They might not be finite... For that
261   // matter, autoscale may break on infinite domains?
262   if (gpmsaNormalize) {
263     for (unsigned int i = 0; i < (numContinuousVars + numHyperparams); ++i)
264       gpmsaOptions->set_autoscale_minmax_uncertain_parameter(i);
265     for (unsigned int i = 0; i < gpmsaConfigVars; ++i)
266       gpmsaOptions->set_autoscale_minmax_scenario_parameter(i);
267     for (unsigned int i = 0; i < num_eta; i++)
268       gpmsaOptions->set_autoscale_meanvar_output(i);
269   }
270 
271   // File-based power user parameters have the final say
272   if (!advancedOptionsFile.empty())
273     gpmsaOptions->parse(*quesoEnv, "");
274 
275   if (outputLevel >= DEBUG_OUTPUT)
276     Cout << "\nGPMSA Final Options:" << *gpmsaOptions << std::endl;
277 
278   gpmsaFactory = std::make_shared<QUESO::GPMSAFactory<GslVector, GslMatrix>>
279     (*quesoEnv, gpmsaOptions.get(), *priorRv, *configSpace,
280      *paramSpace, *nEtaSpace, buildSamples,
281      expData.num_experiments());
282 
283   // Populate simulation build data and experiment data
284   fill_simulation_data();
285   fill_experiment_data();
286 
287   // solver setup must follow factory instantiation
288   init_queso_solver();
289 
290   // Override only the calibration parameter values with the ones the
291   // user specified (or TODO: the map point if we pre-solve)
292   GslVector full_param_initials(
293     gpmsaFactory->prior().imageSet().vectorSpace().zeroVector());
294   // Initial condition of the chain: overlay user params on default GPMSA values
295   overlay_initial_params(full_param_initials);
296 
297   if (outputLevel >= VERBOSE_OUTPUT)
298     Cout << "INFO (GPMSA): Final initial point\n [ "
299 	 << full_param_initials << " ]" << std::endl;
300 
301   GslMatrix full_proposal_cov(
302     gpmsaFactory->prior().imageSet().vectorSpace().zeroVector());
303   overlay_proposal_covariance(full_proposal_cov);
304 
305   if (outputLevel >= VERBOSE_OUTPUT)
306     Cout << "INFO (GPMSA): Final proposal covariance matrix\n [ "
307 	 << full_proposal_cov << " ]" << std::endl;
308 
309   if (outputLevel >= NORMAL_OUTPUT) {
310     Cout << ">>>>> GPMSA: Performing calibration with " << mcmcType << " using "
311        << calIpMhOptionsValues->m_rawChainSize << " MCMC samples." << std::endl;
312     if (outputLevel > NORMAL_OUTPUT)
313       Cout << "\n  Calibrating " << numHyperparams << " error hyperparameters."
314 	   << std::endl;
315   }
316 
317   inverseProb->solveWithBayesMetropolisHastings(calIpMhOptionsValues.get(),
318                                                 full_param_initials,
319                                                 &full_proposal_cov);
320 
321   if (outputLevel >= NORMAL_OUTPUT) {
322     Cout << ">>>>> GPMSA: Calibration complete. Generating statistics and ouput.\n";
323 
324     Cout << "  Info: MCMC details are in the QuesoDiagnostics directory:\n"
325 	 << "          display_sub0.txt contains MCMC diagnostics\n";
326     if (standardizedSpace)
327       Cout << "          Matlab files contain chain values (in "
328 	   << "standardized probability space)\n";
329     else
330       Cout << "          Matlab files contain chain values\n";
331 
332     Cout << "  Info: GPMSA cannot currently retrieve response function statistics."
333 	 << std::endl;
334   }
335 
336   cache_acceptance_chain();
337   compute_statistics();
338 }
339 
340 
init_queso_solver()341 void NonDGPMSABayesCalibration::init_queso_solver()
342 {
343   // Note: The postRv includes GP-associated hyper-parameters
344   postRv = std::make_shared<QUESO::GenericVectorRV<GslVector, GslMatrix>>
345     ("post_", gpmsaFactory->prior().imageSet().vectorSpace());
346 
347   // TODO: consider what options are relevant for GPMSA vs. QUESO
348   set_ip_options();
349   set_mh_options();
350 
351   inverseProb =
352     std::make_shared<QUESO::StatisticalInverseProblem<GslVector, GslMatrix>>
353     ("", calIpOptionsValues.get(), *gpmsaFactory, *postRv);
354 }
355 
356 
357 
358 void NonDGPMSABayesCalibration::
overlay_proposal_covariance(GslMatrix & full_prop_cov) const359 overlay_proposal_covariance(GslMatrix& full_prop_cov) const
360 {
361   // Start with the covariance matrix for the whole prior, including
362   // GPMSA hyper-parameters.
363   gpmsaFactory->prior().pdf().distributionVariance(full_prop_cov);
364 
365   if (outputLevel >= DEBUG_OUTPUT)
366     Cout << "INFO (GPMSA): Proposal covariance matrix from GPMSA prior:\n [ "
367 	 << full_prop_cov << " ]" << std::endl;
368 
369   // Now override with user (or iterative algorithm-updated values)
370   unsigned int num_calib_params = numContinuousVars + numHyperparams;
371   for (unsigned int i=0; i<num_calib_params; ++i)
372     for (unsigned int j=0; j<num_calib_params; ++j)
373       full_prop_cov(i,j) = (*proposalCovMatrix)(i,j);
374 
375   if (outputLevel >= DEBUG_OUTPUT)
376     Cout << "INFO (GPMSA): Proposal covariance matrix after overlay: [ \n"
377 	 << full_prop_cov << " ]" << std::endl;
378 
379   // Power users can override the covariance values with a file
380   std::string initial_cov_filename =
381     "initial_proposal_covariance_sub" + quesoEnv->subIdString();
382   if (bfs::exists(initial_cov_filename + ".m")) {
383     std::set<unsigned int> sid_set;
384     sid_set.insert(quesoEnv->subId());
385     full_prop_cov.subReadContents(initial_cov_filename, "m", sid_set);
386     if (outputLevel >= NORMAL_OUTPUT)
387       Cout << "INFO (GPMSA): Initial proposal covariance overridden with values"
388 	   << " from " << (initial_cov_filename + ".m") << std::endl;
389   }
390 }
391 
392 
393 void NonDGPMSABayesCalibration::
overlay_initial_params(GslVector & full_param_initials)394 overlay_initial_params(GslVector& full_param_initials)
395 {
396   // Start with the mean of the prior
397   gpmsaFactory->prior().pdf().distributionMean(full_param_initials);
398 
399   if (outputLevel >= DEBUG_OUTPUT)
400     Cout << "INFO (GPMSA): Initial point from GPMSA prior:\n [ "
401 	 << full_param_initials << " ]" << std::endl;
402 
403   // But override whatever we want, e.g., with user-specified values:
404   unsigned int num_calib_params = numContinuousVars + numHyperparams;;
405   for (unsigned int i=0; i<num_calib_params; ++i)
406     full_param_initials[i] = (*paramInitials)[i];
407 
408   // Example of manually adjusting initial values:
409   // NOTE: This can also be done via file read in QUESO input file
410 
411   // full_param_initials[num_calib_params + 0]  = 0.4; // Emulator mean (unused, but set to avoid NaN!)
412   // full_param_initials[num_calib_params + 1]  = 0.4; // emulator precision
413   // full_param_initials[num_calib_params + 2]  = 0.4; // weights0 precision
414   // full_param_initials[num_calib_params + 3]  = 0.4; // weights1 precision
415   // full_param_initials[num_calib_params + 4]  = 0.97; // emulator corr str
416   // full_param_initials[num_calib_params + 5]  = 0.97; // emulator corr str
417   // full_param_initials[num_calib_params + 6]  = 0.97; // emulator corr str
418   // full_param_initials[num_calib_params + 7]  = 0.97; // emulator corr str
419   // full_param_initials[num_calib_params + 8]  = 0.20; // emulator corr str
420   // full_param_initials[num_calib_params + 9]  = 0.80; // emulator corr str
421   // full_param_initials[num_calib_params + 10] = 10.0; // discrepancy precision
422   // full_param_initials[num_calib_params + 11] = 0.97; // discrepancy corr str
423   // full_param_initials[num_calib_params + 12] = 8000.0; // emulator data precision
424   // full_param_initials[num_calib_params + 13] = 1.0;  // observation error precision
425 
426   if (outputLevel >= DEBUG_OUTPUT)
427     Cout << "INFO (GPMSA): Initial point after overlay:\n [ "
428 	 << full_param_initials << " ]" << std::endl;
429 
430   // Power users can override the initial values with a file
431   std::string initial_point_filename =
432     "initial_point_sub" + quesoEnv->subIdString();
433   if (bfs::exists(initial_point_filename + ".m")) {
434     std::set<unsigned int> sid_set;
435     sid_set.insert(quesoEnv->subId());
436     full_param_initials.subReadContents(initial_point_filename, "m", sid_set);
437     if (outputLevel >= NORMAL_OUTPUT)
438       Cout << "INFO (GPMSA): Initial point overridden with values from "
439 	   << (initial_point_filename + ".m") << std::endl;
440   }
441 }
442 
443 
acquire_simulation_data(RealMatrix & sim_data)444 void NonDGPMSABayesCalibration::acquire_simulation_data(RealMatrix& sim_data)
445 {
446   if (outputLevel >= NORMAL_OUTPUT)
447     Cout << ">>>>> GPMSA: Acquiring simulation data." << std::endl;
448 
449   // Rationale: Trying to keep this agnostic to mocked up config vars...
450 
451   sim_data.shape(buildSamples,
452 		 numContinuousVars + userConfigVars + numFunctions);
453 
454   if (approxImportFile.empty()) {
455 
456     // NOTE: Assumes the design is performed over the config vars
457     // TODO: make this modular on the dimensions of config vars...
458     lhsIter.run(methodPCIter->mi_parallel_level_iterator(miPLIndex));
459     const RealMatrix&  all_samples = lhsIter.all_samples();
460     const IntResponseMap& all_resp = lhsIter.all_responses();
461 
462     if (all_samples.numCols() != buildSamples ||
463         all_resp.size() != buildSamples) {
464       Cerr << "\nError: GPMSA has insufficient surrogate build data.\n";
465       abort_handler(-1);
466     }
467 
468     IntRespMCIter resp_it = all_resp.begin();
469     for (unsigned int i = 0; i < buildSamples; i++, ++resp_it) {
470       for (int j=0; j<numContinuousVars; ++j)
471 	sim_data(i, j) = all_samples(j, i);
472       for (int j=0; j<userConfigVars; ++j)
473 	sim_data(i, numContinuousVars + j) =
474 	  all_samples(numContinuousVars + j, i);
475       for (int j=0; j<numFunctions; ++j)
476 	sim_data(i, numContinuousVars + userConfigVars + j) =
477 	  resp_it->second.function_values()[j];
478     }
479 
480   }
481   else {
482 
483     // Read surrogate build data ( theta, [x, ], fns ) depending on
484     // active_only and number of user-specified config vars.  If
485     // reading active_only, have to assume all simulations conducted
486     // at nominal state variable values.
487     size_t record_len = (approxImportActiveOnly) ?
488       numContinuousVars + numFunctions :
489       numContinuousVars + userConfigVars + numFunctions;
490     if (outputLevel >= NORMAL_OUTPUT)
491       Cout << "GPMSA: Importing simulation data from '" << approxImportFile
492 	   << "'\n       with " << numContinuousVars
493 	   << " calibration variable(s), " << userConfigVars
494 	   << " configuration variable(s),\n       and " << numFunctions
495 	   << " simulation output(s)." << std::endl;
496     bool verbose = (outputLevel > NORMAL_OUTPUT);
497     TabularIO::read_data_tabular(approxImportFile, "GMPSA simulation data",
498 				 sim_data, buildSamples, record_len,
499 				 approxImportFormat, verbose);
500     // TODO: Have to fill in configuration variable values for
501     // active_only, or error and move the function data over if so...
502   }
503 
504 }
505 
506 
fill_simulation_data()507 void NonDGPMSABayesCalibration::fill_simulation_data()
508 {
509   // simulations are described by configuration, parameters, output values
510   std::vector<QUESO::SharedPtr<GslVector>::Type >
511     sim_scenarios(buildSamples),  // config var values
512     sim_params(buildSamples),     // theta var values
513     sim_outputs(buildSamples);    // simulation output (response) values
514 
515   // Instantiate each of the simulation points/outputs
516   for (unsigned int i = 0; i < buildSamples; i++) {
517     sim_scenarios[i] = std::make_shared<GslVector>(configSpace->zeroVector());
518     sim_params[i] = std::make_shared<GslVector>(paramSpace->zeroVector());
519     sim_outputs[i] = std::make_shared<GslVector>(nEtaSpace->zeroVector()); // eta
520   }
521 
522   /// simulation data, one row per simulation build sample, columns
523   /// for calibration variables, configuration variables, function
524   /// values (duplicates storage, but unifies import vs. DOE cases)
525   RealMatrix sim_data;
526   acquire_simulation_data(sim_data);
527 
528   for (int i=0; i<buildSamples; ++i) {
529 
530     for (int j=0; j<numContinuousVars; ++j)
531        (*sim_params[i])[j] = sim_data(i, j);
532 
533     for (int j=0; j<gpmsaConfigVars; ++j)
534        if (userConfigVars > 0)
535 	 (*sim_scenarios[i])[j] = sim_data(i, numContinuousVars + j);
536        else
537 	 (*sim_scenarios[i])[j] = 0.5;
538 
539     for (int j=0; j<numFunctions; ++j)
540       (*sim_outputs[i])[j] =
541 	sim_data(i, numContinuousVars + userConfigVars + j);
542 
543   }
544 
545   gpmsaFactory->addSimulations(sim_scenarios, sim_params, sim_outputs);
546 }
547 
548 
fill_experiment_data()549 void NonDGPMSABayesCalibration::fill_experiment_data()
550 {
551   unsigned int num_experiments = expData.num_experiments();
552   unsigned int experiment_size = expData.all_data(0).length();
553 
554   // characterization of the experiment data
555   std::vector<QUESO::SharedPtr<GslVector>::Type >
556    exp_scenarios(num_experiments),   // config var values
557    exp_outputs(num_experiments);        // experiment (response) values
558 
559   // BMA: Why is sim_outputs based on nEtaSpace?  Is simulation allowed
560   // to be differently sized from experiments?
561 
562   // Load the information on the experiments (scenarios and data)
563   const RealVectorArray& exp_config_vars = expData.config_vars();
564   for (unsigned int i = 0; i < num_experiments; i++) {
565 
566     exp_scenarios[i] = std::make_shared<GslVector>(configSpace->zeroVector());
567     exp_outputs[i] = std::make_shared<GslVector>(experimentSpace->zeroVector());
568 
569     // NOTE: copy_gsl will resize the target objects rather than erroring.
570     if (userConfigVars > 0)
571       copy_gsl(exp_config_vars[i], *exp_scenarios[i]);
572     else
573       (*exp_scenarios[i])[0] = 0.5;
574 
575     copy_gsl(expData.all_data(i), *exp_outputs[i]);
576 
577   }
578   // TODO: experiment space may not be numFunctions in field case
579 
580   // Experimental observation error covariance (default = I)
581   QUESO::VectorSpace<GslVector, GslMatrix>
582     total_exp_space(*quesoEnv, "experimentspace_",
583                     num_experiments * experiment_size, nullptr);
584   QUESO::SharedPtr<GslMatrix>::Type exp_covariance
585     (new GslMatrix(total_exp_space.zeroVector(), 1.0));
586 
587   if (expData.variance_active()) {
588     for (unsigned int i = 0; i < num_experiments; i++) {
589       RealSymMatrix exp_cov;
590       expData.covariance(i, exp_cov);
591       for (unsigned int j = 0; j < experiment_size; j++) {
592         for (unsigned int k = 0; k < experiment_size; k++) {
593           (*exp_covariance)(experiment_size*i+j, experiment_size*i+k) =
594             exp_cov(j,k);
595         }
596       }
597     }
598   }
599 
600   gpmsaFactory->addExperiments(exp_scenarios, exp_outputs, exp_covariance);
601 
602 }
603 
604 
605 /** This is a subset of the base class retrieval, but we can't do the
606     fn value lookups.  Eventually should be able to retrieve them from
607     GPMSA. */
cache_acceptance_chain()608 void NonDGPMSABayesCalibration::cache_acceptance_chain()
609 {
610   int num_params = numContinuousVars + numHyperparams;
611 
612   const QUESO::BaseVectorSequence<QUESO::GslVector,QUESO::GslMatrix>&
613     mcmc_chain = inverseProb->chain();
614   unsigned int num_mcmc = mcmc_chain.subSequenceSize();
615 
616   if (num_mcmc != chainSamples && outputLevel >= NORMAL_OUTPUT) {
617     Cout << "GPMSA Warning: Final chain is length " << num_mcmc
618 	 << ", not expected length " << chainSamples << std::endl;
619   }
620 
621   acceptanceChain.shapeUninitialized(numContinuousVars + numHyperparams,
622 				     chainSamples);
623   acceptedFnVals.shapeUninitialized(numFunctions, chainSamples);
624 
625   // The posterior includes GPMSA hyper-parameters, so use the postRv space
626   QUESO::GslVector qv(postRv->imageSet().vectorSpace().zeroVector());
627   RealVector nan_fn_vals(numFunctions);
628   nan_fn_vals = std::numeric_limits<double>::quiet_NaN();
629 
630   for (int i=0; i<chainSamples; ++i) {
631 
632     // translate the QUESO vector into x-space acceptanceChain
633     mcmc_chain.getPositionValues(i, qv); // extract GSLVector from sequence
634     if (standardizedSpace) {
635       // u_rv and x_rv omit any hyper-parameters
636       RealVector u_rv(numContinuousVars, false);
637       copy_gsl_partial(qv, 0, u_rv);
638       Real* acc_chain_i = acceptanceChain[i];
639       RealVector x_rv(Teuchos::View, acc_chain_i, numContinuousVars);
640       mcmcModel.probability_transformation().trans_U_to_X(u_rv, x_rv);
641       for (int j=numContinuousVars; j<num_params; ++j)
642 	acc_chain_i[j] = qv[j]; // trailing hyperparams are not transformed
643     }
644     else {
645       // A view that includes calibration params and Dakota-managed
646       // hyper-parameters, to facilitate copying from the longer qv
647       // into acceptanceChain:
648       RealVector theta_hp(Teuchos::View, acceptanceChain[i], num_params);
649       copy_gsl_partial(qv, 0, theta_hp);
650     }
651 
652     // TODO: Find a way to set meaningful function values
653     Teuchos::setCol(nan_fn_vals, i, acceptedFnVals);
654   }
655 }
656 
657 void NonDGPMSABayesCalibration::
print_results(std::ostream & s,short results_state)658 print_results(std::ostream& s, short results_state)
659 {
660   //  TODO: additional GPMSA output
661 
662   NonDBayesCalibration::print_results(s, results_state);
663 }
664 
665 } // namespace Dakota
666