1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 //- Class:	 NonDBayesCalibration
10 //- Description: Base class for generic Bayesian inference
11 //- Owner:       Laura Swiler
12 //- Checked by:
13 //- Version:
14 
15 #include "NonDBayesCalibration.hpp"
16 #include "ProblemDescDB.hpp"
17 #include "DataFitSurrModel.hpp"
18 #include "ProbabilityTransformModel.hpp"
19 #include "DataTransformModel.hpp"
20 #include "ScalingModel.hpp"
21 #include "WeightingModel.hpp"
22 #include "NonDMultilevelPolynomialChaos.hpp"
23 #include "NonDMultilevelStochCollocation.hpp"
24 #include "NonDLHSSampling.hpp"
25 #include "NPSOLOptimizer.hpp"
26 #include "SNLLOptimizer.hpp"
27 #include "Teuchos_SerialDenseHelpers.hpp"
28 #include "LHSDriver.hpp"
29 #include "dakota_mersenne_twister.hpp"
30 #include "boost/random.hpp"
31 #include "boost/random/normal_distribution.hpp"
32 #include "boost/random/variate_generator.hpp"
33 #include "boost/generator_iterator.hpp"
34 #include "boost/math/special_functions/digamma.hpp"
35 // BMA: May need to better manage DLL export / import from ANN in the future
36 // #ifdef DLL_EXPORTS
37 // #undef DLL_EXPORTS
38 // #endif
39 #include "ANN/ANN.h"
40 //#include "ANN/ANNperf.h"
41 //#include "ANN/ANNx.h"
42 #include "dakota_data_util.hpp"
43 //#include "dakota_tabular_io.hpp"
44 #include "DiscrepancyCorrection.hpp"
45 #include "bayes_calibration_utils.hpp"
46 #include "dakota_stat_util.hpp"
47 
48 static const char rcsId[]="@(#) $Id$";
49 
50 namespace Dakota {
51 
// Selector for the k-nearest-neighbor mutual information estimator used in
// posterior statistics / adaptive design; KSG2 is chosen when the user spec
// "method.nond.mutual_info_ksg2" is true (see constructor), KSG1 otherwise.
enum miAlg : unsigned short {MI_ALG_KSG1 = 0, MI_ALG_KSG2 = 1};

// initialization of statics
// Pointer to the currently running instance; set in core_run() so that
// static callback functions (e.g., response mappings) can reach object state.
NonDBayesCalibration* NonDBayesCalibration::nonDBayesInstance(NULL);
56 
57 /** This constructor is called for a standard letter-envelope iterator
58     instantiation.  In this case, set_db_list_nodes has been called and
59     probDescDB can be queried for settings from the method specification. */
NonDBayesCalibration::
NonDBayesCalibration(ProblemDescDB& problem_db, Model& model):
  NonDCalibration(problem_db, model),
  // --- emulator, MAP pre-solve, and chain controls ---
  emulatorType(probDescDB.get_short("method.nond.emulator")),
  mcmcModelHasSurrogate(false),
  mapOptAlgOverride(probDescDB.get_ushort("method.nond.pre_solve_method")),
  chainSamples(probDescDB.get_int("method.nond.chain_samples")),
  randomSeed(probDescDB.get_int("method.random_seed")),
  mcmcDerivOrder(1), // values only; augmented below per emulator/model support
  // --- adaptive (information-driven) experimental design controls ---
  adaptExpDesign(probDescDB.get_bool("method.nond.adapt_exp_design")),
  initHifiSamples (probDescDB.get_int("method.samples")),
  scalarDataFilename(probDescDB.get_string("responses.scalar_data_filename")),
  importCandPtsFile(
    probDescDB.get_string("method.import_candidate_points_file")),
  importCandFormat(
    probDescDB.get_ushort("method.import_candidate_format")),
  numCandidates(probDescDB.get_sizet("method.num_candidates")),
  maxHifiEvals(probDescDB.get_int("method.max_hifi_evaluations")),
  batchEvals(probDescDB.get_int("method.batch_size")),
  mutualInfoAlg(probDescDB.get_bool("method.nond.mutual_info_ksg2") ?
		MI_ALG_KSG2 : MI_ALG_KSG1),
  readFieldCoords(probDescDB.get_bool("responses.read_field_coordinates")),
  // --- model discrepancy controls ---
  calModelDiscrepancy(probDescDB.get_bool("method.nond.model_discrepancy")),
  discrepancyType(probDescDB.get_string("method.nond.discrepancy_type")),
  numPredConfigs(probDescDB.get_sizet("method.num_prediction_configs")),
  predictionConfigList(probDescDB.get_rv("method.nond.prediction_configs")),
  importPredConfigs(probDescDB.get_string("method.import_prediction_configs")),
  importPredConfigFormat(
    probDescDB.get_ushort("method.import_prediction_configs_format")),
  exportCorrModelFile(
    probDescDB.get_string("method.nond.export_corrected_model_file")),
  exportCorrModelFormat(
    probDescDB.get_ushort("method.nond.export_corrected_model_format")),
  exportDiscrepFile(
    probDescDB.get_string("method.nond.export_discrepancy_file")),
  exportDiscrepFormat(
    probDescDB.get_ushort("method.nond.export_discrep_format")),
  exportCorrVarFile(
    probDescDB.get_string("method.nond.export_corrected_variance_file")),
  exportCorrVarFormat(
    probDescDB.get_ushort("method.nond.export_corrected_variance_format")),
  approxCorrectionOrder(probDescDB.get_short("method.nond.correction_order")),
// BMA: This is probably wrong as config vars need not be continuous!
  configLowerBnds(probDescDB.get_rv("variables.continuous_state.lower_bounds")),
  configUpperBnds(probDescDB.get_rv("variables.continuous_state.upper_bounds")),
  // --- observation-error (hyper-parameter) calibration controls ---
  obsErrorMultiplierMode(
    probDescDB.get_ushort("method.nond.calibrate_error_mode")),
  numHyperparams(0), // sized later in init_hyper_parameters()
  invGammaAlphas(probDescDB.get_rv("method.nond.hyperprior_alphas")),
  invGammaBetas(probDescDB.get_rv("method.nond.hyperprior_betas")),
  adaptPosteriorRefine(
    probDescDB.get_bool("method.nond.adaptive_posterior_refinement")),
  // --- MCMC proposal covariance controls ---
  proposalCovarType(
    probDescDB.get_string("method.nond.proposal_covariance_type")),
  proposalCovarData(probDescDB.get_rv("method.nond.proposal_covariance_data")),
  proposalCovarFilename(
    probDescDB.get_string("method.nond.proposal_covariance_filename")),
  proposalCovarInputType(
    probDescDB.get_string("method.nond.proposal_covariance_input_type")),
  // --- chain post-processing / statistics controls ---
  burnInSamples(probDescDB.get_int("method.burn_in_samples")),
  posteriorStatsKL(probDescDB.get_bool("method.posterior_stats.kl_divergence")),
  posteriorStatsMutual(
    probDescDB.get_bool("method.posterior_stats.mutual_info")),
  posteriorStatsKDE(probDescDB.get_bool("method.posterior_stats.kde")),
  chainDiagnostics(probDescDB.get_bool("method.chain_diagnostics")),
  chainDiagnosticsCI(probDescDB.get_bool("method.chain_diagnostics.confidence_intervals")),
  calModelEvidence(probDescDB.get_bool("method.model_evidence")),
  calModelEvidMC(probDescDB.get_bool("method.mc_approx")),
  calModelEvidLaplace(probDescDB.get_bool("method.laplace_approx")),
  evidenceSamples(probDescDB.get_int("method.evidence_samples")),
  subSamplingPeriod(probDescDB.get_int("method.sub_sampling_period")),
  exportMCMCFilename(
    probDescDB.get_string("method.nond.export_mcmc_points_file")),
  exportMCMCFormat(probDescDB.get_ushort("method.nond.export_samples_format")),
  scaleFlag(probDescDB.get_bool("method.scaling")),
  weightFlag(!iteratedModel.primary_response_fn_weights().empty())
{
  // Echo a user-specified seed, or generate one (so the run is reproducible
  // if the reported seed is fed back into a subsequent study).
  if (randomSeed != 0)
    Cout << " NonDBayes Seed (user-specified) = " << randomSeed << std::endl;
  else {
    randomSeed = generate_system_seed();
    Cout << " NonDBayes Seed (system-generated) = " << randomSeed << std::endl;
  }

  // NOTE: Burn-in defaults to 0 and sub-sampling to 1. We want to
  // allow chain_samples == 0 to perform map pre-solve only, so
  // account for that case here.
  if (burnInSamples > 0 && burnInSamples >= chainSamples) {
    Cerr << "\nError: burn_in_samples must be less than chain_samples.\n";
    abort_handler(PARSE_ERROR);
  }
  // NOTE(review): relies on subSamplingPeriod >= 1 (the default per the note
  // above); a zero period would divide by zero here -- confirm parser default.
  int num_filtered = 1 + (chainSamples - burnInSamples - 1)/subSamplingPeriod;
  Cout << "\nA chain of length " << chainSamples << " has been specified. "
       << burnInSamples << " burn in samples will be \ndiscarded and every "
       << subSamplingPeriod << "-th sample will be kept in the final chain. "
       << "The \nfinal chain will have length " << num_filtered << ".\n";

  if (adaptExpDesign) {
    // TODO: instead of pulling these models out, change modes on the
    // iteratedModel
    if (iteratedModel.model_type() != "surrogate") {
      Cerr << "\nError: Adaptive Bayesian experiment design requires "
	   << "hierarchical surrogate\n       model.\n";
      abort_handler(PARSE_ERROR);
    }
    hifiModel = iteratedModel.truth_model();

    // only sample the hi-fi model for the initial points not already
    // covered by existing experimental data
    int num_exp = expData.num_experiments();
    int num_lhs_samples = std::max(initHifiSamples - num_exp, 0);
    // construct a hi-fi LHS sampler only if needed
    if (num_lhs_samples > 0) {
      unsigned short sample_type = SUBMETHOD_LHS;
      bool vary_pattern = true;
      String rng("mt19937");
      auto lhs_sampler_rep = std::make_shared<NonDLHSSampling>
	(hifiModel, sample_type, num_lhs_samples, randomSeed, rng, vary_pattern,
	 ACTIVE_UNIFORM);
      hifiSampler.assign_rep(lhs_sampler_rep);
    }
  }

  // Stochastic-expansion emulators always operate in a transformed
  // (standardized) probability space; otherwise honor the user spec.
  switch (emulatorType) {
  case PCE_EMULATOR:  case MF_PCE_EMULATOR:  case ML_PCE_EMULATOR:
  case  SC_EMULATOR:  case  MF_SC_EMULATOR:
    standardizedSpace = true; break; // nataf defined w/i ProbTransformModel
  default:
    standardizedSpace = probDescDB.get_bool("method.nond.standardized_space");
    break;
  }

  // Errors if there are correlations and the user hasn't specified standardized_space,
  // since this is currently unsupported.
  // Note that gamma distribution should be supported but currently results in a seg fault.
  if ( !standardizedSpace && iteratedModel.multivariate_distribution().correlation() ){
    Cerr << "Error: correlation is only supported if user specifies standardized_space.\n"
      << "    Only the following types of correlated random variables are supported:\n"
      << "    unbounded normal, untruncated lognormal, uniform, exponential, gumbel, \n"
      << "    frechet, and weibull."
	    << std::endl;
    abort_handler(METHOD_ERROR);
  }

  // Construct emulator objects for raw QoI, prior to data residual recast
  construct_mcmc_model();
  // define variable augmentation within residualModel
  init_hyper_parameters();

  // expand initial point by numHyperparams for use in negLogPostModel;
  // hyper-parameters start at the mode of their inverse gamma priors
  size_t i, num_orig_cv = iteratedModel.cv(),
    num_augment_cv = num_orig_cv + numHyperparams;
  mapSoln.sizeUninitialized(num_augment_cv);
  copy_data_partial(mcmcModel.continuous_variables(), mapSoln, 0);
  for (i=0; i<numHyperparams; ++i)
    mapSoln[num_orig_cv + i] = invGammaDists[i].mode();

  // Now the underlying simulation model mcmcModel is setup; wrap it
  // in a data transformation, making sure to allocate gradient/Hessian space
  if (calibrationData) {
    residualModel.assign_rep(std::make_shared<DataTransformModel>
			     (mcmcModel, expData, numHyperparams,
			      obsErrorMultiplierMode, mcmcDerivOrder));
    // update bounds for hyper-parameters: error multipliers live on [0, inf)
    Real dbl_inf = std::numeric_limits<Real>::infinity();
    for (i=0; i<numHyperparams; ++i) {
      residualModel.continuous_lower_bound(0.0,     numContinuousVars + i);
      residualModel.continuous_upper_bound(dbl_inf, numContinuousVars + i);
    }
  }
  else
    residualModel = mcmcModel;  // shallow copy

  // Order is important: data transform, then scale, then weights
  if (scaleFlag)   scale_model();
  if (weightFlag)  weight_model();

  // MAP pre-solve: select the algorithm, then build the neg-log-posterior
  // RecastModel it optimizes (optimizer itself is built in pre_run)
  init_map_optimizer();
  construct_map_model();

  int mcmc_concurrency = 1; // prior to concurrent chains
  maxEvalConcurrency *= mcmc_concurrency;
}
241 
242 
/** Construct mcmcModel, the model whose likelihood is evaluated during MCMC:
    a stochastic expansion (PCE/SC and multilevel/multifidelity variants),
    a GP/kriging data fit, or the simulation model itself, optionally wrapped
    in a probability-space transformation.  Also sets mcmcDerivOrder to the
    derivative support (bits: 1 = values, 2 = gradients, 4 = Hessians). */
void NonDBayesCalibration::construct_mcmc_model()
{
  // for adaptive experiment design, the surrogate model is the low-fi
  // model which should be calibrated
  // TODO: could avoid this lightweight copy entirely, but less clean
  Model inbound_model =
    adaptExpDesign ? iteratedModel.surrogate_model() : iteratedModel;

  switch (emulatorType) {

  case PCE_EMULATOR: case SC_EMULATOR:
  case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR: {
    mcmcModelHasSurrogate = true;
    // controls common to all stochastic-expansion emulators
    short u_space_type = probDescDB.get_short("method.nond.expansion_type");
    const RealVector& dim_pref
      = probDescDB.get_rv("method.nond.dimension_preference");
    short refine_type
        = probDescDB.get_short("method.nond.expansion_refinement_type"),
      refine_cntl
        = probDescDB.get_short("method.nond.expansion_refinement_control"),
      cov_cntl
        = probDescDB.get_short("method.nond.covariance_control"),
      rule_nest = probDescDB.get_short("method.nond.nesting_override"),
      rule_growth = probDescDB.get_short("method.nond.growth_override");
    bool pw_basis = probDescDB.get_bool("method.nond.piecewise_basis"),
       use_derivs = probDescDB.get_bool("method.derivative_usage");
    std::shared_ptr<NonDExpansion> se_rep;

    if (emulatorType == SC_EMULATOR) { // SC sparse grid interpolation
      unsigned short ssg_level
	= probDescDB.get_ushort("method.nond.sparse_grid_level");
      unsigned short tpq_order
	= probDescDB.get_ushort("method.nond.quadrature_order");
      // USHRT_MAX marks "not specified"; prefer sparse grid over quadrature
      if (ssg_level != USHRT_MAX) {
	short exp_coeff_approach = Pecos::COMBINED_SPARSE_GRID;
	if (probDescDB.get_short("method.nond.expansion_basis_type") ==
	    Pecos::HIERARCHICAL_INTERPOLANT)
	  exp_coeff_approach = Pecos::HIERARCHICAL_SPARSE_GRID;
	else if (refine_cntl)
	  exp_coeff_approach = Pecos::INCREMENTAL_SPARSE_GRID;
	se_rep = std::make_shared<NonDStochCollocation>(inbound_model, exp_coeff_approach,
	  ssg_level, dim_pref, u_space_type, refine_type, refine_cntl,
	  cov_cntl, rule_nest, rule_growth, pw_basis, use_derivs);
      }
      else if (tpq_order != USHRT_MAX)
	se_rep = std::make_shared<NonDStochCollocation>(inbound_model, Pecos::QUADRATURE,
	  tpq_order, dim_pref, u_space_type, refine_type, refine_cntl,
	  cov_cntl, rule_nest, rule_growth, pw_basis, use_derivs);
      mcmcDerivOrder = 3; // Hessian computations not yet implemented for SC
    }

    else if (emulatorType == PCE_EMULATOR) {
      unsigned short ssg_level
	= probDescDB.get_ushort("method.nond.sparse_grid_level");
      unsigned short tpq_order
	= probDescDB.get_ushort("method.nond.quadrature_order");
      unsigned short cub_int
	= probDescDB.get_ushort("method.nond.cubature_integrand");
      if (ssg_level != USHRT_MAX) { // PCE sparse grid
	short exp_coeff_approach = (refine_cntl) ?
	  Pecos::INCREMENTAL_SPARSE_GRID : Pecos::COMBINED_SPARSE_GRID;
	se_rep = std::make_shared<NonDPolynomialChaos>(inbound_model, exp_coeff_approach,
	  ssg_level, dim_pref, u_space_type, refine_type, refine_cntl,
	  cov_cntl, rule_nest, rule_growth, pw_basis, use_derivs);
      }
      else if (tpq_order != USHRT_MAX)
	se_rep = std::make_shared<NonDPolynomialChaos>(inbound_model, Pecos::QUADRATURE,
	  tpq_order, dim_pref, u_space_type, refine_type, refine_cntl,
	  cov_cntl, rule_nest, rule_growth, pw_basis, use_derivs);
      else if (cub_int != USHRT_MAX)
	se_rep = std::make_shared<NonDPolynomialChaos>(inbound_model, Pecos::CUBATURE,
	  cub_int, dim_pref, u_space_type, refine_type, refine_cntl,
	  cov_cntl, rule_nest, rule_growth, pw_basis, use_derivs);
      else { // regression PCE: LeastSq/CS, OLI
	se_rep = std::make_shared<NonDPolynomialChaos>(inbound_model,
	  probDescDB.get_short("method.nond.regression_type"),
	  probDescDB.get_ushort("method.nond.expansion_order"), dim_pref,
	  probDescDB.get_sizet("method.nond.collocation_points"),
	  probDescDB.get_real("method.nond.collocation_ratio"), // single scalar
	  randomSeed, u_space_type, refine_type, refine_cntl, cov_cntl,
	  /* rule_nest, rule_growth, */ pw_basis, use_derivs,
	  probDescDB.get_bool("method.nond.cross_validation"),
	  probDescDB.get_string("method.import_build_points_file"),
	  probDescDB.get_ushort("method.import_build_format"),
	  probDescDB.get_bool("method.import_build_active_only"));
      }
      mcmcDerivOrder = 7; // Hessian computations implemented for PCE
    }

    else if (emulatorType == MF_SC_EMULATOR) {
      // multifidelity SC: level/order specs are sequences across fidelities
      const UShortArray& ssg_level_seq
	= probDescDB.get_usa("method.nond.sparse_grid_level");
      const UShortArray& tpq_order_seq
	= probDescDB.get_usa("method.nond.quadrature_order");
      short ml_alloc_cntl
	= probDescDB.get_short("method.nond.multilevel_allocation_control"),
	ml_discrep
	= probDescDB.get_short("method.nond.multilevel_discrepancy_emulation");
      if (!ssg_level_seq.empty()) {
	short exp_coeff_approach = Pecos::COMBINED_SPARSE_GRID;
	if (probDescDB.get_short("method.nond.expansion_basis_type") ==
	    Pecos::HIERARCHICAL_INTERPOLANT)
	  exp_coeff_approach = Pecos::HIERARCHICAL_SPARSE_GRID;
	else if (refine_cntl)
	  exp_coeff_approach = Pecos::INCREMENTAL_SPARSE_GRID;
	se_rep = std::make_shared<NonDMultilevelStochCollocation>(inbound_model,
	  exp_coeff_approach, ssg_level_seq, dim_pref, u_space_type,
	  refine_type, refine_cntl, cov_cntl, ml_alloc_cntl, ml_discrep,
	  rule_nest, rule_growth, pw_basis, use_derivs);
      }
      else if (!tpq_order_seq.empty())
	se_rep = std::make_shared<NonDMultilevelStochCollocation>(inbound_model,
	  Pecos::QUADRATURE, tpq_order_seq, dim_pref, u_space_type, refine_type,
	  refine_cntl, cov_cntl, ml_alloc_cntl, ml_discrep, rule_nest,
	  rule_growth, pw_basis, use_derivs);
      mcmcDerivOrder = 3; // Hessian computations not yet implemented for SC
    }

    else if (emulatorType == MF_PCE_EMULATOR) {
      const UShortArray& ssg_level_seq
	= probDescDB.get_usa("method.nond.sparse_grid_level");
      const UShortArray& tpq_order_seq
	= probDescDB.get_usa("method.nond.quadrature_order");
      short ml_alloc_cntl
	= probDescDB.get_short("method.nond.multilevel_allocation_control"),
	ml_discrep
	= probDescDB.get_short("method.nond.multilevel_discrepancy_emulation");
      if (!ssg_level_seq.empty()) {
	short exp_coeff_approach = (refine_cntl) ?
	  Pecos::INCREMENTAL_SPARSE_GRID : Pecos::COMBINED_SPARSE_GRID;
	se_rep = std::make_shared<NonDMultilevelPolynomialChaos>(inbound_model,
	  exp_coeff_approach, ssg_level_seq, dim_pref, u_space_type,
	  refine_type, refine_cntl, cov_cntl, ml_alloc_cntl, ml_discrep,
	  rule_nest, rule_growth, pw_basis, use_derivs);
      }
      else if (!tpq_order_seq.empty())
	se_rep = std::make_shared<NonDMultilevelPolynomialChaos>(inbound_model,
	  Pecos::QUADRATURE, tpq_order_seq, dim_pref, u_space_type, refine_type,
	  refine_cntl, cov_cntl, ml_alloc_cntl, ml_discrep, rule_nest,
	  rule_growth, pw_basis, use_derivs);
      else { // regression PCE: LeastSq/CS, OLI
	SizetArray seed_seq(1, randomSeed); // reuse bayes_calib scalar spec
	se_rep = std::make_shared<NonDMultilevelPolynomialChaos>(
	  MULTIFIDELITY_POLYNOMIAL_CHAOS, inbound_model,
	  probDescDB.get_short("method.nond.regression_type"),
	  probDescDB.get_usa("method.nond.expansion_order"), dim_pref,
	  probDescDB.get_sza("method.nond.collocation_points"), // sequence
	  probDescDB.get_real("method.nond.collocation_ratio"), // scalar
	  seed_seq, u_space_type, refine_type, refine_cntl, cov_cntl,
	  ml_alloc_cntl, ml_discrep, /* rule_nest, rule_growth, */ pw_basis,
	  use_derivs, probDescDB.get_bool("method.nond.cross_validation"),
	  probDescDB.get_string("method.import_build_points_file"),
	  probDescDB.get_ushort("method.import_build_format"),
	  probDescDB.get_bool("method.import_build_active_only"));
      }
      mcmcDerivOrder = 7; // Hessian computations implemented for PCE
    }

    else if (emulatorType == ML_PCE_EMULATOR) {
      // multilevel PCE is regression-based only (no grid/quadrature branch)
      SizetArray seed_seq(1, randomSeed); // reuse bayes_calib scalar spec
      se_rep = std::make_shared<NonDMultilevelPolynomialChaos>(MULTILEVEL_POLYNOMIAL_CHAOS,
	inbound_model, probDescDB.get_short("method.nond.regression_type"),
	probDescDB.get_usa("method.nond.expansion_order"), dim_pref,
	probDescDB.get_sza("method.nond.collocation_points"), // sequence
	probDescDB.get_real("method.nond.collocation_ratio"), // scalar
	seed_seq, u_space_type, refine_type, refine_cntl, cov_cntl,
	probDescDB.get_short("method.nond.multilevel_allocation_control"),
	probDescDB.get_short("method.nond.multilevel_discrepancy_emulation"),
	/* rule_nest, rule_growth, */ pw_basis, use_derivs,
	probDescDB.get_bool("method.nond.cross_validation"),
	probDescDB.get_string("method.import_build_points_file"),
	probDescDB.get_ushort("method.import_build_format"),
	probDescDB.get_bool("method.import_build_active_only"));
      mcmcDerivOrder = 7; // Hessian computations implemented for PCE
    }

    // NOTE(review): if an SC/MF_SC spec provides neither a sparse grid level
    // nor a quadrature order, se_rep remains null and is dereferenced below
    // -- presumably the parser guarantees one is set; confirm.
    // for adaptive exp refinement, propagate controls from Bayes method spec:
    se_rep->maximum_iterations(maxIterations);
    se_rep->maximum_refinement_iterations(
      probDescDB.get_int("method.nond.max_refinement_iterations"));
    se_rep->convergence_tolerance(convergenceTol);

    stochExpIterator.assign_rep(se_rep);
    // no CDF or PDF level mappings
    RealVectorArray empty_rv_array; // empty
    se_rep->requested_levels(empty_rv_array, empty_rv_array, empty_rv_array,
      empty_rv_array, respLevelTarget, respLevelTargetReduce, cdfFlag, false);
    // extract NonDExpansion's uSpaceModel for use in likelihood evals
    mcmcModel = stochExpIterator.algorithm_space_model(); // shared rep
    break;
  }

  case GP_EMULATOR: case KRIGING_EMULATOR: {
    mcmcModelHasSurrogate = true;
    String sample_reuse; String approx_type;
    if (emulatorType == GP_EMULATOR)
      { approx_type = "global_gaussian"; mcmcDerivOrder = 3; } // grad support
    else
      { approx_type = "global_kriging";  mcmcDerivOrder = 7; } // grad,Hess
    UShortArray approx_order; // not used by GP/kriging
    short corr_order = -1, data_order = 1, corr_type = NO_CORRECTION;
    if (probDescDB.get_bool("method.derivative_usage")) {
      // derivatives for emulator construction (not emulator evaluation)
      if (inbound_model.gradient_type() != "none") data_order |= 2;
      if (inbound_model.hessian_type()  != "none") data_order |= 4;
    }
    unsigned short sample_type = SUBMETHOD_DEFAULT;
    int samples = probDescDB.get_int("method.build_samples");
    // get point samples file
    const String& import_pts_file
      = probDescDB.get_string("method.import_build_points_file");
    if (!import_pts_file.empty())
      { samples = 0; sample_reuse = "all"; } // imported points replace new LHS

    // Consider elevating lhsSampler from NonDGPMSABayesCalibration:
    Iterator lhs_iterator; Model lhs_model;
    // NKM requires finite bounds for scaling and init of correlation lengths.
    // Default truncation is +/-10 sigma, which may be overly conservative for
    // these purposes, but +/-3 sigma has little to no effect in current tests.
    bool truncate_bnds = (emulatorType == KRIGING_EMULATOR);
    if (standardizedSpace)
      lhs_model.assign_rep(std::make_shared<ProbabilityTransformModel>
			   (inbound_model, ASKEY_U, truncate_bnds)); //, 3.)
    else
      lhs_model = inbound_model; // shared rep
    // Unlike EGO-based approaches, use ACTIVE sampling mode to concentrate
    // samples in regions of higher prior density
    auto lhs_rep = std::make_shared<NonDLHSSampling>
      (lhs_model, sample_type, samples, randomSeed,
       probDescDB.get_string("method.random_number_generator"));
    lhs_iterator.assign_rep(lhs_rep);

    // request gradient/Hessian support consistent with mcmcDerivOrder
    ActiveSet gp_set = lhs_model.current_response().active_set(); // copy
    gp_set.request_values(mcmcDerivOrder); // for misfit Hessian
    mcmcModel.assign_rep(std::make_shared<DataFitSurrModel>
      (lhs_iterator, lhs_model,
       gp_set, approx_type, approx_order, corr_type, corr_order, data_order,
       outputLevel, sample_reuse, import_pts_file,
       probDescDB.get_ushort("method.import_build_format"),
       probDescDB.get_bool("method.import_build_active_only")));
    break;
  }

  case NO_EMULATOR:
    mcmcModelHasSurrogate = (inbound_model.model_type() == "surrogate");
    // ASKEY_U is currently the best option for scaling the probability space
    // (but could be expanded when the intent is not orthogonal polynomials).
    // If an override is needed to decorrelate priors be transforming to
    // STD_NORMAL space, this is managed by ProbabilityTransformModel::
    // verify_correlation_support() on a variable-by-variable basis.
    if (standardizedSpace)
      mcmcModel.assign_rep(std::make_shared<ProbabilityTransformModel>
			   (inbound_model, ASKEY_U));
    else
      mcmcModel = inbound_model; // shared rep

    // derivative support limited to what the simulation model provides
    if (mcmcModel.gradient_type() != "none") mcmcDerivOrder |= 2;
    if (mcmcModel.hessian_type()  != "none") mcmcDerivOrder |= 4;
    break;
  }
}
504 
505 
init_hyper_parameters()506 void NonDBayesCalibration::init_hyper_parameters()
507 {
508   // Initialize sizing for hyperparameters (observation error), not
509   // currently part of a RecastModel
510   size_t num_resp_groups =
511     mcmcModel.current_response().shared_data().num_response_groups();
512   if (obsErrorMultiplierMode == CALIBRATE_ONE)
513     numHyperparams = 1;
514   else if (obsErrorMultiplierMode == CALIBRATE_PER_EXPER)
515     numHyperparams = expData.num_experiments();
516   else if (obsErrorMultiplierMode == CALIBRATE_PER_RESP)
517     numHyperparams = num_resp_groups;
518   else if (obsErrorMultiplierMode == CALIBRATE_BOTH)
519     numHyperparams = expData.num_experiments() * num_resp_groups;
520 
521   // Setup priors distributions on hyper-parameters
522   if ( (invGammaAlphas.length()  > 1 &&
523 	invGammaAlphas.length() != numHyperparams) ||
524        (invGammaAlphas.length() != invGammaBetas.length()) ) {
525     Cerr << "\nError: hyperprior_alphas and hyperprior_betas must both have "
526          << "length 1 or number of calibrated\n       error multipliers.\n";
527     abort_handler(PARSE_ERROR);
528   }
529   invGammaDists.resize(numHyperparams);
530   for (size_t i=0; i<numHyperparams; ++i) {
531     // alpha = (mean/std_dev)^2 + 2
532     // beta = mean*(alpha-1)
533     // default:
534     Real alpha = 102.0, beta = 103.0;
535     // however chosen to have mode = beta/(alpha+1) = 1.0 (initial point)
536     //   mean = beta/(alpha-1) ~ 1.0
537     //   s.d. ~ 0.1
538     if (invGammaAlphas.length() == 1)
539       { alpha = invGammaAlphas[0]; beta = invGammaBetas[0]; }
540     else if (invGammaAlphas.length() == numHyperparams)
541       { alpha = invGammaAlphas[i]; beta = invGammaBetas[i]; }
542     // BMA TODO: could store only one inverse gamma if all parameters the same
543     invGammaDists[i] = Pecos::RandomVariable(Pecos::INV_GAMMA);
544     std::shared_ptr<Pecos::InvGammaRandomVariable> rv_rep =
545       std::static_pointer_cast<Pecos::InvGammaRandomVariable>
546 	     (invGammaDists[i].random_variable_rep());
547     rv_rep->update(alpha, beta);
548   }
549 }
550 
551 
552 /** Construct optimizer for MAP pre-solve
553     Emulator:     on by default; can be overridden with "pre_solve none"
554     No emulator: off by default; can be activated  with "pre_solve {sqp,nip}"
555                  relies on mapOptimizer ctor to enforce min derivative support
556     Calculation of model evidence using Laplace approximation:
557 		 this requires a MAP solve.
558 */
init_map_optimizer()559 void NonDBayesCalibration::init_map_optimizer()
560 {
561   switch (mapOptAlgOverride) {
562   case SUBMETHOD_SQP:
563 #ifndef HAVE_NPSOL
564     Cerr << "\nWarning: this executable not configured with NPSOL SQP."
565 	 << "\n         MAP pre-solve not available." << std::endl;
566     mapOptAlgOverride = SUBMETHOD_NONE; // model,optimizer not constructed
567 #endif
568     break;
569   case SUBMETHOD_NIP:
570 #ifndef HAVE_OPTPP
571     Cerr << "\nWarning: this executable not configured with OPT++ NIP."
572 	 << "\n         MAP pre-solve not available." << std::endl;
573     mapOptAlgOverride = SUBMETHOD_NONE; // model,optimizer not constructed
574 #endif
575     break;
576   case SUBMETHOD_DEFAULT: // use full Newton, if available
577     if (emulatorType || calModelEvidLaplace) {
578 #ifdef HAVE_OPTPP
579       mapOptAlgOverride = SUBMETHOD_NIP;
580 #elif HAVE_NPSOL
581       mapOptAlgOverride = SUBMETHOD_SQP;
582 #else
583       mapOptAlgOverride = SUBMETHOD_NONE;
584 #endif
585     }
586     break;
587   //case SUBMETHOD_NONE:
588   //  break;
589   }
590 
591   // Errors / warnings
592   if (mapOptAlgOverride == SUBMETHOD_NONE) {
593     if (calModelEvidLaplace) {
594       Cout << "Error: You must specify a pre-solve method for the Laplace "
595 	   << "approximation of model evidence." << std::endl;
596       abort_handler(METHOD_ERROR);
597     }
598     if (emulatorType)
599       Cerr << "\nWarning: this executable not configured with NPSOL or OPT++."
600 	   << "\n         MAP pre-solve not available." << std::endl;
601   }
602 }
603 
604 
construct_map_model()605 void NonDBayesCalibration::construct_map_model()
606 {
607   if (mapOptAlgOverride == SUBMETHOD_NONE) return;
608 
609   size_t num_total_calib_terms = residualModel.num_primary_fns();
610   Sizet2DArray vars_map_indices, primary_resp_map_indices(1),
611     secondary_resp_map_indices;
612   primary_resp_map_indices[0].resize(num_total_calib_terms);
613   for (size_t i=0; i<num_total_calib_terms; ++i)
614     primary_resp_map_indices[0][i] = i;
615   bool nonlinear_vars_map = false; BoolDequeArray nonlinear_resp_map(1);
616   nonlinear_resp_map[0] = BoolDeque(num_total_calib_terms, true);
617   SizetArray recast_vc_totals;  // empty: no change in size
618   BitArray all_relax_di, all_relax_dr; // empty: no discrete relaxation
619 
620   // recast ActiveSet requests if full-Newton NIP with gauss-Newton misfit
621   // (avoids error in unsupported Hessian requests in Model::manage_asv())
622   short nlp_resp_order = 3; // quasi-Newton optimization
623   void (*set_recast) (const Variables&, const ActiveSet&, ActiveSet&) = NULL;
624   if (mapOptAlgOverride == SUBMETHOD_NIP) {
625     nlp_resp_order = 7; // size RecastModel response for full Newton Hessian
626     if (mcmcDerivOrder == 3) // map asrv for Gauss-Newton approx
627       set_recast = gnewton_set_recast;
628   }
629 
630   // RecastModel for bound-constrained argmin(misfit - log prior)
631   negLogPostModel.assign_rep(std::make_shared<RecastModel>
632     (residualModel, vars_map_indices, recast_vc_totals,
633      all_relax_di, all_relax_dr, nonlinear_vars_map, nullptr,
634      set_recast, primary_resp_map_indices,
635      secondary_resp_map_indices, 0, nlp_resp_order,
636      nonlinear_resp_map, neg_log_post_resp_mapping, nullptr));
637 }
638 
639 
/** Construct (or reconstruct) the optimizer used to locate the MAP
    (maximum a posteriori) point by minimizing the negative log posterior.
    Solver selection follows mapOptAlgOverride; note that if the requested
    solver was not compiled in (HAVE_NPSOL / HAVE_OPTPP), the switch falls
    through silently and no optimizer is assigned. */
void NonDBayesCalibration::construct_map_optimizer()
{
  // Note: this fn may get invoked repeatedly but assign_rep() manages the
  // outgoing pointer

  switch (mapOptAlgOverride) {
#ifdef HAVE_NPSOL
  case SUBMETHOD_SQP: {
    // SQP with BFGS Hessians
    // deriv level 3: user supplies both objective and constraint gradients
    int npsol_deriv_level = 3;
    mapOptimizer.assign_rep(std::make_shared<NPSOLOptimizer>
			    (negLogPostModel, npsol_deriv_level, convergenceTol));
    break;
  }
#endif
#ifdef HAVE_OPTPP
  case SUBMETHOD_NIP:
    // full Newton (OPTPP::OptBCNewton)
    mapOptimizer.assign_rep(std::make_shared<SNLLOptimizer>
			    ("optpp_newton", negLogPostModel));
    break;
#endif
  }
}
664 
665 
/** Destructor: empty; member Iterator/Model handles release their
    shared representations automatically. */
NonDBayesCalibration::~NonDBayesCalibration()
{ }
668 
669 
/** Pre-run setup: propagate run-time variable/label/bound updates from
    any enclosing recursions up through the local Model recursion, then
    (re)construct the MAP optimizer so its configuration reflects the
    current bounds. */
void NonDBayesCalibration::pre_run()
{
  Analyzer::pre_run();

  // now that vars/labels/bounds/targets have flowed down at run-time from
  // any higher level recursions, propagate them up local Model recursions
  // so that they are correct when they propagate back down.
  // *** TO DO: count Model recursion layers on top of iteratedModel
  if (!negLogPostModel.is_null())
    negLogPostModel.update_from_subordinate_model(); // depth = max
  else
    residualModel.update_from_subordinate_model();   // depth = max

  // needs to follow bounds updates so that correct OPT++ optimizer is selected
  // (OptBCNewtonLike or OptNewtonLike)
  construct_map_optimizer();
}
687 
688 
/** Top-level Bayesian calibration run: record the active instance (used
    by static callback/recast functions), perform either the adaptive
    experimental-design meta-iteration or the standard calibration, and
    optionally calibrate a model discrepancy function afterwards. */
void NonDBayesCalibration::core_run()
{
  // static instance pointer consumed by static callbacks
  nonDBayesInstance = this;

  if (adaptExpDesign) // use meta-iteration in this class
    calibrate_to_hifi();
  else                // delegate to base class calibration
    calibrate();

  if (calModelDiscrepancy) // calibrate a model discrepancy function
    build_model_discrepancy();
    //print_discrepancy_results();
}
702 
703 
/** Initialize communicators for the components owned at this level: the
    stochastic expansion iterator (PCE/SC emulators only), the residual
    model, and — when they were constructed — the MAP optimizer and the
    high-fidelity sampler. */
void NonDBayesCalibration::derived_init_communicators(ParLevLIter pl_iter)
{
  // stochExpIterator and mcmcModel use NoDBBaseConstructor,
  // so no need to manage DB list nodes at this level
  switch (emulatorType) {
  case PCE_EMULATOR:    case SC_EMULATOR:
  case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR:
    stochExpIterator.init_communicators(pl_iter);              break;
  //default:
  //  mcmcModel.init_communicators(pl_iter, maxEvalConcurrency); break;
  }
  residualModel.init_communicators(pl_iter, maxEvalConcurrency);

  // optional components: only initialize if assigned
  if (!mapOptimizer.is_null())
    mapOptimizer.init_communicators(pl_iter);

  if (!hifiSampler.is_null())
    hifiSampler.init_communicators(pl_iter);
}
723 
724 
/** Set communicators for this level's components, mirroring
    derived_init_communicators(); also caches the parallel-level index
    for later communicator lookups (see initialize_model()). */
void NonDBayesCalibration::derived_set_communicators(ParLevLIter pl_iter)
{
  // cache the mi parallel level index for this iterator
  miPLIndex = methodPCIter->mi_parallel_level_index(pl_iter);

  // stochExpIterator and mcmcModel use NoDBBaseConstructor,
  // so no need to manage DB list nodes at this level
  switch (emulatorType) {
  case PCE_EMULATOR:    case SC_EMULATOR:
  case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR:
    stochExpIterator.set_communicators(pl_iter);              break;
  //default:
  //  mcmcModel.set_communicators(pl_iter, maxEvalConcurrency); break;
  }
  residualModel.set_communicators(pl_iter, maxEvalConcurrency);

  // optional components: only set if assigned
  if (!mapOptimizer.is_null())
    mapOptimizer.set_communicators(pl_iter);

  if (!hifiSampler.is_null())
    hifiSampler.set_communicators(pl_iter);
}
746 
747 
/** Free communicators for this level's components in reverse order of
    their initialization in derived_init_communicators(). */
void NonDBayesCalibration::derived_free_communicators(ParLevLIter pl_iter)
{
  if (!hifiSampler.is_null())
    hifiSampler.free_communicators(pl_iter);

  if (!mapOptimizer.is_null())
    mapOptimizer.free_communicators(pl_iter);

  residualModel.free_communicators(pl_iter, maxEvalConcurrency);
  switch (emulatorType) {
  case PCE_EMULATOR:    case SC_EMULATOR:
  case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR:
    stochExpIterator.free_communicators(pl_iter);              break;
  //default:
  //  mcmcModel.free_communicators(pl_iter, maxEvalConcurrency); break;
  }
}
765 
766 
/** Build the emulator (if any) prior to sampling: run the stochastic
    expansion iterator for PCE/SC-family emulators, or build the
    data-fit approximation otherwise; a zero emulatorType (NO_EMULATOR)
    builds nothing. */
void NonDBayesCalibration::initialize_model()
{
  switch (emulatorType) {
  case PCE_EMULATOR: case SC_EMULATOR:
  case ML_PCE_EMULATOR: case MF_PCE_EMULATOR: case MF_SC_EMULATOR: {
    ParLevLIter pl_iter = methodPCIter->mi_parallel_level_iterator(miPLIndex);
    stochExpIterator.run(pl_iter); break;
  }
  default: // GPs and NO_EMULATOR
    //resize_final_statistics_gradients(); // not required
    // nonzero emulatorType here means a data-fit (e.g. GP) emulator
    if (emulatorType)
      mcmcModel.build_approximation();
    break;
  }
  if(posteriorStatsMutual)
    Cout << "Mutual Information estimation not yet implemented\n";
}
784 
785 
calibrate_to_hifi()786 void NonDBayesCalibration::calibrate_to_hifi()
787 {
788   const RealVector initial_point(Teuchos::Copy,
789       				 mcmcModel.continuous_variables().values(),
790 				 mcmcModel.continuous_variables().length());
791 
792   /* TODO:
793      - Handling of hyperparameters
794      - More efficient resizing/reconstruction
795      - Use hierarhical surrogate eval modes
796   */
797   int num_exp = expData.num_experiments();
798   int num_lhs_samples = std::max(initHifiSamples - num_exp, 0);
799   if (num_lhs_samples > 0)
800     add_lhs_hifi_data();
801   num_exp = expData.num_experiments();
802   int random_seed = randomSeed;
803 
804   // Apply hifi error
805   const RealVector& hifi_sim_error = hifiModel.current_response().
806                                        shared_data().simulation_error();
807   if (hifi_sim_error.length() > 0)
808     for (int i = 0; i < num_exp; i++)
809       apply_error_vec(hifi_sim_error, random_seed, i);
810 
811   if (outputLevel >= DEBUG_OUTPUT)
812     for (size_t i=0; i<initHifiSamples; i++)
813       Cout << "Exp Data  i " << i << " value = " << expData.all_data(i);
814 
815   // Build matrix of candidate designs
816   int num_candidates = numCandidates;
817   RealMatrix design_matrix;
818   build_designs(design_matrix);
819 
820   bool stop_metric = false;
821   size_t optimal_ind;
822   double max_MI;
823   double prev_MI;
824   int max_hifi = (maxHifiEvals > -1.) ? maxHifiEvals : numCandidates;
825   int num_hifi = 0;
826   int num_it = 1;
827   std::ofstream out_file("experimental_design_output.txt");
828 
829   if (outputLevel >= DEBUG_OUTPUT) {
830     Cout << "Design Matrix   " << design_matrix << '\n';
831     Cout << "Max high-fidelity model runs = " << max_hifi << "\n\n";
832   }
833 
834   while (!stop_metric) {
835 
836     // EVALUATE STOPPING CRITERIA
837     stop_metric = eval_hi2lo_stop(stop_metric, prev_MI, max_MI, num_it,
838 	                          num_hifi, max_hifi, num_candidates);
839 
840     // If the experiment data changed, need to update a number of
841     // models that wrap it.  TODO: make this more lightweight instead
842     // of reconstructing
843 
844     // BMA TODO: this doesn't permit use of hyperparameters (see main ctor)
845     mcmcModel.continuous_variables(initial_point);
846     residualModel.assign_rep(std::make_shared<DataTransformModel>
847 			     (mcmcModel, expData, numHyperparams,
848 			      obsErrorMultiplierMode, mcmcDerivOrder));
849     construct_map_optimizer();
850 
851     // Run the underlying calibration solver (MCMC)
852     calibrate();
853 
854     if (outputLevel >= DEBUG_OUTPUT) {
855       // Print chain moments
856       StringArray combined_labels;
857       copy_data(residualModel.continuous_variable_labels(),
858   	combined_labels);
859       NonDSampling::print_moments(Cout, chainStats, RealMatrix(),
860 	  "posterior variable", STANDARD_MOMENTS, combined_labels, false);
861       // Print response moments
862       StringArray resp_labels = mcmcModel.current_response().function_labels();
863       NonDSampling::print_moments(Cout, fnStats, RealMatrix(),
864           "response function", STANDARD_MOMENTS, resp_labels, false);
865     }
866 
867     if (!stop_metric || max_hifi == 0) {
868 
869       if (outputLevel >= NORMAL_OUTPUT)
870 	print_hi2lo_begin(num_it);
871 
872       // After QUESO is run, get the posterior values of the samples; go
873       // through all the designs and pick the one with maximum mutual
874       // information
875 
876       RealMatrix mi_chain;
877       filter_chain(acceptanceChain, mi_chain, 5000);
878       int num_filtered = mi_chain.numCols();
879 
880       int batch_size = batchEvals;
881       if (max_hifi != 0)
882         if (num_candidates < batchEvals || max_hifi - num_hifi < batchEvals)
883 	  batchEvals = min(num_candidates, max_hifi - num_hifi);
884       // Build optimal observations matrix, contains obsverations from
885       // previously selected optimal designs
886       RealMatrix optimal_obs;
887       RealVector optimal_config(design_matrix.numRows());
888       RealMatrix optimal_config_matrix(design_matrix.numRows(), batchEvals);
889       RealVector MI_vec(batchEvals);
890 
891       RealMatrix Xmatrix;
892       // For loop for batch MI
893       for (int batch_n = 1; batch_n < batchEvals+1; batch_n ++) {
894         Xmatrix.reshape(numContinuousVars + batch_n * numFunctions,
895 	    		num_filtered);
896 
897         // Build simulation error matrix
898         RealMatrix sim_error_matrix;
899         const RealVector& sim_error_vec = mcmcModel.current_response().
900                                           shared_data().simulation_error();
901         if (sim_error_vec.length() > 0) {
902           sim_error_matrix.reshape(numFunctions, num_filtered);
903           build_error_matrix(sim_error_vec, sim_error_matrix, random_seed);
904         }
905 
906         for (size_t i=0; i<num_candidates; i++) {
907           RealVector xi_i = Teuchos::getCol(Teuchos::View, design_matrix,
908 	      				    int(i));
909           Model::inactive_variables(xi_i, mcmcModel);
910 
911 	  build_hi2lo_xmatrix(Xmatrix, batch_n, mi_chain, sim_error_matrix);
912 
913           // calculate the mutual information b/w post theta and lofi responses
914           Real MI = knn_mutual_info(Xmatrix, numContinuousVars,
915 				    batch_n * numFunctions, mutualInfoAlg);
916 	  if (outputLevel >= NORMAL_OUTPUT)
917 	    print_hi2lo_status(num_it, i, xi_i, MI);
918 
919           // Now track max MI:
920           if (i == 0) {
921 	    max_MI = MI;
922 	    optimal_ind = i;
923           }
924           else
925             if ( MI > max_MI) {
926               max_MI = MI;
927 	      optimal_ind = i;
928             }
929         } // end for over the number of candidates
930 
931         MI_vec[batch_n-1] = max_MI;
932         optimal_config = Teuchos::getCol(Teuchos::Copy, design_matrix,
933     					           int(optimal_ind));
934 	Teuchos::setCol(optimal_config, batch_n-1, optimal_config_matrix);
935         // Update optimal_obs matrix
936         if (batchEvals > 1) {
937 	  // Evaluate lofi model at optimal design, update Xmatrix
938 	  RealMatrix lofi_resp_matrix;
939           Model::inactive_variables(optimal_config, mcmcModel);
940 	  Model::evaluate(mi_chain, mcmcModel, lofi_resp_matrix);
941           if (sim_error_matrix.numRows() > 0)
942 	    lofi_resp_matrix += sim_error_matrix;
943 
944 	  RealMatrix optimal_obs
945 	    (Teuchos::View, Xmatrix, numFunctions, num_filtered,
946 	     numContinuousVars + (batch_n-1)*numFunctions, 0);
947 	  optimal_obs.assign(lofi_resp_matrix);
948         }
949 
950         // update list of candidates
951         remove_column(design_matrix, optimal_ind);
952         --num_candidates;
953 	if (batch_size > 1)
954           if (outputLevel >= NORMAL_OUTPUT)
955 	    print_hi2lo_batch_status(num_it, batch_n, batchEvals,
956 				     optimal_config, max_MI);
957       } // end batch_n loop
958 
959       // RUN HIFI MODEL WITH NEW POINT(S)
960       RealMatrix resp_matrix;
961       if (max_hifi > 0) {
962 	run_hifi(optimal_config_matrix, resp_matrix);
963         if (hifi_sim_error.length() > 0) // apply sim error to new point
964           for (int i = 0; i < optimal_config_matrix.numCols(); i++)
965             apply_error_vec(hifi_sim_error, random_seed, num_exp+num_hifi+i);
966 	num_hifi += optimal_config_matrix.numCols();;
967       }
968       num_it++;
969 
970       // Print results to screen and to file
971       if (outputLevel >= NORMAL_OUTPUT)
972 	print_hi2lo_selected(num_it, batchEvals, optimal_config_matrix,
973 	    		     optimal_config, max_MI);
974       print_hi2lo_file(out_file, num_it, batchEvals, optimal_config_matrix,
975 	    	MI_vec, max_hifi, resp_matrix, optimal_config, max_MI);
976     } // end MI loop
977   } // end while loop
978 }
979 
/** Evaluate the stopping criteria for the adaptive experimental design
    loop and return true when any is met: (1) relative change in the
    maximum mutual information below 5%, (2) no remaining design
    candidates, or (3) the high-fidelity evaluation budget is reached.

    NOTE(review): stop_metric and prev_MI are passed by value, so the
    assignments to prev_MI below do not reach the caller and the caller's
    prev_MI is never updated between iterations -- the relative-MI test
    therefore compares against whatever value the caller holds.  These
    look like they were intended to be reference parameters; confirm
    against the header declaration before changing. */
bool NonDBayesCalibration::eval_hi2lo_stop(bool stop_metric, double prev_MI,
    			   double max_MI, int num_it, int num_hifi, int
			   max_hifi, int num_candidates)
{
  // check relative MI change
  if (num_it == 1)
    prev_MI = max_MI;
  else if (num_it > 1) {
    double MIdiff = prev_MI - max_MI;
    double MIrel = fabs(MIdiff/prev_MI);
    if (MIrel < 0.05) {
      stop_metric = true;
      Cout << "Experimental Design Stop Criteria met: "
           << "Relative change in mutual information is \n"
           << "sufficiently small \n"
           << '\n';
    }
    else
      prev_MI = max_MI;
  }

  // check remaining number of candidates
  if (num_candidates == 0) {
    stop_metric = true;
    Cout << "Experimental Design Stop Criteria met: "
         << "Design candidates have been exhausted \n"
         << '\n';
  }

  // check number of hifi evaluations
  if (num_hifi == max_hifi) {
    stop_metric = true;
    Cout << "Experimental Design Stop Criteria met: "
         << "Maximum number of hifi evaluations has \n"
         << "been reached \n"
         << '\n';
  }
  return stop_metric;
}
1019 
print_hi2lo_begin(int num_it)1020 void NonDBayesCalibration::print_hi2lo_begin(int num_it)
1021 {
1022   Cout << "\n----------------------------------------------\n";
1023   Cout << "Begin Experimental Design Iteration " << num_it;
1024   Cout << "\n----------------------------------------------\n";
1025 }
1026 
print_hi2lo_status(int num_it,int i,const RealVector & xi_i,double MI)1027 void NonDBayesCalibration::print_hi2lo_status(int num_it, int i,
1028     			   const RealVector& xi_i, double MI)
1029 {
1030   Cout << "\n----------------------------------------------\n";
1031   Cout << "Experimental Design Iteration "<< num_it <<" Progress";
1032   Cout << "\n----------------------------------------------\n";
1033   Cout << "Design candidate " << i << " :\n" << xi_i;
1034   Cout << "Mutual Information = " << MI << '\n';
1035 }
1036 
print_hi2lo_batch_status(int num_it,int batch_n,int batchEvals,const RealVector & optimal_config,double max_MI)1037 void NonDBayesCalibration::print_hi2lo_batch_status(int num_it, int batch_n,
1038     			   int batchEvals, const RealVector& optimal_config,
1039 			   double max_MI)
1040 {
1041   Cout << "\n----------------------------------------------\n";
1042   Cout << "Experimental Design Iteration "<< num_it <<" Progress";
1043   Cout << "\n----------------------------------------------\n";
1044   Cout << "Point " << batch_n << " of " << batchEvals
1045        << " selected\n";
1046   Cout << "Optimal design:\n" << optimal_config;
1047   Cout << "Mutual information = " << max_MI << '\n';
1048   Cout << "\n";
1049 }
1050 
print_hi2lo_selected(int num_it,int batchEvals,RealMatrix & optimal_config_matrix,const RealVector & optimal_config,double max_MI)1051 void NonDBayesCalibration::print_hi2lo_selected(int num_it, int batchEvals,
1052     			   RealMatrix& optimal_config_matrix, const RealVector&
1053       		           optimal_config, double max_MI)
1054 {
1055   Cout << "\n----------------------------------------------\n";
1056   Cout << "Experimental Design Iteration " << num_it-1 << " Complete";
1057   Cout << "\n----------------------------------------------\n";
1058   if (batchEvals > 1) {
1059     Cout << batchEvals << " optimal designs selected\n";
1060     for (int batch_n = 0; batch_n < batchEvals; batch_n++) {
1061       RealVector col = Teuchos::getCol(Teuchos::View,
1062 			       optimal_config_matrix, batch_n);
1063       Cout << col;
1064     }
1065   }
1066   else
1067     Cout << "Optimal design:\n" << optimal_config;
1068   Cout << "Mutual information = " << max_MI << '\n';
1069   Cout << "\n";
1070 }
1071 
print_hi2lo_file(std::ostream & out_file,int num_it,int batchEvals,RealMatrix & optimal_config_matrix,const RealVector & MI_vec,int max_hifi,RealMatrix & resp_matrix,const RealVector & optimal_config,double max_MI)1072 void NonDBayesCalibration::print_hi2lo_file(std::ostream& out_file, int num_it,
1073     			   int batchEvals, RealMatrix& optimal_config_matrix,
1074 			   const RealVector& MI_vec, int max_hifi, RealMatrix&
1075 			   resp_matrix, const RealVector& optimal_config,
1076 			   double max_MI)
1077 {
1078 
1079   out_file << "ITERATION " << num_it -1 << "\n";
1080   if (batchEvals > 1) {
1081     out_file << batchEvals << " optimal designs selected\n\n";
1082     for (int batch_n = 0; batch_n < batchEvals; batch_n++) {
1083       RealVector col = Teuchos::getCol(Teuchos::View,
1084                                 optimal_config_matrix, batch_n);
1085       out_file << "Design point " << col;
1086       out_file << "Mutual Information = " << MI_vec[batch_n] << '\n';
1087       if (max_hifi > 0) {
1088         RealVector col = Teuchos::getCol(Teuchos::View, resp_matrix,
1089    	                                 batch_n);
1090         out_file << "Hifi Response = " << col << '\n';
1091       }
1092     }
1093   }
1094   else {
1095     out_file << "Optimal Design: " << optimal_config;
1096     out_file << "Mutual Information = " << max_MI << '\n';
1097     if (max_hifi > 0) {
1098       RealVector col = Teuchos::getCol(Teuchos::View, resp_matrix, 0);
1099       out_file << "Hifi Response = " << col << '\n';
1100     }
1101   }
1102 }
1103 
/** Run the LHS sampler over the hifi model and fold the resulting
    samples/responses into expData: when no experiments were imported,
    the LHS results seed a fresh ExperimentData; otherwise each LHS
    (sample, response) pair is appended to the existing experiments. */
void NonDBayesCalibration::add_lhs_hifi_data()
{
  hifiSampler.run();

  int num_exp = expData.num_experiments();
  const RealMatrix& all_samples = hifiSampler.all_samples();
  const IntResponseMap& all_responses = hifiSampler.all_responses();

  if (num_exp == 0) {
    // No file data; all initial hifi calibration data points come from LHS
    // BMA TODO: Once ExperimentData can be updated, post this into
    // expData directly
    ExperimentData exp_data(initHifiSamples,
                            mcmcModel.current_response().shared_data(),
                            all_samples, all_responses, outputLevel);
    expData = exp_data;
  }
  else {
    // If number of num_experiments is less than the number of desired initial
    // samples, run LHS to supplement
    IntRespMCIter responses_it = all_responses.begin();
    IntRespMCIter responses_end = all_responses.end();
    for (int i=0 ; responses_it != responses_end; ++responses_it, ++i) {
      // column i of all_samples holds the config vars for response i
      RealVector col_vec =
        Teuchos::getCol(Teuchos::Copy, const_cast<RealMatrix&>(all_samples),
                        i);
      expData.add_data(col_vec, responses_it->second.copy());
    }
  }
}
1134 
/** Draw zero-mean Gaussian noise and apply it to the data for one
    experiment.  sim_error_vec holds simulation error variances: a single
    entry is shared across all response functions (one seed for the whole
    draw), while a length-numFunctions vector reseeds per component with
    an incremented seed.  stoch_seed is advanced (in-out) so subsequent
    calls produce different draws. */
void NonDBayesCalibration::apply_error_vec(const RealVector& sim_error_vec,
    			   int &stoch_seed, int experiment)
{
  int num_exp = expData.num_experiments();
  RealVector error_vec(numFunctions);
  Real stdev;
  boost::mt19937 rnumGenerator;
  if (sim_error_vec.length() == 1) {
    // common variance: one generator seeded once for all functions
    rnumGenerator.seed(stoch_seed);
    stdev = std::sqrt(sim_error_vec[0]);
    boost::normal_distribution<> err_dist(0.0, stdev);
    boost::variate_generator<boost::mt19937,
           boost::normal_distribution<> > err_gen(rnumGenerator, err_dist);
      for (size_t j = 0; j < numFunctions; j++)
        error_vec[j] = err_gen();
      expData.apply_simulation_error(error_vec, experiment);
  }
  else {
      // per-function variance: fresh seed and distribution per component
      for (size_t j = 0; j < numFunctions; j++) {
        ++stoch_seed;
        stdev = std::sqrt(sim_error_vec[j]);
        rnumGenerator.seed(stoch_seed);
        boost::normal_distribution<> err_dist(0.0, stdev);
        boost::variate_generator<boost::mt19937,
               boost::normal_distribution<> > err_gen(rnumGenerator, err_dist);
        error_vec[j] = err_gen();
      }
      expData.apply_simulation_error(error_vec, experiment);
  }
  // advance the seed for the next caller
  ++stoch_seed;
}
1166 
/** Fill sim_error_matrix (numFunctions x num_filtered, pre-shaped by the
    caller) with zero-mean Gaussian draws whose variances come from
    sim_error_vec: a single entry gives a common variance for the whole
    matrix (one seed), while a per-function vector reseeds on every draw
    with an incremented seed.  stoch_seed is advanced (in-out) so
    successive calls differ. */
void NonDBayesCalibration::build_error_matrix(const RealVector& sim_error_vec,
                           RealMatrix& sim_error_matrix, int &stoch_seed)
{
  Real stdev;
  RealVector col_vec(numFunctions);
  boost::mt19937 rnumGenerator;
  int num_filtered = sim_error_matrix.numCols();
  ++stoch_seed;
  if (sim_error_vec.length() == 1) {
    // common variance: single generator for all columns
    rnumGenerator.seed(stoch_seed);
    stdev = std::sqrt(sim_error_vec[0]);
    boost::normal_distribution<> err_dist(0.0, stdev);
    boost::variate_generator<boost::mt19937,
                             boost::normal_distribution<> >
           err_gen(rnumGenerator, err_dist);
    for (int j = 0; j < num_filtered; j++) {
      for (size_t k = 0; k < numFunctions; k++) {
        col_vec[k] = err_gen();
      }
      Teuchos::setCol(col_vec, j, sim_error_matrix);
    }
  }
  else {
    // per-function variance: fresh seed/distribution for each entry
    for (int j = 0; j < num_filtered; j++) {
      for (size_t k = 0; k < numFunctions; k++) {
        ++stoch_seed;
        rnumGenerator.seed(stoch_seed);
        stdev = std::sqrt(sim_error_vec[k]);
        boost::normal_distribution<> err_dist(0.0, stdev);
        boost::variate_generator<boost::mt19937,
                   boost::normal_distribution<> >
               err_gen(rnumGenerator, err_dist);
        col_vec[k] = err_gen();
      }
      Teuchos::setCol(col_vec, j, sim_error_matrix);
    }
  }
}
1205 
/** Assemble the (num_design_vars x numCandidates) matrix of candidate
    design points.  Candidates are first imported from importCandPtsFile
    when provided (truncated with a warning if the file holds more than
    numCandidates), and any remaining columns are filled via LHS sampling
    over the hifiModel's active variables. */
void NonDBayesCalibration::build_designs(RealMatrix& design_matrix)
{
  // We assume the hifiModel's active variables are the config vars
  size_t num_candidates_in = 0, num_design_vars =
    hifiModel.cv() + hifiModel.div() + hifiModel.dsv() + hifiModel.drv();
  design_matrix.shape(num_design_vars, numCandidates);

  // If available, import data first
  if (!importCandPtsFile.empty()) {
    RealMatrix design_matrix_in;
    TabularIO::read_data_tabular(importCandPtsFile,
				 "user-provided candidate points",
				 design_matrix_in, num_design_vars,
				 importCandFormat, false);
    num_candidates_in = design_matrix_in.numCols();
    if (num_candidates_in > numCandidates) {
      // truncate the imported data
      num_candidates_in = numCandidates;
      design_matrix_in.reshape(num_design_vars, num_candidates_in);
      if (outputLevel >= VERBOSE_OUTPUT) {
	Cout << "\nWarning: Bayesian design of experiments only using the "
	     << "first " << numCandidates << " candidates in "
	     << importCandPtsFile << '\n';
      }
    }
    // populate the sub-matrix (possibly full matrix) of imported candidates
    RealMatrix des_mat_imported(Teuchos::View, design_matrix,
				num_design_vars, num_candidates_in, 0, 0);
    des_mat_imported.assign(design_matrix_in);
  }

  // If needed, supplement with LHS-generated designs
  if (num_candidates_in < numCandidates) {
    size_t new_candidates = numCandidates - num_candidates_in;

    Iterator lhs_iterator2;
    unsigned short sample_type = SUBMETHOD_LHS;
    bool vary_pattern = true;
    String rng("mt19937");
    // offset the seed so these samples differ from other LHS runs
    int random_seed_1 = randomSeed+1;
    auto lhs_sampler_rep2 = std::make_shared<NonDLHSSampling>
      (hifiModel, sample_type, new_candidates, random_seed_1,
       rng, vary_pattern, ACTIVE_UNIFORM);
    lhs_iterator2.assign_rep(lhs_sampler_rep2);
    // pre_run generates the samples without evaluating the model
    lhs_iterator2.pre_run();

    // populate the sub-matrix (possibly full matrix) of generated candidates
    RealMatrix des_mat_generated(Teuchos::View, design_matrix,
				 num_design_vars, new_candidates,
				 0, num_candidates_in);
    des_mat_generated.assign(lhs_iterator2.all_samples());
  }
}
1259 
1260 
/** Evaluate the low-fidelity model at each filtered posterior sample
    (columns of mi_chain) and assemble Xmatrix for the mutual information
    estimate: the leading numContinuousVars rows hold the posterior
    parameter samples and block i (numFunctions rows) holds the current
    responses, with simulation error added when sim_error_matrix is
    nonempty. */
void NonDBayesCalibration::build_hi2lo_xmatrix(RealMatrix& Xmatrix, int i,
    			   const RealMatrix& mi_chain, RealMatrix&
			   sim_error_matrix)
{
  // run the model at each posterior sample, with option to batch
  // evaluate the lo-fi model

  // receive the evals in a separate matrix for safety
  RealMatrix lofi_resp_matrix;
  Model::evaluate(mi_chain, mcmcModel, lofi_resp_matrix);

  //concatenate posterior_theta and lofi_resp_mat into Xmatrix
  RealMatrix xmatrix_theta(Teuchos::View, Xmatrix,
 		   numContinuousVars, Xmatrix.numCols());
  xmatrix_theta.assign(mi_chain);

  // view of the i-th response block (rows offset by the prior blocks)
  RealMatrix xmatrix_curr_responses
    (Teuchos::View, Xmatrix, numFunctions, Xmatrix.numCols(),
     numContinuousVars + (i-1)*numFunctions, 0);
  xmatrix_curr_responses.assign(lofi_resp_matrix);

  if (sim_error_matrix.numRows() > 0)
    xmatrix_curr_responses += sim_error_matrix;
}
1285 
/** Batch-evaluate hifiModel at each column (configuration) of
    optimal_config_matrix, returning responses as columns of resp_matrix
    and appending each (config, response) pair to expData. */
void NonDBayesCalibration::run_hifi(RealMatrix& optimal_config_matrix,
    			   RealMatrix& resp_matrix)
{
  // batch evaluate hifiModel, populating resp_matrix
  Model::evaluate(optimal_config_matrix, hifiModel, resp_matrix);
  // update hifi experiment data
  RealMatrix::ordinalType col_ind;
  RealMatrix::ordinalType num_evals = optimal_config_matrix.numCols();
  for (col_ind = 0; col_ind < num_evals; ++col_ind) {
    RealVector config_vars =
      Teuchos::getCol(Teuchos::Copy, optimal_config_matrix, col_ind);
    // ExperimentData requires a new Response for each insertion
    RealVector hifi_fn_vals =
      Teuchos::getCol(Teuchos::Copy, resp_matrix, col_ind);
    Response hifi_resp = hifiModel.current_response().copy();
    hifi_resp.function_values(hifi_fn_vals);
    expData.add_data(config_vars, hifi_resp);
  }
}
1305 
build_model_discrepancy()1306 void NonDBayesCalibration::build_model_discrepancy()
1307 {
1308   size_t num_field_groups = expData.num_fields();
1309   if (num_field_groups == 0)
1310     build_scalar_discrepancy();
1311   else {
1312     if (readFieldCoords)
1313       build_field_discrepancy();
1314     else {
1315       Cout << "You must specify read_field_coodinates in input file in order "
1316            << "to calculate model discrepancy\n";
1317       abort_handler(METHOD_ERROR);
1318     }
1319   }
1320 }
1321 
build_scalar_discrepancy()1322 void NonDBayesCalibration::build_scalar_discrepancy()
1323 {
1324   // For now, use average params (unfiltered)
1325   RealMatrix acc_chain_transpose(acceptanceChain, Teuchos::TRANS);
1326   int num_cols = acc_chain_transpose.numCols();
1327   RealVector ave_params(num_cols);
1328   compute_col_means(acc_chain_transpose, ave_params);
1329   mcmcModel.continuous_variables(ave_params);
1330 
1331   int num_exp = expData.num_experiments();
1332   size_t num_configvars = expData.config_vars()[0].length();
1333   RealMatrix allConfigInputs(num_configvars,num_exp);
1334   for (int i = 0; i < num_exp; i++) {
1335     RealVector config_vec = expData.config_vars()[i];
1336     Teuchos::setCol(config_vec, i, allConfigInputs);
1337   }
1338 
1339   // Initialize DiscrepancyCorrection class
1340   IntSet fn_indices;
1341   // Hardcode for now, call id_surrogates eventually? See SurrogateModel.cpp
1342   for (int i = 0; i < numFunctions; ++i)
1343     fn_indices.insert(i);
1344   DiscrepancyCorrection modelDisc;
1345   short corr_type = ADDITIVE_CORRECTION;
1346   modelDisc.initialize(fn_indices, numFunctions, num_configvars, corr_type,
1347        		       approxCorrectionOrder, discrepancyType);
1348 
1349   // Construct config var information
1350   Variables vars_copy = mcmcModel.current_variables().copy();
1351   std::pair<short, short> view(MIXED_STATE, EMPTY_VIEW);
1352   SizetArray vars_comps_totals(NUM_VC_TOTALS, 0);
1353   vars_comps_totals = mcmcModel.current_variables().shared_data().
1354     		      inactive_components_totals();
1355   SharedVariablesData svd(view, vars_comps_totals);
1356   Variables configvars(svd);
1357   VariablesArray configvar_array(num_exp);
1358   for (int i=0; i<num_exp; i++) {
1359     const RealVector& config_i = Teuchos::getCol(Teuchos::View,
1360 				 allConfigInputs, i);
1361     Model::inactive_variables(config_i, mcmcModel, vars_copy);
1362     configvars.continuous_variables(vars_copy.inactive_continuous_variables());
1363     configvars.discrete_int_variables(vars_copy.
1364 				      inactive_discrete_int_variables());
1365     configvars.discrete_real_variables(vars_copy.
1366 				       inactive_discrete_real_variables());
1367     configvar_array[i] = configvars.copy();
1368   }
1369 
1370   // Construct response information from expData and model
1371   ResponseArray simresponse_array(num_exp);
1372   ResponseArray expresponse_array(num_exp);
1373   for (int i = 0; i<num_exp; i++){
1374     RealVector config_vec = Teuchos::getCol(Teuchos::View, allConfigInputs,
1375 		 	    i);
1376     Model::inactive_variables(config_vec, mcmcModel);
1377     mcmcModel.evaluate();
1378     simresponse_array[i] = mcmcModel.current_response().copy();
1379     expresponse_array[i] = expData.response(i);
1380   }
1381   //Cout << "sim response array = " << simresponse_array << '\n';
1382   //Cout << "exp response array = " << expresponse_array << '\n';
1383   bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1384   modelDisc.compute(configvar_array, expresponse_array, simresponse_array,
1385       		    quiet_flag);
1386 
1387   // Construct config var information for prediction configs
1388   int num_pred;
1389   RealMatrix configpred_mat;
1390   RealVector config(1); //KAM: currently assume only one config var
1391   VariablesArray configpred_array;
1392   if (!importPredConfigs.empty()) {
1393     TabularIO::read_data_tabular(importPredConfigs,
1394 				 "user-provided prediction configurations",
1395 				 configpred_mat, num_configvars,
1396 				 importPredConfigFormat, false);
1397     num_pred = configpred_mat.numCols();
1398     configpred_array.resize(num_pred);
1399     for (int i = 0; i < num_pred; i++) {
1400       config = Teuchos::getCol(Teuchos::View, configpred_mat, i);
1401       configvars.continuous_variables(config);
1402       configpred_array[i] = configvars.copy();
1403     }
1404 
1405   }
1406   else if (!predictionConfigList.empty()) {
1407     num_pred = predictionConfigList.length();
1408     configpred_array.resize(num_pred);
1409     configpred_mat.shapeUninitialized(num_configvars, num_pred);
1410     for (int i = 0; i < num_pred; i++) {
1411       config = predictionConfigList[i];
1412       configvars.continuous_variables(config);
1413       configpred_array[i] = configvars.copy();
1414       Teuchos::setCol(config, i, configpred_mat);
1415     }
1416   }
1417   else {
1418     num_pred = ( numPredConfigs > 0) ? numPredConfigs : 10;
1419     configpred_array.resize(num_pred);
1420     configpred_mat.shapeUninitialized(num_configvars, num_pred);
1421     double config_step;
1422     if (numPredConfigs == 1)
1423       config_step = (configUpperBnds[0] - configLowerBnds[0])/2;
1424     else
1425       config_step = (configUpperBnds[0]-configLowerBnds[0])/(num_pred-1);
1426     for (int i = 0; i < num_pred; i++){
1427       config = configLowerBnds[0] + config_step*i;
1428       configvars.continuous_variables(config);
1429       configpred_array[i] = configvars.copy();
1430       Teuchos::setCol(config, i, configpred_mat);
1431     }
1432   }
1433 
1434   // Compute dsicrepancy approx and corrected response
1435   correctedResponses.resize(num_pred);
1436   discrepancyResponses.resize(num_pred);
1437   Response zero_response = mcmcModel.current_response().copy();
1438   for (int i = 0; i < num_pred; i++) {
1439     for (size_t j = 0; j < numFunctions; j++)
1440       zero_response.function_value(0,j);
1441     RealVector config_vec = Teuchos::getCol(Teuchos::View, configpred_mat, i);
1442     Model::inactive_variables(config_vec, mcmcModel);
1443     mcmcModel.continuous_variables(ave_params); //KAM -delete later
1444     mcmcModel.evaluate();
1445     Variables configpred = configpred_array[i];
1446     Response simresponse_pred = mcmcModel.current_response();
1447     Cout << "Calculating model discrepancy";
1448     modelDisc.apply(configpred, zero_response, quiet_flag);
1449     discrepancyResponses[i] = zero_response.copy();
1450     Cout << "Correcting model response";
1451     modelDisc.apply(configpred, simresponse_pred, quiet_flag);
1452     correctedResponses[i] = simresponse_pred.copy();
1453   }
1454 
1455   // Compute correction variance
1456   RealMatrix discrep_var(num_pred, numFunctions);
1457   correctedVariances.shapeUninitialized(num_pred, numFunctions);
1458   modelDisc.compute_variance(configpred_array, discrep_var, quiet_flag);
1459   if (expData.variance_active()) {
1460     RealVectorArray exp_stddevs(num_exp*numFunctions);
1461     expData.cov_std_deviation(exp_stddevs); // one vector per experiment
1462     RealVector col_vec(num_pred);
1463     for (int i = 0; i < numFunctions; i++) {
1464       Real& max_var = exp_stddevs[0][i];
1465       for (int j = 0; j < num_exp; j++)
1466        if (exp_stddevs[j][i] > max_var)
1467 	 max_var = exp_stddevs[j][i];
1468       RealVector discrep_varvec = Teuchos::getCol(Teuchos::View, discrep_var,i);
1469       for (int j = 0; j < num_pred; j++) {
1470     	col_vec[j] = discrep_varvec[j] + pow(max_var, 2);
1471       }
1472       Teuchos::setCol(col_vec, i, correctedVariances);
1473     }
1474   }
1475   else {
1476     correctedVariances = discrep_var;
1477     Cout << "\nWarning: No variance information was provided in "
1478          << scalarDataFilename << ".\n         Prediction variance computed "
1479 	 << "contains only variance information\n         from the "
1480 	 << "discrepancy model.\n";
1481   }
1482 
1483   export_discrepancy(configpred_mat);
1484 }
1485 
build_field_discrepancy()1486 void NonDBayesCalibration::build_field_discrepancy()
1487 {
1488   // For now, use average params (unfiltered)
1489   RealMatrix acc_chain_transpose(acceptanceChain, Teuchos::TRANS);
1490   int num_cols = acc_chain_transpose.numCols();
1491   RealVector ave_params(num_cols);
1492   compute_col_means(acc_chain_transpose, ave_params);
1493   mcmcModel.continuous_variables(ave_params);
1494   //mcmcModel.evaluate();
1495 
1496   int num_exp = expData.num_experiments();
1497   size_t num_configvars = expData.config_vars()[0].length();
1498   // KAM TODO: add catch when num_config_vars == 0. Trying to retrieve
1499   // this length will seg fault
1500 
1501   size_t num_field_groups = expData.num_fields();
1502   // KAM TODO: Throw error if num_field_groups > 1 (either need separate
1503   // discrepancy for each or only allow one field group)
1504   // Read independent coordinates
1505 
1506   // Read variables for discrepancy build points
1507   RealMatrix allvars_mat;
1508   RealVector col_vec;
1509   for (int i = 0; i < num_field_groups; i++) {
1510     for (int j = 0; j < num_exp; j++) {
1511       RealMatrix vars_mat = expData.field_coords_view(i,j);
1512       int num_indepvars = vars_mat.numRows();
1513       int dim_indepvars = vars_mat.numCols();
1514       vars_mat.reshape(num_indepvars, dim_indepvars + num_configvars);
1515       RealVector config_vec = expData.config_vars()[j];
1516       col_vec.resize(num_indepvars);
1517       for (int k = 0; k < num_configvars; k++) {
1518         col_vec.putScalar(config_vec[k]);
1519         Teuchos::setCol(col_vec, dim_indepvars+k, vars_mat);
1520       }
1521       // add to total matrix
1522       RealMatrix varsmat_trans(vars_mat, Teuchos::TRANS);
1523       int col = allvars_mat.numCols();
1524       allvars_mat.reshape(vars_mat.numCols(),
1525                           allvars_mat.numCols() + num_indepvars);
1526       for (int k = 0; k < varsmat_trans.numCols(); k ++) {
1527         col_vec = Teuchos::getCol(Teuchos::Copy, varsmat_trans, k);
1528         Teuchos::setCol(col_vec, col+k, allvars_mat);
1529       }
1530     }
1531   }
1532 
1533   /*
1534   ShortArray my_asv(3);
1535   my_asv[0]=1;
1536   my_asv[1]=0;
1537   my_asv[2]=0;
1538   */
1539   ShortArray exp_asv(num_exp);
1540   for (int i = 0; i < num_exp; i++) {
1541     exp_asv[i] = 1;
1542   }
1543 
1544   // Build concatenated vector for discrepancy
1545   //for (int i = 0; i < num_field_groups; i++) {
1546   int ind = 0;
1547   RealVector concat_disc;
1548   for (int i = 0; i < num_exp; i++) {
1549     const IntVector field_lengths = expData.field_lengths(i);
1550     RealVector config_vec = expData.config_vars()[i];
1551     Model::inactive_variables(config_vec, mcmcModel);
1552     mcmcModel.evaluate();
1553     for (int j = 0; j < field_lengths.length(); j++) {
1554       concat_disc.resize(ind + field_lengths[j]);
1555       if (expData.interpolate_flag()) {
1556         const Response model_resp = mcmcModel.current_response().copy();
1557         Response interpolated_resp = expData.response(i).copy();
1558         expData.interpolate_simulation_data(model_resp, i, exp_asv, 0,
1559                                             interpolated_resp);
1560         for (int k=0; k<field_lengths[j]; k++)
1561           concat_disc[ind + k] = expData.all_data(i)[k] -
1562                                  interpolated_resp.function_values()[k];
1563       }
1564       else {
1565         for (int k=0; k<field_lengths[j]; k++)
1566           concat_disc[ind + k] = expData.all_data(i)[k] -
1567                               mcmcModel.current_response().function_values()[k];
1568       }
1569       ind += field_lengths[j];
1570     }
1571   }
1572   //Cout << "disc_build_points " <<concat_disc;
1573 
1574   // Build matrix containing prediction points for discrepancy
1575   RealMatrix discrepvars_pred;
1576   RealMatrix configpred_mat;
1577   int num_pred;
1578   // Read in prediction config vars
1579   if (!importPredConfigs.empty()) {
1580     TabularIO::read_data_tabular(importPredConfigs,
1581                                  "user-provided prediction configurations",
1582                                  configpred_mat, num_configvars,
1583                                  importPredConfigFormat, false);
1584     num_pred = configpred_mat.numCols();
1585   }
1586   else if (!predictionConfigList.empty()) {
1587     //KAM TODO: check length % num_config vars
1588     num_pred = predictionConfigList.length()/num_configvars;
1589     configpred_mat.shapeUninitialized(num_configvars, num_pred);
1590     RealVector config(num_configvars);
1591     for (int j = 0; j < num_pred; j++) {
1592       for (int k = 0; k < num_configvars; k++)
1593         config[k] = predictionConfigList[j*num_configvars + k];
1594       Teuchos::setCol(config, j, configpred_mat);
1595     }
1596   }
1597   else if (numPredConfigs > 1) {
1598     num_pred = numPredConfigs;
1599     configpred_mat.shapeUninitialized(num_configvars, numPredConfigs);
1600     RealVector config_step(num_configvars);
1601     RealVector config(num_configvars);
1602     for (int i = 0; i < num_configvars; i++)
1603       config_step[i] = (configUpperBnds[i] -
1604                               configLowerBnds[i])/(numPredConfigs - 1);
1605     for (int i = 0; i < numPredConfigs; i++) {
1606       for (int j = 0; j < num_configvars; j++)
1607         config[j] = configLowerBnds[j] + config_step[j]*i;
1608       Teuchos::setCol(config, i, configpred_mat);
1609     }
1610   }
1611   else {
1612     for (int j = 0; j < num_exp; j++) {
1613       num_pred = num_exp;
1614       configpred_mat.shapeUninitialized(num_configvars, num_exp);
1615       RealVector config_vec = expData.config_vars()[j];
1616       Teuchos::setCol(config_vec, j, configpred_mat);
1617     }
1618   }
1619   //Cout << "pred config mat = " << configpred_mat << '\n';
1620   // Combine with simulation indep vars
1621   for (int i = 0; i < num_field_groups; i ++) {
1622     for (int j = 0; j < num_pred; j++) {
1623       RealMatrix vars_mat = mcmcModel.current_response().field_coords_view(i);
1624       int num_indepvars = vars_mat.numRows();
1625       int dim_indepvars = vars_mat.numCols();
1626       vars_mat.reshape(num_indepvars, dim_indepvars + num_configvars);
1627       const RealVector& config_vec = Teuchos::getCol(Teuchos::Copy,
1628                                               configpred_mat, j);
1629       col_vec.resize(num_indepvars);
1630       for (int k = 0; k < num_configvars; k++) {
1631         col_vec.putScalar(config_vec[k]);
1632         Teuchos::setCol(col_vec, dim_indepvars+k, vars_mat);
1633       }
1634       RealMatrix varsmat_trans(vars_mat, Teuchos::TRANS);
1635       int col = discrepvars_pred.numCols();
1636       discrepvars_pred.reshape(vars_mat.numCols(),
1637                           discrepvars_pred.numCols() + num_indepvars);
1638       for (int k = 0; k < varsmat_trans.numCols(); k ++) {
1639         col_vec = Teuchos::getCol(Teuchos::Copy, varsmat_trans, k);
1640         Teuchos::setCol(col_vec, col+k, discrepvars_pred);
1641       }
1642     }
1643   }
1644 
1645   //Cout << "All vars matrix " << allvars_mat << '\n';
1646   //Cout << "Pred vars matrix " << discrepvars_pred << '\n';
1647   discrepancyFieldResponses.resize(discrepvars_pred.numCols());
1648   correctedFieldVariances.resize(discrepvars_pred.numCols());
1649   build_GP_field(allvars_mat, discrepvars_pred, concat_disc,
1650                  discrepancyFieldResponses, correctedFieldVariances);
1651   //Cout << "disc_pred " << discrepancyFieldResponses;
1652   //Cout << "disc_var = " << correctedFieldVariances << '\n';
1653   //TODO: Currently, correctedFieldVariances contains only variance from
1654   //the discrepancy approx. This needs to be combined with the experimental
1655   //variance
1656 
1657   // Compute corrected response
1658   ind = 0;
1659   correctedFieldResponses.resize(discrepvars_pred.numCols());
1660   for (int i = 0; i < num_pred; i++) {
1661     const RealVector& config_vec = Teuchos::getCol(Teuchos::Copy,
1662                                             configpred_mat, i);
1663     Model::inactive_variables(config_vec, mcmcModel);
1664     mcmcModel.evaluate();
1665     const RealVector sim_resp = mcmcModel.current_response().function_values();
1666     for (int j = 0; j < sim_resp.length(); j ++) {
1667       correctedFieldResponses[ind + j] = sim_resp[j]
1668                               + discrepancyFieldResponses[ind+j];
1669     }
1670     ind += sim_resp.length();
1671   }
1672 
1673     //allExperiments[exp_ind].function_value(i);
1674     //Response residual_response;
1675     //expData.form_residuals(mcmcModel.current_response(), j, my_asv, 0, residual_response);
1676     //expData.form_residuals(mcmcModel.current_response(), j, residual_response);
1677     //build_GP_field(vector view(indep_coordinates, t_new_coord, concat_disc, disc_pred);
1678     //Cout << residual_response.function_values();
1679   //}
1680 
1681   // We are getting coordinates for the experiment and ultimately we need to handle the simulation coordinates
1682   // if interpolation.
1683   //
1684   // Initialize newFnDiscClass
1685   // newFnDiscClass.initialize(x, theta, t)
1686   // newFnDiscClass.build(vector t, vector responses, correction_term)
1687   // newFnDiscClass.apply(vector t_new, vector responses, corrected_responses)
1688   // One possibility is to use the GP construction from NonDLHSSampling, line 794
1689   // Another is to use the DiscrepancyCorrection class
1690 
1691   export_field_discrepancy(discrepvars_pred);
1692 }
1693 
build_GP_field(const RealMatrix & discrep_vars_mat,RealMatrix & discrep_vars_pred,const RealVector & concat_disc,RealVector & disc_pred,RealVector & disc_var)1694 void NonDBayesCalibration::build_GP_field(const RealMatrix& discrep_vars_mat,
1695                            RealMatrix& discrep_vars_pred,
1696 			   const RealVector& concat_disc, RealVector& disc_pred,
1697                            RealVector& disc_var)
1698 {
1699   String approx_type;
1700   approx_type = "global_kriging";  // Surfpack GP
1701   UShortArray approx_order;
1702   short data_order = 1;  // assume only function values
1703   short output_level = NORMAL_OUTPUT;
1704   SharedApproxData sharedData;
1705   // if multiple independent coordinates and config vars, need to
1706   // change 1 to number of input parameters for GP
1707   int num_GP_inputs = discrep_vars_mat.numRows();
1708   sharedData = SharedApproxData(approx_type, approx_order, num_GP_inputs,
1709                                 data_order, output_level);
1710   Approximation gpApproximation(sharedData);
1711 
1712   // build the GP for the discrepancy
1713   gpApproximation.add_array(discrep_vars_mat,concat_disc);
1714   gpApproximation.build();
1715   //gpApproximations.export_model(GPstring, GPPrefix, ALGEBRAIC_FILE);
1716   int pred_length = discrep_vars_pred.numCols();
1717   for (int i=0; i<pred_length; i++) {
1718     RealVector new_sample = Teuchos::getCol(Teuchos::View,discrep_vars_pred,i);
1719     disc_pred(i)=gpApproximation.value(new_sample);
1720     disc_var(i) = gpApproximation.prediction_variance(new_sample);
1721   }
1722 }
1723 
/** Export three tabular files for the scalar model-discrepancy results:
    (1) discrepancy responses, (2) corrected (model + discrepancy)
    responses, and (3) corrected model variances -- one row per
    prediction configuration (one configuration per column of
    pred_config_mat). */
void NonDBayesCalibration::export_discrepancy(RealMatrix&
    			   pred_config_mat)
{

  // Calculate number of predictions
  int num_pred = pred_config_mat.numCols();
  Variables output_vars = mcmcModel.current_variables().copy();
  const StringArray& resp_labels =
    		     mcmcModel.current_response().function_labels();
  size_t wpp4 = write_precision+4; // column width: precision + padding

  // Discrepancy responses file output
  unsigned short discrep_format = exportDiscrepFormat;
  String discrep_filename =
    exportDiscrepFile.empty() ? "dakota_discrepancy_tabular.dat" :
    exportDiscrepFile;
  std::ofstream discrep_stream;
  TabularIO::open_file(discrep_stream, discrep_filename,
      		       "NonDBayesCalibration discrepancy response export");

  TabularIO::write_header_tabular(discrep_stream, output_vars, resp_labels,
      				  "config_id", discrep_format);
  discrep_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(discrep_stream, i+1,
				     mcmcModel.interface_id(),
				     discrep_format);
    // push this prediction configuration into the model's inactive
    // variables so the exported variable columns reflect it
    const RealVector& config_vec = Teuchos::getCol(Teuchos::View,
						   pred_config_mat, i);
    Model::inactive_variables(config_vec, mcmcModel);
    output_vars = mcmcModel.current_variables().copy();
    output_vars.write_tabular(discrep_stream);
    const RealVector& resp_vec = discrepancyResponses[i].function_values();
    for (size_t j = 0; j < numFunctions; ++j)
      discrep_stream << std::setw(wpp4) << resp_vec[j] << ' ';
    discrep_stream << '\n';
  }
  TabularIO::close_file(discrep_stream, discrep_filename,
      	 		"NonDBayesCalibration discrepancy response export");

  // Corrected model (model+discrep) file output
  unsigned short corrmodel_format = exportCorrModelFormat;
  String corrmodel_filename =
    exportCorrModelFile.empty() ? "dakota_corrected_model_tabular.dat" :
    exportCorrModelFile;
  std::ofstream corrmodel_stream;
  TabularIO::open_file(corrmodel_stream, corrmodel_filename,
      		       "NonDBayesCalibration corrected model response export");

  TabularIO::write_header_tabular(corrmodel_stream, output_vars, resp_labels,
      				  "config_id", corrmodel_format);
  corrmodel_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(corrmodel_stream, i+1,
				     mcmcModel.interface_id(),
				     corrmodel_format);
    const RealVector& config_vec = Teuchos::getCol(Teuchos::View,
						   pred_config_mat, i);
    Model::inactive_variables(config_vec, mcmcModel);
    output_vars = mcmcModel.current_variables().copy();
    output_vars.write_tabular(corrmodel_stream);
    const RealVector& resp_vec = correctedResponses[i].function_values();
    for (size_t j = 0; j < numFunctions; ++j)
      corrmodel_stream << std::setw(wpp4) << resp_vec[j] << ' ';
    corrmodel_stream << '\n';
  }
  TabularIO::close_file(corrmodel_stream, corrmodel_filename,
      	 		"NonDBayesCalibration corrected model response export");

  // Corrected model variances file output
  unsigned short discrepvar_format = exportCorrVarFormat;
  String var_filename = exportCorrVarFile.empty() ?
    		 "dakota_discrepancy_variance_tabular.dat" : exportCorrVarFile;
  //discrep_filename = "dakota_corrected_variances.dat";
  std::ofstream discrepvar_stream;
  TabularIO::open_file(discrepvar_stream, var_filename,
      		       "NonDBayesCalibration corrected model variance export");

  // correctedVariances is (num_pred x numFunctions); transpose so each
  // prediction's variances can be pulled as a column below
  RealMatrix corrected_var_transpose(correctedVariances, Teuchos::TRANS);
  StringArray var_labels(numFunctions);
  for (int i = 0; i < numFunctions; i++) {
    std::stringstream s;
    s << resp_labels[i] << "_var";
    var_labels[i] = s.str();
  }
  TabularIO::write_header_tabular(discrepvar_stream, output_vars, var_labels,
      				  "pred_config", discrepvar_format);
  discrepvar_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(discrepvar_stream, i+1,
				     mcmcModel.interface_id(),
				     discrepvar_format);
    const RealVector& config_vec = Teuchos::getCol(Teuchos::View,
						   pred_config_mat, i);
    Model::inactive_variables(config_vec, mcmcModel);
    output_vars = mcmcModel.current_variables().copy();
    output_vars.write_tabular(discrepvar_stream);
    const RealVector& var_vec = Teuchos::getCol(Teuchos::View,
						corrected_var_transpose, i);
    for (size_t j = 0; j < numFunctions; ++j)
      discrepvar_stream << std::setw(wpp4) << var_vec[j] << ' ';
    discrepvar_stream << '\n';
  }
  TabularIO::close_file(discrepvar_stream, var_filename,
      	 		"NonDBayesCalibration corrected model variance export");
}
1833 
/** Export three tabular files for the field model-discrepancy results:
    (1) discrepancy responses, (2) corrected (model + discrepancy)
    responses, and (3) discrepancy variances -- one row per prediction
    configuration, with one prediction point per column of
    pred_vars_mat. */
void NonDBayesCalibration::export_field_discrepancy(RealMatrix& pred_vars_mat)
{
  // Calculate number of predictions
  // NOTE(review): assumes pred_vars_mat holds numFunctions columns per
  // prediction configuration -- confirm against build_field_discrepancy
  int num_pred = pred_vars_mat.numCols()/numFunctions;
  size_t num_field_groups = expData.num_fields();
  Variables output_vars = mcmcModel.current_variables().copy();
  const StringArray& resp_labels =
    		     mcmcModel.current_response().function_labels();
  size_t wpp4 = write_precision+4; // column width: precision + padding

  // Discrepancy responses file output
  unsigned short discrep_format = exportDiscrepFormat;
  String discrep_filename =
    exportDiscrepFile.empty() ? "dakota_discrepancy_tabular.dat" :
    exportDiscrepFile;
  std::ofstream discrep_stream;
  TabularIO::open_file(discrep_stream, discrep_filename,
      		       "NonDBayesCalibration discrepancy response export");

  TabularIO::write_header_tabular(discrep_stream, output_vars, resp_labels,
      				  "config_id", discrep_format);
  discrep_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  int ind = 0; // running offset into the concatenated field responses
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(discrep_stream, i+1,
				     mcmcModel.interface_id(),
				     discrep_format);
    // first prediction column for configuration i
    const RealVector& pred_vec = Teuchos::getCol(Teuchos::View,
					  pred_vars_mat, int(numFunctions)*i);
    for (int j = 0; j < num_field_groups; j++) {
      RealMatrix vars_mat = mcmcModel.current_response().field_coords_view(j);
      int field_length = vars_mat.numRows();
      int indepvars_dim = vars_mat.numCols();
      // trailing entries of pred_vec beyond the independent coordinates
      // are the configuration variables
      int config_length = pred_vec.length() - indepvars_dim;
      RealVector config_vec(config_length);
      for (int k = 0; k < config_length; k ++)
        config_vec[k] = pred_vec[indepvars_dim + k];
      Model::inactive_variables(config_vec, mcmcModel);
      output_vars = mcmcModel.current_variables().copy();
      output_vars.write_tabular(discrep_stream);
      for (int k = 0; k < field_length; k++)
        discrep_stream << std::setw(wpp4) << discrepancyFieldResponses[ind + k]
                       << ' ';
      ind += field_length;
    }
    discrep_stream << '\n';
  }
  TabularIO::close_file(discrep_stream, discrep_filename,
      	 		"NonDBayesCalibration discrepancy response export");

  // Corrected model (model+discrep) file output
  unsigned short corrmodel_format = exportCorrModelFormat;
  String corrmodel_filename =
    exportCorrModelFile.empty() ? "dakota_corrected_model_tabular.dat" :
    exportCorrModelFile;
  std::ofstream corrmodel_stream;
  TabularIO::open_file(corrmodel_stream, corrmodel_filename,
      		       "NonDBayesCalibration corrected model response export");

  TabularIO::write_header_tabular(corrmodel_stream, output_vars, resp_labels,
      				  "config_id", corrmodel_format);
  corrmodel_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  ind = 0;
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(corrmodel_stream, i+1,
				     mcmcModel.interface_id(),
				     corrmodel_format);
    const RealVector& pred_vec = Teuchos::getCol(Teuchos::View,
					  pred_vars_mat, int(numFunctions)*i);
    for (int j = 0; j < num_field_groups; j++) {
      RealMatrix vars_mat = mcmcModel.current_response().field_coords_view(j);
      int field_length = vars_mat.numRows();
      int indepvars_dim = vars_mat.numCols();
      int config_length = pred_vec.length() - indepvars_dim;
      RealVector config_vec(config_length);
      for (int k = 0; k < config_length; k ++)
        config_vec[k] = pred_vec[indepvars_dim + k];
      Model::inactive_variables(config_vec, mcmcModel);
      output_vars = mcmcModel.current_variables().copy();
      output_vars.write_tabular(corrmodel_stream);
      for (int k = 0; k < field_length; k++)
        corrmodel_stream << std::setw(wpp4) << correctedFieldResponses[ind + k]
                       << ' ';
      ind += field_length;
    }
    corrmodel_stream << '\n';
  }
  TabularIO::close_file(corrmodel_stream, corrmodel_filename,
      	 		"NonDBayesCalibration corrected model response export");

  // Discrepancy variances file output
  // TODO: Corrected model variances file output
  unsigned short discrepvar_format = exportCorrVarFormat;
  String var_filename = exportCorrVarFile.empty() ?
    		 "dakota_discrepancy_variance_tabular.dat" : exportCorrVarFile;
  //discrep_filename = "dakota_corrected_variances.dat";
  std::ofstream discrepvar_stream;
  TabularIO::open_file(discrepvar_stream, var_filename,
      		       "NonDBayesCalibration corrected model variance export");

  //RealMatrix corrected_var_transpose(correctedVariances, Teuchos::TRANS);
  StringArray var_labels(numFunctions);
  for (int i = 0; i < numFunctions; i++) {
    std::stringstream s;
    s << resp_labels[i] << "_var";
    var_labels[i] = s.str();
  }
  TabularIO::write_header_tabular(discrepvar_stream, output_vars, var_labels,
      				  "pred_config", discrepvar_format);
  discrepvar_stream << std::setprecision(write_precision)
    		 << std::resetiosflags(std::ios::floatfield);
  ind = 0;
  for (int i = 0; i < num_pred; ++i) {
    TabularIO::write_leading_columns(discrepvar_stream, i+1,
				     mcmcModel.interface_id(),
				     discrepvar_format);
    const RealVector& pred_vec = Teuchos::getCol(Teuchos::View,
					  pred_vars_mat, int(numFunctions)*i);
    for (int j = 0; j < num_field_groups; j++) {
      RealMatrix vars_mat = mcmcModel.current_response().field_coords_view(j);
      int field_length = vars_mat.numRows();
      int indepvars_dim = vars_mat.numCols();
      int config_length = pred_vec.length() - indepvars_dim;
      RealVector config_vec(config_length);
      for (int k = 0; k < config_length; k ++)
        config_vec[k] = pred_vec[indepvars_dim + k];
      Model::inactive_variables(config_vec, mcmcModel);
      output_vars = mcmcModel.current_variables().copy();
      output_vars.write_tabular(discrepvar_stream);
      for (int k = 0; k < field_length; k++)
        discrepvar_stream << std::setw(wpp4) << correctedFieldVariances[ind + k]
                       << ' ';
      ind += field_length;
    }
    discrepvar_stream << '\n';
  }
  TabularIO::close_file(discrepvar_stream, var_filename,
      	 		"NonDBayesCalibration corrected model variance export");
}
1975 
void NonDBayesCalibration::
extract_selected_posterior_samples(const std::vector<int> &points_to_keep,
				   const RealMatrix &samples_for_posterior_eval,
				   const RealVector &posterior_density,
				   RealMatrix &posterior_data ) const
{
  // NOTE(review): intentionally empty placeholder -- selected posterior
  // sample extraction is not implemented; all arguments are ignored.
}
1983 
void NonDBayesCalibration::
export_posterior_samples_to_file( const std::string filename,
				  const RealMatrix &posterior_data ) const
{
  // NOTE(review): intentionally empty placeholder -- posterior sample
  // export is not implemented; all arguments are ignored.
}
1989 
1990 
1991 
1992 
1993 /** Calculate the log-likelihood, accounting for contributions from
1994     covariance and hyperparameters, as well as constant term:
1995 
1996       log(L) = -1/2*Nr*log(2*pi) - 1/2*log(det(Cov)) - 1/2*r'(Cov^{-1})*r
1997 
1998     The passed residuals must already be size-adjusted, differenced
1999     with any data, if present, and scaled by covariance^{-1/2}. */
2000 Real NonDBayesCalibration::
log_likelihood(const RealVector & residuals,const RealVector & all_params)2001 log_likelihood(const RealVector& residuals, const RealVector& all_params)
2002 {
2003   // if needed, extract the trailing hyper-parameters
2004   RealVector hyper_params;
2005   if (numHyperparams > 0)
2006     hyper_params = RealVector(Teuchos::View,
2007                               all_params.values() + numContinuousVars,
2008                               numHyperparams);
2009 
2010   size_t num_total_calib_terms = residuals.length();
2011   Real half_nr_log2pi = num_total_calib_terms * HALF_LOG_2PI;
2012   Real half_log_det =
2013     expData.half_log_cov_determinant(hyper_params, obsErrorMultiplierMode);
2014 
2015   // misfit defined as 1/2 r^T (mult^2*Gamma_d)^{-1} r
2016   Real misfit = residuals.dot( residuals ) / 2.0;
2017 
2018   Real log_like = -half_nr_log2pi - half_log_det - misfit;
2019 
2020   return log_like;
2021 }
2022 
2023 
/** Form the Cholesky factor of the prior covariance over the continuous
    calibration parameters plus any error hyper-parameters, storing the
    lower triangle in priorCovCholFactor. */
void NonDBayesCalibration::prior_cholesky_factorization()
{
  // factorization to be performed offline (init time) and used online
  int i, j, num_params = numContinuousVars + numHyperparams;
  priorCovCholFactor.shape(num_params, num_params); // init to 0

  if (!standardizedSpace &&
      iteratedModel.multivariate_distribution().correlation()) { // x_dist
    Teuchos::SerialSpdDenseSolver<int, Real> corr_solver;
    RealSymMatrix prior_cov_matrix;//= ();

    // Correlated priors in x-space are not supported: abort here, so the
    // factorization code below is currently unreachable scaffolding.
    Cerr << "prior_cholesky_factorization() not yet implemented for this case."
	 << std::endl;
    abort_handler(-1);

    corr_solver.setMatrix( Teuchos::rcp(&prior_cov_matrix, false) );
    corr_solver.factor(); // Cholesky factorization (LL^T) in place
    // assign lower triangle
    for (i=0; i<num_params; ++i)
      for (j=0; j<=i; ++j)
	priorCovCholFactor(i, j) = prior_cov_matrix(i, j);
  }
  else {
    // Uncorrelated (or decorrelated u-space) case: the factor is simply
    // the diagonal of prior standard deviations.
    // SVD index conversion is more general, but not required for current uses
    //const SharedVariablesData& svd
    //  = mcmcModel.current_variables().shared_data();
    RealVector dist_stdevs  // u_dist (decorrelated)
      = mcmcModel.multivariate_distribution().std_deviations();
    for (i=0; i<numContinuousVars; ++i)
      priorCovCholFactor(i,i) = dist_stdevs[i];
	//= dist_stdevs[svd.cv_index_to_active_index(i)];

    // for now we assume a variance when the inv gamma has infinite moments
    // (inverse-gamma std deviation is finite only for alpha > 2; otherwise
    // fall back to 5% of the mode as a stand-in spread)
    Real alpha;
    for (i=0; i<numHyperparams; ++i) {
      invGammaDists[i].pull_parameter(Pecos::IGA_ALPHA, alpha);
      priorCovCholFactor(numContinuousVars + i, numContinuousVars + i) =
	(alpha > 2.) ? invGammaDists[i].standard_deviation()
	             : invGammaDists[i].mode() * 0.05;
    }
  }
}
2066 
2067 
/** Construct a positive definite proposal covariance as the (approximate)
    inverse of the misfit Hessian, preconditioned by the prior covariance
    Cholesky factor L: covariance = L (I - V D V^T) L^T, where V/D come from
    the truncated eigendecomposition of L^T H L (Sherman-Morrison-Woodbury).
    Negative/near-zero eigenvalues are neglected to enforce positive
    definiteness.
    \param hessian          misfit Hessian H (negative log-likelihood)
    \param prior_chol_fact  lower Cholesky factor L of the prior covariance
    \param covariance       output positive definite covariance matrix
    \param output_lev       verbosity level controlling diagnostic prints */
void NonDBayesCalibration::
get_positive_definite_covariance_from_hessian(const RealSymMatrix &hessian,
					      const RealMatrix& prior_chol_fact,
					      RealSymMatrix &covariance,
					      short output_lev)
{
  // precondition H_misfit by computing L^T H_misfit L where L is the Cholesky
  // factor of the prior covariance.  Important notes:
  // > in other contexts, we compute the Hessian of the negative log posterior
  //   from the Hessian of the misfit minus the Hessian of log prior density.
  // > In the context of defining a MVN covariance, we use the fact that the
  //   Hessian of the negative log of a normal density is 1 / variance, such
  //   that the inverse Hessian is simply the prior covariance.
  //   >> Thus, we justify use of the prior covariance, even for cases (e.g.,
  //      uniform, exp) where the Hessian of the log prior density is zero.
  // > For uncorrelated priors, L is simply diag[sigma_i].

  // Option 1: if augmenting with Hessian of negative log prior
  //           Hess of neg log posterior = Hess of misfit - Hess of log prior
  //const RealVector& c_vars = mcmcModel.continuous_variables();
  //augment_hessian_with_log_prior(log_hess, c_vars);

  // Option 2: if preconditioning with prior covariance using L^T H L
  // can use incoming Hessian as both input and output since an internal
  // temporary is created for H L prior to creating symmetric L^T H L
  int num_rows = hessian.numRows();
  RealSymMatrix LT_H_L(num_rows, false); // copy
  Teuchos::symMatTripleProduct(Teuchos::TRANS, 1., hessian,
			       prior_chol_fact, LT_H_L);

  // Compute eigenvalue decomposition of matrix A=QDQ'
  // In general, the eigenvalue decomposition of a matrix is A=QDinv(Q).
  // For real symmetric matrices A = QDQ', i.e. inv(Q) = Q'
  RealVector eigenvalues; RealMatrix eigenvectors;
  symmetric_eigenvalue_decomposition( LT_H_L, eigenvalues, eigenvectors );

  // Find smallest positive eigenvalue
  //Real min_eigval = std::numeric_limits<double>::max();
  //for ( int i=0; i<num_rows; i++)
  //  if ( eigenvalues[i] > 0. )
  //    min_eigval = std::min( eigenvalues[i], min_eigval );

#ifdef DEBUG
  Cout << "eigenvalues from symmetric_eigenvalue_decomposition:\n"
       << eigenvalues;
#endif

  /*
  // Ensure hessian is positive definite by setting all negative eigenvalues
  // to be positive.
  Real eigenval_tol = 1.e-4; // TO DO: tie to prior bounds?
  int i, j, num_neglect = 0;
  for (i=0; i<num_rows; ++i)
    if ( eigenvalues[i] < eigenval_tol )
      { eigenvalues[i] = eigenval_tol; ++num_neglect; }
    else
      break;

  // The covariance matrix is the inverse of the hessian so scale eigenvectors
  // by Q*inv(D)
  RealMatrix scaled_eigenvectors(num_rows, num_rows, false); // don't init to 0
  for ( i=0; i<num_rows; ++i ) {
    for ( j=0; j<num_neglect; ++j )
      scaled_eigenvectors(i,j) = 0.;
    for ( j=num_neglect; j<num_rows; ++j )
      scaled_eigenvectors(i,j) = eigenvectors(i,j) / eigenvalues[j];
  }
  covariance.shapeUninitialized( num_rows );
  covariance.multiply( Teuchos::NO_TRANS, Teuchos::TRANS,
		       1., scaled_eigenvectors, eigenvectors, 0. );
  */

  // Form V and D: retain only eigenpairs with eigenvalue above tolerance
  // (eigenvalues are returned in ascending order, so neglected ones lead)
  Real eigenval_tol = 0.; //1.e-4; // Petra2014 suggests tol=1 in Fig 5.2
  int n, r, num_neglect = 0;
  for (n=0; n<num_rows; ++n)
    if ( eigenvalues[n] <= eigenval_tol ) ++num_neglect;
    else                                  break;
  int num_low_rank = num_rows - num_neglect, offset_r;
  RealSymMatrix D(num_low_rank); // init to 0;    r x r diagonal matrix
  RealMatrix V(num_rows, num_low_rank, false); // n x r matrix for r retained
  for (r=0; r<num_low_rank; ++r) {
    offset_r = r + num_neglect;
    Real lambda = eigenvalues[offset_r];
    D(r,r) = lambda / (lambda + 1.); // Sherman-Morrison-Woodbury
    for (n=0; n<num_rows; ++n)
      V(n,r) = eigenvectors(n,offset_r); // copy column
  }

  // Form I - V D V^T
  covariance.shapeUninitialized(num_rows);
  Teuchos::symMatTripleProduct(Teuchos::NO_TRANS, -1., D, V, covariance);
  for (n=0; n<num_rows; ++n)
    covariance(n,n) += 1.;

  // form inv(hessian) = L (I - V D V^T) L^T
  // can use covariance as both input and output (see above)
  Teuchos::symMatTripleProduct(Teuchos::NO_TRANS, 1., covariance,
			       prior_chol_fact, covariance);

  if (output_lev > NORMAL_OUTPUT) {
    Cout << "Hessian of negative log-likelihood (from misfit):\n" << hessian;
    //Cout << "2x2 determinant = " << hessian(0,0) * hessian(1,1) -
    //  hessian(0,1) * hessian(1,0) << '\n';

    //Cout << "Cholesky factor of prior covariance:\n" << prior_chol_fact;

    Cout << "Prior-preconditioned misfit Hessian:\n" << LT_H_L;
    //Cout << "2x2 determinant = " << LT_H_L(0,0) * LT_H_L(1,1) -
    //  LT_H_L(0,1) * LT_H_L(1,0) << '\n';
    if (num_neglect)
      Cout << "Hessian decomposition neglects " << num_neglect
           << " eigenvalues based on " << eigenval_tol << " tolerance.\n";
  }
  if (output_lev > NORMAL_OUTPUT) {
    Cout << "Positive definite covariance from inverse of Hessian:\n"
	 << covariance;
    //Cout << "2x2 determinant = " << covariance(0,0) * covariance(1,1) -
    //  covariance(0,1) * covariance(1,0) << '\n';
  }

  //return num_neglect;
}
2191 
2192 
/** Response mapping callback used within RecastModel for MAP
    pre-solve. Computes

      -log(post) = -log(like) - log(prior); where
      -log(like) = 1/2*Nr*log(2*pi) + 1/2*log(det(Cov)) + 1/2*r'(Cov^{-1})*r
                 = 1/2*Nr*log(2*pi) + 1/2*log(det(Cov)) + misfit

    (misfit defined as 1/2 r^T (mult^2*Gamma_d)^{-1} r) The passed
    residual_resp has been differenced, interpolated, and
    covariance-scaled */
void NonDBayesCalibration::
neg_log_post_resp_mapping(const Variables& residual_vars,
                          const Variables& nlpost_vars,
                          const Response& residual_resp,
                          Response& nlpost_resp)
{
  const RealVector& c_vars = nlpost_vars.continuous_variables();
  // ASV bits for the single nlpost response: 1 = value, 2 = gradient,
  // 4 = Hessian
  short nlpost_req = nlpost_resp.active_set_request_vector()[0];
  bool output_flag = (nonDBayesInstance->outputLevel >= DEBUG_OUTPUT);
  // if needed, extract the trailing hyper-parameters
  // (a Teuchos::View aliases the tail of c_vars; no copy is made)
  RealVector hyper_params;
  if (nonDBayesInstance->numHyperparams > 0)
    hyper_params =
      RealVector(Teuchos::View,
		 c_vars.values() + nonDBayesInstance->numContinuousVars,
		 nonDBayesInstance->numHyperparams);

  // function value: -log(like) - log(prior)
  if (nlpost_req & 1) {
    const RealVector& residuals = residual_resp.function_values();
    Real nlp = -nonDBayesInstance->log_likelihood(residuals, c_vars) -
      nonDBayesInstance->log_prior_density(c_vars);
    nlpost_resp.function_value(nlp, 0);
    if (output_flag)
      Cout << "MAP pre-solve: negative log posterior = " << nlp << std::endl;
  }

  // gradient: accumulated in place from misfit, log-det(Cov), and prior terms
  if (nlpost_req & 2) {
    // avoid copy by updating gradient vector in place
    RealVector log_grad = nlpost_resp.function_gradient_view(0);
    // Gradient contribution from misfit
    nonDBayesInstance->
      expData.build_gradient_of_sum_square_residuals(residual_resp, log_grad);
    // Add the contribution from 1/2*log(det(Cov))
    nonDBayesInstance->expData.half_log_cov_det_gradient
      (hyper_params, nonDBayesInstance->obsErrorMultiplierMode,
       nonDBayesInstance->numContinuousVars, log_grad);
    // Add the contribution from -log(prior)
    nonDBayesInstance->augment_gradient_with_log_prior(log_grad, c_vars);
    if (output_flag)
      Cout << "MAP pre-solve: negative log posterior gradient:\n" << log_grad;
  }

  // Hessian: accumulated in place from the same three contributions
  if (nlpost_req & 4) {
    // avoid copy by updating Hessian matrix in place
    RealSymMatrix log_hess = nlpost_resp.function_hessian_view(0);
    // Hessian contribution from misfit
    nonDBayesInstance->
      expData.build_hessian_of_sum_square_residuals(residual_resp, log_hess);
    // Add the contribution from 1/2*log(det(Cov))
    nonDBayesInstance->expData.half_log_cov_det_hessian
      (hyper_params, nonDBayesInstance->obsErrorMultiplierMode,
       nonDBayesInstance->numContinuousVars, log_hess);
    // Add the contribution from -log(prior)
    nonDBayesInstance->augment_hessian_with_log_prior(log_hess, c_vars);
    if (output_flag)
      Cout << "MAP pre-solve: negative log posterior Hessian:\n" << log_hess;
  }

  //Cout << "nlpost_resp:\n" << nlpost_resp;
}
2263 
compute_statistics()2264 void NonDBayesCalibration::compute_statistics()
2265 {
2266   // mcmcchain is either acceptanceChain or filtered_chain
2267   // mcmcfnvals is either acceptedFnVals or filteredFnVals
2268   int num_skip = (subSamplingPeriod > 0) ? subSamplingPeriod : 1;
2269   int burnin = (burnInSamples > 0) ? burnInSamples : 0;
2270   int num_samples = acceptanceChain.numCols();
2271   int num_filtered = int((num_samples-burnin)/num_skip);
2272 
2273   RealMatrix filtered_chain;
2274   if (burnInSamples > 0 || num_skip > 1) {
2275     filter_chain(acceptanceChain, filtered_chain);
2276     filter_fnvals(acceptedFnVals, filteredFnVals);
2277   }
2278   else {
2279 
2280     filtered_chain =
2281       RealMatrix(Teuchos::View, acceptanceChain.values(),
2282 		 acceptanceChain.stride(),
2283 		 acceptanceChain.numRows(), acceptanceChain.numCols());
2284 
2285     filteredFnVals =
2286       RealMatrix(Teuchos::View, acceptedFnVals.values(), acceptedFnVals.stride(),
2287 		 acceptedFnVals.numRows(), acceptedFnVals.numCols());
2288 
2289   }
2290 
2291   NonDSampling::compute_moments(filtered_chain, chainStats, STANDARD_MOMENTS);
2292   NonDSampling::compute_moments(filteredFnVals,    fnStats, STANDARD_MOMENTS);
2293   if (!requestedProbLevels[0].empty())
2294     compute_intervals();
2295 
2296   // Print tabular file for the filtered chain
2297   if (!exportMCMCFilename.empty() || outputLevel >= NORMAL_OUTPUT)
2298     export_chain(filtered_chain, filteredFnVals);
2299 
2300   if (posteriorStatsKL)
2301     kl_post_prior(acceptanceChain);
2302   if (posteriorStatsMutual)
2303     mutual_info_buildX();
2304   if (posteriorStatsKDE)
2305     calculate_kde();
2306   if (calModelEvidence)
2307     calculate_evidence();
2308 }
2309 
2310 
2311 void NonDBayesCalibration::
filter_chain(const RealMatrix & acceptance_chain,RealMatrix & filtered_chain,int target_length)2312 filter_chain(const RealMatrix& acceptance_chain, RealMatrix& filtered_chain,
2313 	     int target_length)
2314 {
2315   // Default behavior: burn in 20% of samples, take only every third point in
2316   // chain
2317   int num_mcmc_samples = acceptance_chain.numCols();
2318   int burn_in_post = int(0.2*num_mcmc_samples);
2319   int burned_in_post = num_mcmc_samples - burn_in_post;
2320   int num_skip;
2321   int mcmc_threshhold = target_length*3;
2322   if (burned_in_post < mcmc_threshhold) {
2323     num_skip = 3;
2324   }
2325   else {
2326     // maximally skip to achieve the target length: floor( (count-1)/(len-1) )
2327     num_skip = (burned_in_post-1)/(target_length-1);
2328   }
2329   filter_matrix_cols(acceptance_chain, burn_in_post, num_skip, filtered_chain);
2330 }
2331 
filter_chain(const RealMatrix & acceptance_chain,RealMatrix & filtered_chain)2332 void NonDBayesCalibration::filter_chain(const RealMatrix& acceptance_chain,
2333 					RealMatrix& filtered_chain)
2334 {
2335   int burnin = (burnInSamples > 0) ? burnInSamples : 0;
2336   int num_skip = (subSamplingPeriod > 0) ? subSamplingPeriod : 1;
2337   filter_matrix_cols(acceptance_chain, burnin, num_skip, filtered_chain);
2338 }
2339 
filter_fnvals(const RealMatrix & accepted_fn_vals,RealMatrix & filtered_fn_vals)2340 void NonDBayesCalibration::filter_fnvals(const RealMatrix& accepted_fn_vals,
2341     					 RealMatrix& filtered_fn_vals)
2342 {
2343   int burnin = (burnInSamples > 0) ? burnInSamples : 0;
2344   int num_skip = (subSamplingPeriod > 0) ? subSamplingPeriod : 1;
2345   filter_matrix_cols(accepted_fn_vals, burnin, num_skip, filtered_fn_vals);
2346 }
2347 
2348 
2349 void NonDBayesCalibration::
filter_matrix_cols(const RealMatrix & orig_matrix,int start_index,int stride,RealMatrix & filtered_matrix)2350 filter_matrix_cols(const RealMatrix& orig_matrix, int start_index,
2351 		   int stride, RealMatrix& filtered_matrix)
2352 {
2353   // Alternately, could return a view using the stride to skip ahead
2354   int num_orig = orig_matrix.numCols();
2355   if (num_orig <= start_index || stride <= 0) {
2356     Cerr << "\nError: Invalid arguments to NonDBayesCalibraion::"
2357 	 << "filter_matrix_cols()\n";
2358     abort_handler(METHOD_ERROR);
2359   }
2360 
2361   // ceil(num_orig/stride), using integer arithmetic since we have ints
2362   // instead of converting to double (1 + remaining cols divided equally)
2363   int num_filtered =  1 + (num_orig-start_index-1)/stride;
2364   filtered_matrix.shape(orig_matrix.numRows(), num_filtered);
2365   for (int i=start_index, j=0; i<num_orig; i+=stride, ++j) {
2366       RealVector col_vec =
2367 	Teuchos::getCol(Teuchos::View, const_cast<RealMatrix&>(orig_matrix), i);
2368       Teuchos::setCol(col_vec, j, filtered_matrix);
2369   }
2370 }
2371 
2372 
/** Compute +/- 2-sigma credibility intervals on the filtered response
    values and, when experimental variance is active, +/- 2-sigma prediction
    intervals on responses augmented with observation error; results are
    written to dakota_mcmc_CredPredIntervals.dat.  Sorted percentile-based
    intervals for requested probability levels are also emitted. */
void NonDBayesCalibration::compute_intervals()
{
  std::ofstream interval_stream("dakota_mcmc_CredPredIntervals.dat");
  std::ostream& screen_stream = Cout;

  // Make accepted function values the rows instead of the columns
  RealMatrix filtered_fn_vals_transpose(filteredFnVals, Teuchos::TRANS);
  // Augment function values with experimental uncertainty for prediction ints
  int num_filtered = filteredFnVals.numCols();
  size_t num_exp = expData.num_experiments();
  // one block of num_filtered prediction samples per experiment
  size_t num_concatenated = num_exp*num_filtered;

  const StringArray& resp = mcmcModel.current_response().function_labels();
  size_t width = write_precision+7;

  // Calculate +/- 2sigma credibility intervals
  RealVector Fn_ave(numFunctions), Fn_stdevs(numFunctions),
	     Cred_interval_minima(numFunctions),
	     Cred_interval_maxima(numFunctions);
  compute_col_means(filtered_fn_vals_transpose, Fn_ave);
  compute_col_stdevs(filtered_fn_vals_transpose, Fn_ave, Fn_stdevs);
  interval_stream << "Function aves = " <<Fn_ave << '\n';
  interval_stream << "Function st devs = " <<Fn_stdevs << '\n';
  interval_stream << "2 sigma Credibility Intervals\n";
  for(size_t i=0; i<numFunctions; ++i){
    Cred_interval_minima[i] = Fn_ave[i] - 2*Fn_stdevs[i];
    Cred_interval_maxima[i] = Fn_ave[i] + 2*Fn_stdevs[i];
    interval_stream << std::setw(width) << resp[i] << " ";
    interval_stream << Cred_interval_minima[i] << ", " << Cred_interval_maxima[i]
               << '\n';
  }
  interval_stream << "\n";

  // Calculate +/- 2sigma prediction intervals
  // (only meaningful when experiment error covariance is available)
  predVals.shapeUninitialized(numFunctions, num_concatenated);
  if (expData.variance_active()) {
    compute_prediction_vals(filteredFnVals, predVals,
    			    num_filtered, num_exp, num_concatenated);
    RealVector Pred_ave(numFunctions), Pred_stdevs(numFunctions),
	       Pred_interval_minima(numFunctions),
	       Pred_interval_maxima(numFunctions);
    RealMatrix predVals_transpose(predVals, Teuchos::TRANS);
    compute_col_means(predVals_transpose, Pred_ave);
    compute_col_stdevs(predVals_transpose, Pred_ave, Pred_stdevs);
    interval_stream << "2 sigma Prediction Intervals\n";
    for(size_t i=0; i<numFunctions; ++i){
      Pred_interval_minima[i] = Pred_ave[i] - 2*Pred_stdevs[i];
      Pred_interval_maxima[i] = Pred_ave[i] + 2*Pred_stdevs[i];
      interval_stream << std::setw(width) << resp[i] << " ";
      interval_stream << Pred_interval_minima[i] << ", "
		      << Pred_interval_maxima[i] << '\n';
    }
  }
  interval_stream << "\n";
  // Calculate intervals with sorting - print to screen and interval file
  size_t num_levels = 0;
  for(int i = 0; i < numFunctions; ++i)
    num_levels += requestedProbLevels[i].length();
  if (num_levels > 0)
    print_intervals_file(interval_stream, filtered_fn_vals_transpose,
      			   predVals, num_filtered, num_concatenated);

  interval_stream << "acceptedVals = " << acceptedFnVals << '\n';
  interval_stream << "predVals = " << predVals << '\n';
}
2438 
/** Augment the filtered response values with draws of observation error to
    form prediction samples: for each experiment, LHS normal samples are
    generated from the experiment's error standard deviations and correlation
    and added to each filtered response column.  Results are concatenated
    experiment-by-experiment into predVals.
    \param filtered_fn_vals  filtered responses (numFunctions x num_filtered)
    \param predVals          output (numFunctions x num_concatenated)
    \param num_filtered      number of filtered chain samples
    \param num_exp           number of experiments
    \param num_concatenated  num_exp * num_filtered (columns of predVals) */
void NonDBayesCalibration::compute_prediction_vals
(RealMatrix& filtered_fn_vals, RealMatrix& predVals,
int num_filtered, size_t num_exp, size_t num_concatenated)
{
  // Read std_dev and correl matrices if specified for experiments
  RealVectorArray std_deviations;
  RealSymMatrixArray correl_matrices;
  //if (calibrationData && expData.variance_active()){
  expData.cov_std_deviation(std_deviations);
  expData.cov_as_correlation(correl_matrices);
  //}

  // Augment function values with experimental uncertainty for prediction ints
  // Generate normal errors using LHS
  /*int num_res = residualModel.response_size();
    RealVector means_vec(num_res), lower_bnds(num_res), upper_bnds(num_res);
    */
  // zero-mean, unbounded normal error model for each response function
  RealVector means_vec(numFunctions), lower_bnds(numFunctions),
	     upper_bnds(numFunctions);
  means_vec.putScalar(0.0);
  Real dbl_inf = std::numeric_limits<Real>::infinity();
  lower_bnds.putScalar(-dbl_inf);
  upper_bnds.putScalar( dbl_inf);
  RealMatrix lhs_normal_samples;
  unsigned short sample_type = SUBMETHOD_LHS;
  short sample_ranks_mode = 0; //IGNORE RANKS
  Pecos::LHSDriver lhsDriver; // the C++ wrapper for the F90 LHS library
  size_t e, s, r, cntr = 0;
  lhsDriver.seed(randomSeed);
  lhsDriver.initialize("lhs", sample_ranks_mode, true);
  for (e=0; e<num_exp; ++e) {
    //int lhs_seed = (randomSeed > 0) ? randomSeed : generate_system_seed();
    // one error sample per filtered chain point, per experiment
    lhsDriver.generate_normal_samples(means_vec, std_deviations[e], lower_bnds,
              upper_bnds, correl_matrices[e], num_filtered, lhs_normal_samples);
    for (s=0; s<num_filtered; ++s, ++cntr)
      for (r=0; r<numFunctions; ++r)
	predVals(r,cntr) = filtered_fn_vals(r,s) + lhs_normal_samples(r,s);
  }
}
2478 
2479 /** Print tabular file with filtered chain, function values, and pred values */
2480 void NonDBayesCalibration::
export_chain(RealMatrix & filtered_chain,RealMatrix & filtered_fn_vals)2481 export_chain(RealMatrix& filtered_chain, RealMatrix& filtered_fn_vals)
2482 {
2483   String mcmc_filename =
2484     exportMCMCFilename.empty() ? "dakota_mcmc_tabular.dat" : exportMCMCFilename;
2485   std::ofstream export_mcmc_stream;
2486   TabularIO::open_file(export_mcmc_stream, mcmc_filename,
2487 		       "NonDBayesCalibration chain export");
2488 
2489   // Use a Variables object for proper tabular formatting.
2490   // The residual model includes hyper-parameters, if present
2491   Variables output_vars = residualModel.current_variables().copy();
2492 
2493   // When outputting only chain responses
2494   const StringArray& resp_labels =
2495     mcmcModel.current_response().function_labels();
2496   // When outputting experimental responses
2497   /*
2498   size_t num_exp = expData.num_experiments();
2499   StringArray resp_labels;
2500   const StringArray& resp = mcmcModel.current_response().function_labels();
2501   for (size_t i=0; i<num_exp+1; ++i){
2502     for (size_t k=0; k<numFunctions; ++k){
2503       resp_labels.push_back(resp[k]);
2504     }
2505   }
2506   */
2507 
2508   TabularIO::
2509     write_header_tabular(export_mcmc_stream, output_vars,
2510 			 resp_labels, "mcmc_id", exportMCMCFormat);
2511 
2512   size_t wpp4 = write_precision+4;
2513   export_mcmc_stream << std::setprecision(write_precision)
2514 		     << std::resetiosflags(std::ios::floatfield);
2515 
2516   int num_filtered = filtered_chain.numCols();
2517   for (int i=0; i<num_filtered; ++i) {
2518     TabularIO::write_leading_columns(export_mcmc_stream, i+1,
2519 				     mcmcModel.interface_id(),
2520 				     exportMCMCFormat);
2521     RealVector accept_pt = Teuchos::getCol(Teuchos::View, filtered_chain, i);
2522     output_vars.continuous_variables(accept_pt);
2523     output_vars.write_tabular(export_mcmc_stream);
2524     // Write function values to filtered_tabular
2525     RealVector col_vec = Teuchos::getCol(Teuchos::View, filtered_fn_vals, i);
2526     for (size_t j=0; j<numFunctions; ++j){
2527       export_mcmc_stream << std::setw(wpp4) << col_vec[j] << ' ';
2528     }
2529     // Write predicted values to filtered_tabular
2530     // When outputting experimental responses
2531     /*
2532     for (size_t j =0; j<num_exp; ++j){
2533       for (size_t k = 0; k<numFunctions; ++k){
2534 	int col_index = j*num_filtered+i;
2535         const RealVector& col_vec = Teuchos::getCol(Teuchos::View,
2536       				    predVals, col_index);
2537   	export_mcmc_stream << std::setw(wpp4) << col_vec[k] << ' ';
2538       }
2539     }
2540     */
2541     export_mcmc_stream << '\n';
2542   }
2543 
2544   TabularIO::close_file(export_mcmc_stream, mcmc_filename,
2545 			"NonDQUESOBayesCalibration chain export");
2546 }
2547 
2548 void NonDBayesCalibration::
calculate_kde()2549 calculate_kde()
2550 {
2551   RealVector pdf_results;
2552   Pecos::GaussianKDE kde;
2553   //Cout << "Accepted Chain in KDE " << acceptanceChain <<  '\n';
2554   //Cout << "Accepted Fn Values in KDE " << acceptedFnVals <<  '\n';
2555   std::ofstream export_kde;
2556   size_t wpp4 = write_precision+4;
2557   StringArray var_labels;
2558         copy_data(residualModel.continuous_variable_labels(),var_labels);
2559   const StringArray& resp_labels =
2560     		     mcmcModel.current_response().function_labels();
2561   TabularIO::open_file(export_kde, "kde_posterior.dat",
2562 			"NonDBayesCalibration kde posterior export");
2563 
2564   int num_rows = acceptanceChain.numCols();
2565   int num_vars = acceptanceChain.numRows();
2566   RealMatrix current_var;
2567   current_var.shapeUninitialized(1,num_rows);
2568   for (int i=0; i<num_vars; ++i){
2569     for (int j=0; j<num_rows; j++)
2570       current_var(0,j)=acceptanceChain(i,j);
2571     //Cout << current_var;
2572     kde.initialize(current_var,Teuchos::TRANS);
2573     kde.pdf(current_var, pdf_results,Teuchos::TRANS);
2574     //Cout << pdf_results;
2575     export_kde << var_labels[i] << "  KDE PDF estimate  " << '\n';
2576     //TabularIO::
2577     //  write_header_tabular(export_kde, output_vars(i), "KDE PDF estimate");
2578     for (int j=0; j<num_rows; j++)
2579       export_kde <<  current_var(0,j) << "    " << pdf_results(j) << '\n';
2580     export_kde << '\n';
2581   }
2582   int num_responses = acceptedFnVals.numRows();
2583   RealMatrix current_resp;
2584   current_resp.shapeUninitialized(1,num_rows);
2585   for (int i=0; i<num_responses; ++i){
2586     for (int j=0; j<num_rows; j++)
2587       current_resp(0,j)=acceptedFnVals(i,j);
2588     //Cout << current_resp;
2589     //RealMatrix& col_resp = Teuchos::getCol(Teuchos::View, acceptedFnVals, i);
2590     //kde.initialize(current_resp, Teuchos::TRANS);
2591     kde.initialize(current_resp,Teuchos::TRANS);
2592     kde.pdf(current_resp, pdf_results,Teuchos::TRANS);
2593     //Cout << pdf_results;
2594     export_kde << resp_labels[i] << "  KDE PDF estimate  " << '\n';
2595     //TabularIO::
2596     //  write_header_tabular(export_kde, resp_labels(i), "KDE PDF estimate");
2597     for (int j=0; j<num_rows; j++)
2598       export_kde <<  current_resp(0,j) << "    " << pdf_results(j) << '\n';
2599     export_kde << '\n';
2600   }
2601   TabularIO::close_file(export_kde, "kde_posterior.dat",
2602 			"NonDBayesCalibration kde posterior export");
2603 
2604 }
2605 
/** Estimate the model evidence (marginal likelihood of the data) by one or
    both of: (1) Monte Carlo integration of the likelihood over prior
    samples; (2) the Laplace approximation about the MAP point, which
    requires the Hessian of the negative log posterior to be positive
    definite.  Defaults to the Monte Carlo approach if neither is selected. */
void NonDBayesCalibration::calculate_evidence()
{
  // set default to MC approximation if neither method specified
  if (!calModelEvidMC && !calModelEvidLaplace)
    calModelEvidMC = true;
  if (calModelEvidMC) {
    //int num_prior_samples = chainSamples; //KAM: replace later
    int num_prior_samples = (evidenceSamples>0) ? evidenceSamples : chainSamples;
    int num_params = numContinuousVars + numHyperparams;
    // Draw samples from prior distribution
    RealMatrix prior_dist_samples(num_params, num_prior_samples);
    prior_sample_matrix(prior_dist_samples);
    // Calculate likelihood for each sample
    // evidence ~= (1/N) sum_i L(d | theta_i), theta_i ~ prior
    double sum_like = 0.;
    for (int i = 0; i < num_prior_samples; i++) {
      RealVector params = Teuchos::getCol(Teuchos::View, prior_dist_samples, i);
      // truncate to the continuous calibration variables (drop hyper-params)
      RealVector cont_params = params;
      cont_params.resize(numContinuousVars);
      residualModel.continuous_variables(cont_params);
      residualModel.evaluate();
      RealVector residual = residualModel.current_response().function_values();
      double log_like = log_likelihood(residual, params);
      sum_like += std::exp(log_like);
    }
    double evidence = sum_like/num_prior_samples;
    Cout << "Model evidence (Monte Carlo) = " << evidence << '\n';
    //Cout << "num samples = " << num_prior_samples << '\n';
  }
  if (calModelEvidLaplace) {
    if (obsErrorMultiplierMode > CALIBRATE_NONE) {
      Cout << "The Laplace approximation of model evidence currently "
           << "does not work when error multipliers are specified."<< std::endl;
      abort_handler(METHOD_ERROR);
    }
    //if (mapOptAlgOverride == SUBMETHOD_NONE) {
    //  Cout << "You must specify a pre-solve method for the Laplace "
    //       << "approximation of model evidence." << std::endl;
    //  abort_handler(METHOD_ERROR);
    //}
    Cout << "Starting Laplace approximation of model evidence, first "
         << "\nobtain MAP point from pre-solve.\n";
    const RealVector& map_c_vars
      = mapOptimizer.variables_results().continuous_variables();
    //estimate likelihood at MAP point:
    residualModel.continuous_variables(map_c_vars);
    // request value+gradient+Hessian (ASV bits 1|2|4 = 7) for the residuals
    ActiveSet resAS = residualModel.current_response().active_set();
    resAS.request_values(7);
    residualModel.evaluate(resAS);
    RealVector residual = residualModel.current_response().function_values();
    Real laplace_like = log_likelihood(residual, map_c_vars);
    //obtain prior density at MAP point:
    Real laplace_prior =  nonDBayesInstance->log_prior_density(map_c_vars);
    if (outputLevel >= DEBUG_OUTPUT) {
      Cout << "Residual at MAP point" << residualModel.current_response() << '\n';
      Cout << "Log_likelihood at MAP Point" << laplace_like << '\n';
      Cout << "Laplace_prior " << laplace_prior << "\n";
    }
    // evaluate the negative log posterior (value/gradient/Hessian) at MAP
    Response nlpost_resp = negLogPostModel.current_response().copy();
    ActiveSet as2 = nlpost_resp.active_set();
    as2.request_values(7);
    nlpost_resp.active_set(as2);
    neg_log_post_resp_mapping(mapOptimizer.variables_results(), mapOptimizer.variables_results(),
      residualModel.current_response(), nlpost_resp);
    if (outputLevel >= DEBUG_OUTPUT) {
      Cout << "Negative log posterior function values " << nlpost_resp.function_values() << '\n';
      Cout << "Negative log posterior Hessian " << nlpost_resp.function_hessian_view(0) << '\n';
      //Cout << nlpost_resp << '\n';
    }
    // assemble Hessian of the negative log posterior: misfit term,
    // 1/2*log(det(Cov)) term, and -log(prior) term
    RealSymMatrix log_hess;
    nonDBayesInstance->
      expData.build_hessian_of_sum_square_residuals(residualModel.current_response(), log_hess);
    // Add the contribution from 1/2*log(det(Cov))
    nonDBayesInstance->expData.half_log_cov_det_hessian
      (0, nonDBayesInstance->obsErrorMultiplierMode,
      nonDBayesInstance->numContinuousVars, log_hess);
    // Add the contribution from -log(prior)
    nonDBayesInstance->augment_hessian_with_log_prior(log_hess, map_c_vars);
    Cout << "Laplace approximation: negative log posterior Hessian:\n"
         <<  log_hess << "\n";
    CovarianceMatrix local_log_post_hess;
    // for now, the matrix passed to set_covariance is a RealMatrix, not RealSymMatrix
    RealMatrix clog_hess(numContinuousVars, numContinuousVars);
    for (int i=0; i<numContinuousVars; i++) {
      for (int j=0; j<numContinuousVars; j++) {
        clog_hess(i,j)=log_hess(i,j);
      }
    }
    try {
      // log evidence = log prior + log like + (n/2)*log(2*pi)
      //                - (1/2)*log det(Hessian); exponentiate at the end
      local_log_post_hess.set_covariance(const_cast<RealMatrix&>(clog_hess));
      Cout << "log determinant post" << local_log_post_hess.log_determinant() <<std::endl;
      double lpres= laplace_prior+laplace_like+numContinuousVars*HALF_LOG_2PI-0.5*local_log_post_hess.log_determinant();
      Cout << "Model evidence (Laplace) = " << std::exp(lpres) <<  '\n';
    }
    catch (const std::runtime_error& re){
      // set_covariance throws when the Hessian is not SPD (Cholesky fails)
      Cout << "Can't compute model evidence because the Hessian of the negative log posterior distribution "
           << "is not positive definite. " << re.what();
    }
  }
}
2705 
print_intervals_file(std::ostream & s,RealMatrix & filteredFnVals_transpose,RealMatrix & predVals,int num_filtered,size_t num_concatenated)2706 void NonDBayesCalibration::print_intervals_file
2707 (std::ostream& s, RealMatrix& filteredFnVals_transpose,
2708  RealMatrix& predVals, int num_filtered, size_t num_concatenated)
2709 {
2710 
2711   const StringArray& resp = mcmcModel.current_response().function_labels();
2712   size_t width = write_precision+7;
2713   double alpha;
2714   int lower_index;
2715   int upper_index;
2716   // Credibility Intervals
2717   for(int i = 0; i < numFunctions; ++i){
2718     // Sort function values
2719     const RealVector& col_vec = Teuchos::getCol(Teuchos::View,
2720 				filteredFnVals_transpose, i);
2721     std::sort(col_vec.values(), col_vec.values() + num_filtered);
2722     // Write intervals
2723     size_t num_prob_levels = requestedProbLevels[i].length();
2724     if (num_prob_levels > 0){
2725       s << "Credibility Intervals for ";
2726       s << resp[i] << '\n';
2727       s << std::setw(width) << ' ' << " Response Level    Probability Level\n";
2728       s << std::setw(width) << ' ' << " ----------------- -----------------\n";
2729       for (size_t j = 0; j < num_prob_levels; ++j){
2730         alpha = requestedProbLevels[i][j];
2731         lower_index = floor(alpha/2*(num_filtered));
2732         upper_index = num_filtered - lower_index;
2733         s << std::setw(width) << ' ' << std::setw(width)
2734 	  << col_vec[lower_index] << ' ' << std::setw(width)
2735 	  << alpha << '\n'
2736 	  << std::setw(width) << ' ' << std::setw(width)
2737 	  << col_vec[upper_index] << ' '<< std::setw(width)
2738 	  << 1-alpha << '\n'
2739 	  << std::setw(width) << ' ' <<  "        -----             -----\n";
2740       }
2741     }
2742   }
2743   // Prediction Intervals
2744   if (expData.variance_active()) {
2745     RealMatrix predVals_transpose(predVals, Teuchos::TRANS);
2746     for(int i = 0; i < numFunctions; ++i){
2747       // Sort function values
2748       const RealVector& col_vec1 = Teuchos::getCol(Teuchos::View,
2749 				   predVals_transpose, i);
2750       std::sort(col_vec1.values(), col_vec1.values() + num_concatenated);
2751       // Write intervals
2752       size_t num_prob_levels = requestedProbLevels[i].length();
2753       if (num_prob_levels > 0){
2754         s << "Prediction Intervals for ";
2755         s << resp[i] << '\n';
2756         s << std::setw(width) << ' ' << " Response Level    Probability Level\n";
2757         s << std::setw(width) << ' ' << " ----------------- -----------------\n";
2758         for (size_t j = 0; j < num_prob_levels; ++j){
2759           alpha = requestedProbLevels[i][j];
2760           //s << "alpha = " << alpha << '\n';
2761           lower_index = floor(alpha/2*(num_concatenated));
2762           upper_index = num_concatenated - lower_index;
2763           s << std::setw(width) << ' ' << std::setw(width)
2764 	    << col_vec1[lower_index] << ' ' << std::setw(width)
2765 	    << alpha << '\n'
2766 	    << std::setw(width) << ' ' << std::setw(width)
2767 	    << col_vec1[upper_index] << ' '<< std::setw(width)
2768 	    << 1-alpha << '\n'
2769 	    << std::setw(width) << ' ' <<  "        -----             -----\n";
2770         }
2771       }
2772     }
2773   }
2774 }
2775 
print_intervals_screen(std::ostream & s,RealMatrix & filteredFnVals_transpose,RealMatrix & predVals_transpose,int num_filtered)2776 void NonDBayesCalibration::print_intervals_screen
2777 (std::ostream& s, RealMatrix& filteredFnVals_transpose,
2778  RealMatrix& predVals_transpose, int num_filtered)
2779 {
2780   const StringArray& resp = mcmcModel.current_response().function_labels();
2781   size_t width = write_precision+7;
2782   double alpha;
2783   int lower_index;
2784   int upper_index;
2785   s << "\n";
2786   // Credibility Intervals
2787   for(int i = 0; i < numFunctions; ++i){
2788     // Sort function values
2789     const RealVector& col_vec = Teuchos::getCol(Teuchos::View,
2790 				filteredFnVals_transpose, i);
2791     std::sort(col_vec.values(), col_vec.values() + num_filtered);
2792     // Write intervals
2793     size_t num_prob_levels = requestedProbLevels[i].length();
2794     if (num_prob_levels > 0){
2795       s << "Credibility Intervals for ";
2796       s << resp[i] << '\n';
2797       s << std::setw(width) << ' ' << " Response Level    Probability Level\n";
2798       s << std::setw(width) << ' ' << " ----------------- -----------------\n";
2799       for (size_t j = 0; j < num_prob_levels; ++j){
2800         alpha = requestedProbLevels[i][j];
2801         lower_index = floor(alpha/2*(num_filtered));
2802         upper_index = num_filtered - lower_index;
2803         s << std::setw(width) << ' ' << std::setw(width)
2804 	  << col_vec[lower_index] << ' ' << std::setw(width)
2805 	  << alpha << '\n'
2806 	  << std::setw(width) << ' ' << std::setw(width)
2807 	  << col_vec[upper_index] << ' '<< std::setw(width)
2808 	  << 1-alpha << '\n';
2809 	  //<< std::setw(width) << ' ' <<  "        -----             -----\n";
2810       }
2811     }
2812   }
2813   // Prediction Intervals
2814   if (expData.variance_active() ) {
2815     size_t num_exp = expData.num_experiments();
2816     size_t num_concatenated = num_exp*num_filtered;
2817     for(int i = 0; i < numFunctions; ++i){
2818       // Sort function values
2819       const RealVector& col_vec1 = Teuchos::getCol(Teuchos::View,
2820 				   predVals_transpose, i);
2821       std::sort(col_vec1.values(), col_vec1.values() + num_concatenated);
2822       // Write intervals
2823       size_t num_prob_levels = requestedProbLevels[i].length();
2824       if (num_prob_levels > 0){
2825         s << "Prediction Intervals for ";
2826         s << resp[i] << '\n';
2827         s << std::setw(width) <<' '<< " Response Level    Probability Level\n";
2828         s << std::setw(width) <<' '<< " ----------------- -----------------\n";
2829         for (size_t j = 0; j < num_prob_levels; ++j){
2830           alpha = requestedProbLevels[i][j];
2831           lower_index = floor(alpha/2*(num_concatenated));
2832           upper_index = num_concatenated - lower_index;
2833           s << std::setw(width) << ' ' << std::setw(width)
2834 	    << col_vec1[lower_index] << ' ' << std::setw(width)
2835 	    << alpha << '\n'
2836 	    << std::setw(width) << ' ' << std::setw(width)
2837 	    << col_vec1[upper_index] << ' '<< std::setw(width)
2838 	    << 1-alpha << '\n';
2839 	  //<< std::setw(width) << ' ' <<  "        -----             -----\n";
2840         }
2841       }
2842     }
2843   }
2844 }
2845 
print_results(std::ostream & s,short results_state)2846 void NonDBayesCalibration::print_results(std::ostream& s, short results_state)
2847 {
2848   // Print chain moments
2849   StringArray combined_labels;
2850   copy_data(residualModel.continuous_variable_labels(), combined_labels);
2851   NonDSampling::print_moments(s, chainStats, RealMatrix(),
2852       "posterior variable", STANDARD_MOMENTS, combined_labels, false);
2853   // Print response moments
2854   StringArray resp_labels = mcmcModel.current_response().function_labels();
2855   NonDSampling::print_moments(s, fnStats, RealMatrix(),
2856       "response function", STANDARD_MOMENTS, resp_labels, false);
2857 
2858   // Print chain diagnostics for variables
2859   if (chainDiagnostics)
2860     print_chain_diagnostics(s);
2861   // Print credibility and prediction intervals to screen
2862   if (requestedProbLevels[0].length() > 0 && outputLevel >= NORMAL_OUTPUT) {
2863     int num_filtered = filteredFnVals.numCols();
2864     RealMatrix filteredFnVals_transpose(filteredFnVals, Teuchos::TRANS);
2865     RealMatrix predVals_transpose(predVals, Teuchos::TRANS);
2866     print_intervals_screen(s, filteredFnVals_transpose,
2867 			   predVals_transpose, num_filtered);
2868   }
2869 
2870   // Print posterior stats
2871   if (posteriorStatsKL)
2872     print_kl(s);
2873 }
2874 
kl_post_prior(RealMatrix & acceptanceChain)2875 void NonDBayesCalibration::kl_post_prior(RealMatrix& acceptanceChain)
2876 {
2877   // sub-sample posterior chain
2878   int num_params = numContinuousVars + numHyperparams;
2879   int num_post_samples = acceptanceChain.numCols();
2880   int burn_in_post = int(0.2*num_post_samples);
2881   int burned_in_post = num_post_samples - burn_in_post;
2882   RealMatrix knn_post_samples;
2883   RealMatrix prior_dist_samples;
2884   if (num_post_samples < 18750){ // Aiming for num_filtered = 5000
2885     int num_skip = 3;
2886     int num_filtered = int(burned_in_post/num_skip);
2887     int num_prior_samples = num_filtered*125;
2888     knn_post_samples.shape(num_params, num_filtered);
2889     prior_dist_samples.shape(num_params, num_prior_samples);
2890     int j = 0;
2891     int it_cntr = 0;
2892     for (int i = burn_in_post+1; i < num_post_samples; ++i){
2893       ++it_cntr;
2894       if (it_cntr % num_skip == 0){
2895 	RealVector param_vec = Teuchos::getCol(Teuchos::View,
2896 	  			        acceptanceChain, i);
2897 	Teuchos::setCol(param_vec, j, knn_post_samples);
2898 	++j;
2899       }
2900     }
2901   }
2902   else{
2903     int num_skip = int(burned_in_post/5000);
2904     int num_filtered = int(burned_in_post/num_skip);
2905     int num_prior_samples = num_filtered*125;
2906     knn_post_samples.shapeUninitialized(num_params, num_filtered);
2907     prior_dist_samples.shapeUninitialized(num_params, num_prior_samples);
2908     int j = 0;
2909     int it_cntr = 0;
2910     for (int i = burn_in_post; i < num_post_samples; ++i){
2911       if (it_cntr % num_skip == 0){
2912 	  ++it_cntr;
2913 	  RealVector param_vec = Teuchos::getCol(Teuchos::View,
2914 	    			        acceptanceChain, i);
2915 	  Teuchos::setCol(param_vec, j, knn_post_samples);
2916 	  j++;
2917       }
2918     }
2919   }
2920 
2921   // produce matrix of prior samples
2922   prior_sample_matrix(prior_dist_samples);
2923   // compute knn kl-div between prior and posterior
2924   kl_est = knn_kl_div(knn_post_samples, prior_dist_samples, numContinuousVars);
2925 }
2926 
prior_sample_matrix(RealMatrix & prior_dist_samples)2927 void NonDBayesCalibration::prior_sample_matrix(RealMatrix& prior_dist_samples)
2928 {
2929   // Create matrix containing samples from the prior distribution
2930   boost::mt19937 rnumGenerator;
2931   int num_params = prior_dist_samples.numRows();
2932   int num_samples = prior_dist_samples.numCols();
2933   RealVector vec(num_params);
2934   rnumGenerator.seed(randomSeed);
2935   for(int i = 0; i < num_samples; ++i){
2936     prior_sample(rnumGenerator, vec);
2937     Teuchos::setCol(vec, i, prior_dist_samples);
2938   }
2939 }
2940 
knn_kl_div(RealMatrix & distX_samples,RealMatrix & distY_samples,size_t dim)2941 Real NonDBayesCalibration::knn_kl_div(RealMatrix& distX_samples,
2942     			 	RealMatrix& distY_samples, size_t dim)
2943 {
2944   approxnn::normSelector::instance().method(approxnn::L2_NORM);
2945 
2946   size_t NX = distX_samples.numCols();
2947   size_t NY = distY_samples.numCols();
2948   //size_t dim = numContinuousVars;
2949 
2950   // k is recorded for each distance so that if it needs to be adapted
2951   // (if kNN dist = 0), we can calculate the correction term
2952   IntVector k_vec_XY(NX);
2953   k_vec_XY.putScalar(6); //k default set to 6
2954   IntVector k_vec_XX(NX);
2955   k_vec_XX.putScalar(7); //k default set to 6
2956   			 //1st neighbor is self, so need k+1 for XtoX
2957   double eps = 0.0; //default ann error
2958 
2959   // Copy distX samples into ANN data types
2960   ANNpointArray dataX;
2961   dataX = annAllocPts(NX, dim);
2962   RealVector col(dim);
2963   for (int i = 0; i < NX; i++){
2964     col = Teuchos::getCol(Teuchos::View, distX_samples, i);
2965     for (int j = 0; j < dim; j++){
2966       dataX[i][j] = col[j];
2967     }
2968   }
2969   // Copy distY samples into ANN data types
2970   ANNpointArray dataY;
2971   dataY = annAllocPts(NY, dim);
2972   for (int i = 0; i < NY; i++){
2973     col = Teuchos::getCol(Teuchos::View, distY_samples, i);
2974     for (int j = 0; j < dim; j++){
2975       dataY[i][j] = col[j];
2976     }
2977   }
2978 
2979   // calculate vector of kNN distances from dist1 to dist2
2980   RealVector XtoYdistances(NX);
2981   ann_dist(dataX, dataY, XtoYdistances, NX, NY, dim, k_vec_XY, eps);
2982   // calculate vector of kNN distances from dist1 to itself
2983   RealVector XtoXdistances(NX);
2984   ann_dist(dataX, dataX, XtoXdistances, NX, NX, dim, k_vec_XX, eps);
2985 
2986   double log_sum = 0;
2987   double digamma_sum = 0;
2988   double rat;
2989   for (int i = 0; i < NX; i++){
2990     rat = XtoYdistances[i]/XtoXdistances[i];
2991     log_sum += log(XtoYdistances[i]/XtoXdistances[i]);
2992     if (k_vec_XY[i] != (k_vec_XX[i]-1)){ //XtoX: first NN is self
2993       double psiX = boost::math::digamma(k_vec_XX[i]-1);
2994       double psiY = boost::math::digamma(k_vec_XY[i]);
2995       digamma_sum += psiX - psiY;
2996     }
2997   }
2998   Real Dkl_est = 0.0;
2999   Dkl_est = (double(dim)*log_sum + digamma_sum)/double(NX)
3000           + log( double(NY)/(double(NX)-1) );
3001 
3002   annDeallocPts( dataX );
3003   annDeallocPts( dataY );
3004 
3005   approxnn::normSelector::instance().reset();
3006 
3007   return Dkl_est;
3008 }
3009 
mutual_info_buildX()3010 void NonDBayesCalibration::mutual_info_buildX()
3011 {
3012   /* Build matrix X, containing samples of the two distributions being
3013    * considered in the mutual info calculation. Each column has the form
3014    * X_i = [x1_i x2_i ... xn_i y1_i y2_i ... ym_i]
3015    */
3016 
3017   int num_params = numContinuousVars + numHyperparams;
3018   int num_samples = 1000;
3019   boost::mt19937 rnumGenerator;
3020   RealMatrix Xmatrix;
3021   Xmatrix.shapeUninitialized(2*num_params, num_samples);
3022   RealVector vec(num_params);
3023   RealVector col_vec(2*num_params);
3024   rnumGenerator.seed(randomSeed);
3025   for (int i = 0; i < num_samples; ++i){
3026     prior_sample(rnumGenerator, vec);
3027     for (int j = 0; j < num_params; ++j){
3028       col_vec[j] = vec[j];
3029     }
3030     prior_sample(rnumGenerator, vec);
3031     for (int j = 0; j < num_params; ++j){
3032       col_vec[j+1] = vec[j];
3033     }
3034     Teuchos::setCol(col_vec, i, Xmatrix);
3035   }
3036 
3037   // Test matrix
3038   /*
3039   int num_samples = acceptanceChain.numCols();
3040   RealMatrix Xmatrix;
3041   Xmatrix.shapeUninitialized(2*num_params, num_samples);
3042   RealVector vec(num_params);
3043   RealVector col_vec(2*num_params);
3044   for (int i = 0; i < num_samples-1; ++i){
3045     vec = Teuchos::getCol(Teuchos::View, acceptanceChain, i);
3046     for (int j = 0; j < num_params; ++j){
3047       col_vec[j] = vec[j]; //offset values by 1
3048     }
3049     vec = Teuchos::getCol(Teuchos::View, acceptanceChain, i+1);
3050     for (int j = 0; j < num_params; ++j){
3051       col_vec[j+num_params] = vec[j];
3052     }
3053     Teuchos::setCol(col_vec, i, Xmatrix);
3054   }
3055   // last column
3056   vec = Teuchos::getCol(Teuchos::View, acceptanceChain, num_samples-1);
3057   for (int j = 0; j < num_params; ++j){
3058     col_vec[j] = vec[j]; //offset values by 1
3059   }
3060   vec = Teuchos::getCol(Teuchos::View, acceptanceChain, 0);
3061   for (int j = 0; j < num_params; ++j){
3062     col_vec[j+num_params] = vec[j];
3063   }
3064   Teuchos::setCol(col_vec, num_samples-1, Xmatrix);
3065   */
3066 
3067   //test_stream << "Xmatrix = " << Xmatrix << '\n';
3068 
3069   Real mutualinfo_est =
3070     knn_mutual_info(Xmatrix, num_params, num_params, mutualInfoAlg);
3071   Cout << "MI est = " << mutualinfo_est << '\n';
3072 
3073 }
3074 
/** Estimate the mutual information between the first dimX rows and the
    remaining dimY rows of Xmatrix (columns are joint samples) using the
    Kraskov-Stoegbauer-Grassberger (KSG) k-nearest-neighbor estimators;
    alg selects MI_ALG_KSG1 or MI_ALG_KSG2. */
Real NonDBayesCalibration::knn_mutual_info(RealMatrix& Xmatrix, int dimX,
    int dimY, unsigned short alg)
{
  // KSG estimators are defined w.r.t. the max (Chebyshev) norm
  approxnn::normSelector::instance().method(approxnn::LINF_NORM);

  //std::ofstream test_stream("kam1.txt");
  //test_stream << "Xmatrix = " << Xmatrix << '\n';
  //Cout << "Xmatrix = " << Xmatrix << '\n';

  int num_samples = Xmatrix.numCols();
  int dim = dimX + dimY;

  // Cast Xmatrix into ANN data structure
  ANNpointArray dataXY;
  dataXY = annAllocPts(num_samples, dim);
  RealVector col(dim);
  for (int i = 0; i < num_samples; i++){
    col = Teuchos::getCol(Teuchos::View, Xmatrix, i);
    for (int j = 0; j < dim; j++){
      dataXY[i][j] = col[j];
    }
  }

  // Normalize data: center each coordinate and scale to unit sample
  // standard deviation.  NOTE(review): accumulation into meanXY/stdXY
  // relies on annAllocPt zero-initializing the coordinates (ANN's
  // annAllocPt takes a default fill value of 0) — confirm against the
  // ANN version in use.
  ANNpoint meanXY, stdXY;
  meanXY = annAllocPt(dim); //means
  for (int i = 0; i < num_samples; i++){
    for(int j = 0; j < dim; j++){
      meanXY[j] += dataXY[i][j];
    }
  }
  for (int j = 0; j < dim; j++){
    meanXY[j] = meanXY[j]/double(num_samples);
    //Cout << "mean" << j << " = " << meanXY[j] << '\n';
  }
  stdXY = annAllocPt(dim); //standard deviations
  for (int i = 0; i < num_samples; i++){
    for (int j = 0; j < dim; j++){
      stdXY[j] += pow (dataXY[i][j] - meanXY[j], 2.0);
    }
  }
  for (int j = 0; j < dim; j++){
    // sample (n-1) normalization
    stdXY[j] = sqrt( stdXY[j]/(double(num_samples)-1.0) );
    //Cout << "std" << j << " = " << stdXY[j] << '\n';
  }
  for (int i = 0; i < num_samples; i++){
    for (int j = 0; j < dim; j++){
      dataXY[i][j] = ( dataXY[i][j] - meanXY[j] )/stdXY[j];
    }
    //Cout << "dataXY = " << dataXY[i][0] << '\n';
  }

  // Get knn-distances for Xmatrix in the joint space; XYindices records
  // which neighbors realized each distance (needed for KSG2 below)
  RealVector XYdistances(num_samples);
  Int2DArray XYindices(num_samples);
  IntVector k_vec(num_samples);
  int k = 6;
  k_vec.putScalar(k); // for self distances, need k+1
  double eps = 0.0;
  ann_dist(dataXY, dataXY, XYdistances, XYindices, num_samples, num_samples,
           dim, k_vec, eps);

  // Build marginals: split each joint sample into its X and Y parts
  ANNpointArray dataX, dataY;
  dataX = annAllocPts(num_samples, dimX);
  dataY = annAllocPts(num_samples, dimY);
  //RealMatrix chainX(dimX, num_samples);
  //RealMatrix chainY(dimY, num_samples);
  for(int i = 0; i < num_samples; i++){
    col = Teuchos::getCol(Teuchos::View, Xmatrix, i);
    //Cout << "col = " << col << '\n';
    for(int j = 0; j < dimX; j++){
      //dataX[i][j] = dataXY[i][j];
      //Cout << "col " << j << " = " << col[j] << '\n';
      //chainX[i][j] = col[j];
      dataX[i][j] = col[j];
    }
    for(int j = 0; j < dimY; j++){
      //dataY[i][j] = dataXY[i][dimX+j];
      //chainY[i][j] = col[dimX+j];
      dataY[i][j] = col[dimX+j];
    }
  }
  //Cout << "chainX = " << chainX;
  ANNkd_tree* kdTreeX;
  ANNkd_tree* kdTreeY;
  kdTreeX = new ANNkd_tree(dataX, num_samples, dimX);
  kdTreeY = new ANNkd_tree(dataY, num_samples, dimY);

  // For each sample, count marginal neighbors n_x, n_y within the search
  // radius and accumulate the digamma terms of the KSG estimator
  double marg_sum = 0.0;
  int n_x, n_y;
  for(int i = 0; i < num_samples; i++){
    if (alg == MI_ALG_KSG2) { //alg=1, ksg2
      // KSG2: per-marginal radii, taken as the max distance (in each
      // marginal) over the joint-space neighbors recorded earlier
      IntArray XYind_i = XYindices[i];
      RealArray ex_vec(XYind_i.size()-1);
      RealArray ey_vec(XYind_i.size()-1);
      for(int j = 1; j < XYind_i.size(); j ++) {
	ex_vec[j-1] = annDist(dimX, dataX[i], dataX[XYind_i[j]]);
	ey_vec[j-1] = annDist(dimY, dataY[i], dataY[XYind_i[j]]);
      }
      ANNdist e_x = *std::max_element(ex_vec.begin(), ex_vec.end());
      ANNdist e_y = *std::max_element(ey_vec.begin(), ey_vec.end());
      /*
      ANNdist e = max(e_x, e_y);
      n_x = kdTreeX->annkFRSearch(dataX[i], e, 0, NULL, NULL, eps);
      n_y = kdTreeY->annkFRSearch(dataY[i], e, 0, NULL, NULL, eps);
      */
      n_x = kdTreeX->annkFRSearch(dataX[i], e_x, 0, NULL, NULL, eps);
      n_y = kdTreeY->annkFRSearch(dataY[i], e_y, 0, NULL, NULL, eps);
    }
    else { //alg=0, ksg1
      // KSG1: single joint-space radius for both marginal counts
      n_x = kdTreeX->annkFRSearch(dataX[i], XYdistances[i], 0, NULL,
  				    NULL, eps);
      n_y = kdTreeY->annkFRSearch(dataY[i], XYdistances[i], 0, NULL,
  				    NULL, eps);
    }
    double psiX = boost::math::digamma(n_x);
    double psiY = boost::math::digamma(n_y);
    //double psiX = boost::math::digamma(n_x+1);
    //double psiY = boost::math::digamma(n_y+1);
    marg_sum += psiX + psiY;
    //test_stream <<"i = "<< i <<", nx = "<< n_x <<", ny = "<< n_y <<'\n';
    //test_stream << "psiX = " << psiX << '\n';
    //test_stream << "psiY = " << psiY << '\n';
  }
  // MI = psi(k) - <psi(n_x) + psi(n_y)> + psi(N)  (KSG1 form);
  // KSG2 subtracts the additional 1/k term below
  double psik = boost::math::digamma(k);
  double psiN = boost::math::digamma(num_samples);
  double MI_est = psik - (marg_sum/double(num_samples)) + psiN;
  if (alg == MI_ALG_KSG2) {
    MI_est = MI_est - 1/double(k);
  }
  //test_stream << "psi_k = " << psik << '\n';
  //test_stream << "marg_sum = " << marg_sum << '\n';
  //test_stream << "psiN = " << psiN << '\n';
  //test_stream << "MI_est = " << MI_est << '\n';

  // Dealloc memory
  delete kdTreeX;
  delete kdTreeY;
  annDeallocPts(dataX);
  annDeallocPts(dataY);
  annDeallocPts(dataXY);

  approxnn::normSelector::instance().reset();

  // Compare to dkl
  /*
  double kl_est = knn_kl_div(Xmatrix, Xmatrix);
  test_stream << "KL = " << kl_est << '\n';
  Cout << "KL = " << kl_est << '\n';
  */
  return MI_est;

}
3229 
ann_dist(const ANNpointArray matrix1,const ANNpointArray matrix2,RealVector & distances,int NX,int NY,int dim2,IntVector & k_vec,double eps)3230 void NonDBayesCalibration::ann_dist(const ANNpointArray matrix1,
3231      const ANNpointArray matrix2, RealVector& distances, int NX, int NY,
3232      int dim2, IntVector& k_vec, double eps)
3233 {
3234 
3235   ANNkd_tree* kdTree;
3236   kdTree = new ANNkd_tree( matrix2, NY, dim2 );
3237   for (unsigned int i = 0; i < NX; ++i){
3238     int k_i = k_vec[i] ;
3239     ANNdistArray knn_dist = new ANNdist[k_i+1];
3240     ANNidxArray knn_ind = new ANNidx[k_i+1];
3241     //calc min number of distances needed
3242     kdTree->annkSearch(matrix1[ i ], k_i+1, knn_ind, knn_dist, eps);
3243     double dist = knn_dist[k_i];
3244     if (dist == 0.0){
3245       ANNdistArray knn_dist_i = new ANNdist[NY];
3246       ANNidxArray knn_ind_i = new ANNidx[NY];
3247       //calc distances for whole array
3248       kdTree->annkSearch(matrix1[ i ], NY, knn_ind_i, knn_dist_i, eps);
3249       for (unsigned int j = k_i+1; j < NY; ++j){
3250 	if (knn_dist_i[j] > 0.0){
3251 	  dist = knn_dist_i[j];
3252 	  k_vec[i] = j;
3253 	  break;
3254 	}
3255       }
3256       delete [] knn_ind_i;
3257       delete [] knn_dist_i;
3258     }
3259     distances[i] = dist;
3260     delete [] knn_ind;
3261     delete [] knn_dist;
3262   }
3263   delete kdTree;
3264   annClose();
3265 }
3266 
/** Overload of ann_dist that additionally records, for each query point,
    the indices of the neighbors realizing the kNN search (used by the
    KSG2 mutual information estimator).  As in the other overload, if the
    k-th distance is zero, k is adapted upward to the first strictly
    positive neighbor distance and k_vec is updated in place. */
void NonDBayesCalibration::ann_dist(const ANNpointArray matrix1,
     const ANNpointArray matrix2, RealVector& distances, Int2DArray& indices,
     int NX, int NY, int dim2, IntVector& k_vec,
     double eps)
{
  // kd-tree over the reference set matrix2
  ANNkd_tree* kdTree;
  kdTree = new ANNkd_tree( matrix2, NY, dim2 );
  for (unsigned int i = 0; i < NX; ++i){
    int k_i = k_vec[i] ;
    ANNdistArray knn_dist = new ANNdist[k_i+1];
    ANNidxArray knn_ind = new ANNidx[k_i+1];
    //calc min number of distances needed (k+1 allows skipping self-match)
    kdTree->annkSearch(matrix1[ i ], k_i+1, knn_ind, knn_dist, eps);
    double dist = knn_dist[k_i];
    // record the neighbor indices realizing this search
    IntArray ind(k_i+1);
    for (int j = 0; j < k_i+1; j ++)
      ind[j] = knn_ind[j];
    if (dist == 0.0){
      // degenerate case: widen the search to all NY points
      ANNdistArray knn_dist_i = new ANNdist[NY];
      ANNidxArray knn_ind_i = new ANNidx[NY];
      //calc distances for whole array
      kdTree->annkSearch(matrix1[ i ], NY, knn_ind_i, knn_dist_i, eps);
      for (unsigned int j = k_i+1; j < NY; ++j){
	if (knn_dist_i[j] > 0.0){
	  dist = knn_dist_i[j];
	  // keep indices up to (but excluding) the adapted neighbor j
	  ind.resize(j);
	  for (int k = 0; k < j; k ++)
	    ind[k] = knn_ind_i[k];
	  k_vec[i] = j;
	  break;
	}
      }
      delete [] knn_ind_i;
      delete [] knn_dist_i;
    }
    distances[i] = dist;
    indices[i] = ind;
    delete [] knn_ind;
    delete [] knn_dist;
  }
  delete kdTree;
  annClose();
}
3310 
print_kl(std::ostream & s)3311 void NonDBayesCalibration::print_kl(std::ostream& s)
3312 {
3313   s << "Information gained from prior to posterior = " << kl_est;
3314   s << '\n';
3315 }
3316 
print_chain_diagnostics(std::ostream & s)3317 void NonDBayesCalibration::print_chain_diagnostics(std::ostream& s)
3318 {
3319   s << "\nChain diagnostics\n";
3320   if (chainDiagnosticsCI)
3321     print_batch_means_intervals(s);
3322 }
3323 
print_batch_means_intervals(std::ostream & s)3324 void NonDBayesCalibration::print_batch_means_intervals(std::ostream& s)
3325 {
3326   size_t width = write_precision+7;
3327   Real alpha = 0.95;
3328 
3329   int num_vars = acceptanceChain.numRows();
3330   StringArray var_labels;
3331   copy_data(residualModel.continuous_variable_labels(),	var_labels);
3332   RealMatrix variables_mean_interval_mat, variables_mean_batch_means;
3333   batch_means_interval(acceptanceChain, variables_mean_interval_mat,
3334                        variables_mean_batch_means, 1, alpha);
3335   RealMatrix variables_var_interval_mat, variables_var_batch_means;
3336   batch_means_interval(acceptanceChain, variables_var_interval_mat,
3337                        variables_var_batch_means, 2, alpha);
3338 
3339   int num_responses = acceptedFnVals.numRows();
3340   StringArray resp_labels = mcmcModel.current_response().function_labels();
3341   RealMatrix responses_mean_interval_mat, responses_mean_batch_means;
3342   batch_means_interval(acceptedFnVals, responses_mean_interval_mat,
3343                        responses_mean_batch_means, 1, alpha);
3344   RealMatrix responses_var_interval_mat, responses_var_batch_means;
3345   batch_means_interval(acceptedFnVals, responses_var_interval_mat,
3346                        responses_var_batch_means, 2, alpha);
3347 
3348   if (outputLevel >= DEBUG_OUTPUT) {
3349     for (int i = 0; i < num_vars; i++) {
3350       s << "\tBatch means of mean for variable ";
3351       s << var_labels[i] << '\n';
3352       const RealVector& mean_vec = Teuchos::getCol(Teuchos::View,
3353                                   variables_mean_batch_means, i);
3354       s << mean_vec ;
3355       s << "\tBatch means of variance for variable ";
3356       const RealVector& var_vec = Teuchos::getCol(Teuchos::View,
3357                                   variables_var_batch_means, i);
3358       s << var_labels[i] << '\n';
3359       s << var_vec ;
3360     }
3361     for (int i = 0; i < num_responses; i++) {
3362       s << "\tBatch means of mean for response ";
3363       s << resp_labels[i] << '\n';
3364       const RealVector mean_vec = Teuchos::getCol(Teuchos::View,
3365                                   responses_mean_batch_means, i);
3366       s << mean_vec ;
3367       s << "\tBatch means of variance for response ";
3368       s << resp_labels[i] << '\n';
3369       const RealVector var_vec = Teuchos::getCol(Teuchos::View,
3370                                  responses_var_batch_means, i);
3371       s << var_vec ;
3372     }
3373   }
3374 
3375   s << "\t95% Confidence Intervals of means\n";
3376   for (int i = 0; i < num_vars; i++) {
3377     RealVector col_vec = Teuchos::getCol(Teuchos::View,
3378                                          variables_mean_interval_mat, i);
3379     s << '\t' << std::setw(width) << var_labels[i]
3380       << " = [" << col_vec[0] << ", " << col_vec[1] << "]\n";
3381   }
3382   for (int i = 0; i < num_responses; i++) {
3383     RealVector col_vec = Teuchos::getCol(Teuchos::View,
3384                                          responses_mean_interval_mat, i);
3385     s << '\t' << std::setw(width) << resp_labels[i]
3386       << " = [" << col_vec[0] << ", " << col_vec[1] << "]\n";
3387   }
3388   s << "\t95% Confidence Intervals of variances\n";
3389   for (int i = 0; i < num_vars; i++) {
3390     RealVector col_vec = Teuchos::getCol(Teuchos::View,
3391                                          variables_var_interval_mat, i);
3392     s << '\t' << std::setw(width) << var_labels[i]
3393       << " = [" << col_vec[0] << ", " << col_vec[1] << "]\n";
3394   }
3395   for (int i = 0; i < num_responses; i++) {
3396     RealVector col_vec = Teuchos::getCol(Teuchos::View,
3397                                          responses_var_interval_mat, i);
3398     s << '\t' << std::setw(width) << resp_labels[i]
3399       << " = [" << col_vec[0] << ", " << col_vec[1] << "]\n";
3400   }
3401 }
3402 
3403 
3404 /** Wrap the residualModel in a scaling transformation, such that
3405     residualModel now contains a scaling recast model. */
scale_model()3406 void NonDBayesCalibration::scale_model()
3407 {
3408   if (outputLevel >= DEBUG_OUTPUT)
3409     Cout << "Initializing scaling transformation" << std::endl;
3410 
3411   // residualModel becomes the sub-model of a RecastModel:
3412   residualModel.assign_rep(std::make_shared<ScalingModel>(residualModel));
3413   // scalingModel = residualModel;
3414 }
3415 
3416 
3417 /** Setup Recast for weighting model.  The weighting transformation
3418     doesn't resize, and makes no vars, active set or secondary mapping.
3419     All indices are one-to-one mapped (no change in counts). */
weight_model()3420 void NonDBayesCalibration::weight_model()
3421 {
3422   if (outputLevel >= DEBUG_OUTPUT)
3423     Cout << "Initializing weighting transformation" << std::endl;
3424 
3425   // we assume sqrt(w_i) will be applied to each residual, therefore:
3426   const RealVector& lsq_weights = residualModel.primary_response_fn_weights();
3427   for (int i=0; i<lsq_weights.length(); ++i)
3428     if (lsq_weights[i] < 0) {
3429       Cerr << "\nError: Calibration term weights must be nonnegative. "
3430 	   << "Specified weights are:\n" << lsq_weights << '\n';
3431       abort_handler(-1);
3432     }
3433 
3434   // TODO: pass sqrt to WeightingModel
3435   residualModel.assign_rep(std::make_shared<WeightingModel>(residualModel));
3436 }
3437 
3438 } // namespace Dakota
3439