1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 //- Class:       DataFitSurrModel
10 //- Description: Implementation code for the DataFitSurrModel class
11 //- Owner:       Mike Eldred
12 //- Checked by:
13 
14 #include "DataFitSurrModel.hpp"
15 #include "ProbabilityTransformModel.hpp"
16 #include "ApproximationInterface.hpp"
17 #include "ParamResponsePair.hpp"
18 #include "ProblemDescDB.hpp"
19 #include "PRPMultiIndex.hpp"
20 #include "dakota_data_io.hpp"
21 #include "dakota_tabular_io.hpp"
22 #include <boost/accumulators/accumulators.hpp>
23 #include <boost/accumulators/statistics/stats.hpp>
24 #include <boost/accumulators/statistics/rolling_mean.hpp>
25 #include "EvaluationStore.hpp"
26 
27 static const char rcsId[]="@(#) $Id: DataFitSurrModel.cpp 7034 2010-10-22 20:16:32Z mseldre $";
28 
29 
30 namespace Dakota {
31 
32 extern PRPCache data_pairs;
33 
34 
DataFitSurrModel(ProblemDescDB & problem_db)35 DataFitSurrModel::DataFitSurrModel(ProblemDescDB& problem_db):
36   SurrogateModel(problem_db),
37   pointsTotal(problem_db.get_int("model.surrogate.points_total")),
38   pointsManagement(problem_db.get_short("model.surrogate.points_management")),
39   pointReuse(problem_db.get_string("model.surrogate.point_reuse")),
40   exportSurrogate(problem_db.get_bool("model.surrogate.export_surrogate")),
41   importPointsFile(
42     problem_db.get_string("model.surrogate.import_build_points_file")),
43   exportPointsFile(
44     problem_db.get_string("model.surrogate.export_approx_points_file")),
45   exportFormat(problem_db.get_ushort("model.surrogate.export_approx_format")),
46   exportVarianceFile(
47     problem_db.get_string("model.surrogate.export_approx_variance_file")),
48   exportVarianceFormat(problem_db.get_ushort("model.surrogate.export_approx_variance_format")),
49   autoRefine(problem_db.get_bool("model.surrogate.auto_refine")),
50   maxIterations(problem_db.get_int("model.max_iterations")),
51   maxFuncEvals(problem_db.get_int("model.max_function_evals")),
52   convergenceTolerance(problem_db.get_real("model.convergence_tolerance")),
53   softConvergenceLimit(problem_db.get_int("model.soft_convergence_limit")),
54   refineCVMetric(problem_db.get_string("model.surrogate.refine_cv_metric")),
55   refineCVFolds(problem_db.get_int("model.surrogate.refine_cv_folds"))
56 {
57   // ignore bounds when finite differencing on data fits, since the bounds are
58   // artificial in this case (and reflecting the stencil degrades accuracy)
59   ignoreBounds = true;
60 
61   // if no user points management spec, assign RECOMMENDED_POINTS as default
62   if (pointsManagement == DEFAULT_POINTS)
63     pointsManagement = (pointsTotal > 0) ? TOTAL_POINTS : RECOMMENDED_POINTS;
64 
65   bool import_pts = !importPointsFile.empty(),
66     export_pts = !exportPointsFile.empty() || !exportVarianceFile.empty();
67   if (pointReuse.empty()) // assign default
68     pointReuse = (import_pts) ? "all" : "none";
69 
70   // DataFitSurrModel is allowed to set the db list nodes, so long as it
71   // restores the list nodes to their previous setting
72   const String& dace_method_pointer
73     = problem_db.get_string("model.dace_method_pointer");
74   const String& actual_model_pointer
75     = problem_db.get_string("model.surrogate.actual_model_pointer");
76   bool dace_construct = !dace_method_pointer.empty(),
77       model_construct = (dace_construct || !actual_model_pointer.empty());
78   size_t method_index = _NPOS, model_index = _NPOS;
79   if (dace_construct) {
80     method_index = problem_db.get_db_method_node(); // for restoration
81     model_index  = problem_db.get_db_model_node();  // for restoration
82     problem_db.set_db_list_nodes(dace_method_pointer);
83   }
84   else if (model_construct) {
85     model_index = problem_db.get_db_model_node(); // for restoration
86     problem_db.set_db_model_nodes(actual_model_pointer);
87   }
88 
89   // Instantiate actual model from DB
90   bool basis_expansion = false;  short u_space_type;
91   if (model_construct) {
92     // If approx type uses standardized random variables (hard-wired for now),
93     // wrap with a ProbabilityTransformModel (retaining distribution bounds as
94     // in PCE, SC, C3 ctors)
95     if (strends(surrogateType, "_orthogonal_polynomial") ||
96 	strends(surrogateType, "_interpolation_polynomial")) {
97       basis_expansion = true;
98       u_space_type = problem_db.get_short("model.surrogate.expansion_type");
99     }
100     else if (strends(surrogateType, "_function_train" )) {
101       basis_expansion = true;
102       // Hardwire for C3 case prior to availability of XML spec:
103       u_space_type = PARTIAL_ASKEY_U;//problem_db.get_short("model.surrogate.expansion_type");
104     }
105     else {
106       actualModel = problem_db.get_model();
107       // leave mvDist as initialized in Model ctor (from variables spec)
108     }
109 
110     if (basis_expansion) {
111       actualModel.assign_rep(std::make_shared<ProbabilityTransformModel>
112 			     (problem_db.get_model(), u_space_type));
113       // overwrite mvDist from Model ctor by copying transformed u-space dist
114       // (keep them distinct to allow for different active views).
115       // construct time augmented with run time pull_distribution_parameters().
116       mvDist = actualModel.multivariate_distribution().copy();
117     }
118 
119     // ensure consistency of inputs/outputs between actual and approx
120     check_submodel_compatibility(actualModel);
121   }
122 
123   // Instantiate dace iterator from DB
124   if (dace_construct) {
125     daceIterator = problem_db.get_iterator(actualModel); // no meta-iterators
126     daceIterator.sub_iterator_flag(true);
127     // if outer level output is verbose/debug and actualModel verbosity is
128     // defined by the DACE method spec, request fine-grained evaluation
129     // reporting for purposes of the final output summary.  This allows verbose
130     // final summaries without verbose output on every dace-iterator completion.
131     if (outputLevel > NORMAL_OUTPUT)
132       actualModel.fine_grained_evaluation_counters();
133   }
134 
135   // reset all method/model pointers
136   if (dace_construct) {
137     problem_db.set_db_method_node(method_index); // restore method only
138     problem_db.set_db_model_nodes(model_index);  // restore all model nodes
139   }
140   else if (model_construct)
141     problem_db.set_db_model_nodes(model_index);  // restore all model nodes
142   // else global approx. built solely from reuse_points: daceIterator/
143   // actualModel remain empty envelopes.  Verify that there is a data source:
144   // this basic check is augmented with a build_global() check which enforces
145   // that the total points from both sources be >= minimum required.
146   else if ( pointReuse == "none" ) {
147     Cerr << "Error: to build a data fit surrogate model, either a global "
148 	 << "approximation\n       must be specified with reuse_points or "
149 	 << "dace_method_pointer, or a\n       local/multipoint approximation "
150 	 << "must be specified with an actual_model_pointer." << std::endl;
151     abort_handler(MODEL_ERROR);
152   }
153 
154   // assign the ApproximationInterface instance which manages the
155   // local/multipoint/global approximation.  The number of
156   // approximation variables is defined by the active variable set in
157   // the sub-model, and any conversions based on differing variable
158   // views must be performed in ApproximationInterface::map().
159   const Variables& vars = (actualModel.is_null()) ? currentVariables :
160     actualModel.current_variables();
161   bool cache = false; String am_interface_id;
162   if (!actualModel.is_null()) {
163     am_interface_id = actualModel.interface_id();
164     // for ApproximationInterface to be able to look up actualModel eval records
165     // within data_pairs, the actualModel must have an active evaluation cache
166     // and derivative estimation (which causes consolidation of Interface evals
167     // within Model evals, breaking Model eval lookups) must be off.
168     // Note: use of ProbabilityTransform recursion prevents data_pairs lookup
169     if ( actualModel.evaluation_cache(false) &&
170 	!actualModel.derivative_estimation())
171       cache = true;
172   }
173   // size approxInterface based on currentResponse, which is constructed from
174   // DB response spec, since actualModel could contain response aggregations
175   approxInterface.assign_rep(std::make_shared<ApproximationInterface>
176 			     (problem_db, vars, cache, am_interface_id,
177 			      currentResponse.function_labels()));
178 
179   // initialize the basis, if needed
180   if (basis_expansion)
181     shared_approximation().construct_basis(mvDist);
182 
183   // initialize the DiscrepancyCorrection instance
184   switch (responseMode) {
185   case MODEL_DISCREPANCY: case AUTO_CORRECTED_SURROGATE:
186     if (corrType)
187       deltaCorr.initialize(*this, surrogateFnIndices, corrType,
188 	problem_db.get_short("model.surrogate.correction_order"));
189     break;
190   }
191 
192   if (import_pts)
193     import_points(problem_db.get_ushort("model.surrogate.import_build_format"),
194       problem_db.get_bool("model.surrogate.import_use_variable_labels"),
195       problem_db.get_bool("model.surrogate.import_build_active_only"));
196   if (export_pts)
197     initialize_export();
198   if (import_pts || export_pts)
199     manage_data_recastings();
200 }
201 
202 
203 DataFitSurrModel::
DataFitSurrModel(Iterator & dace_iterator,Model & actual_model,const ActiveSet & set,const String & approx_type,const UShortArray & approx_order,short corr_type,short corr_order,short data_order,short output_level,const String & point_reuse,const String & import_build_points_file,unsigned short import_build_format,bool import_build_active_only,const String & export_approx_points_file,unsigned short export_approx_format)204 DataFitSurrModel(Iterator& dace_iterator, Model& actual_model,
205 		 //const SharedVariablesData& svd,const SharedResponseData& srd,
206 		 const ActiveSet& set, const String& approx_type,
207 		 const UShortArray& approx_order, short corr_type,
208 		 short corr_order, short data_order, short output_level,
209 		 const String& point_reuse,
210 		 const String& import_build_points_file,
211 		 unsigned short import_build_format,
212 		 bool import_build_active_only,
213 		 const String& export_approx_points_file,
214 		 unsigned short export_approx_format):
215   // SVD can be shared, but don't share SRD as QoI +aggregations are consumed:
216   SurrogateModel(actual_model.problem_description_db(),
217 		 actual_model.parallel_library(),
218 		 actual_model.current_variables().shared_data(), true,
219 		 actual_model.current_response().shared_data(), false,
220 		 set, corr_type, output_level),
221   daceIterator(dace_iterator), actualModel(actual_model), pointsTotal(0),
222   pointsManagement(DEFAULT_POINTS), pointReuse(point_reuse),
223   exportSurrogate(false), exportPointsFile(export_approx_points_file),
224   exportFormat(export_approx_format),
225   importPointsFile(import_build_points_file), autoRefine(false),
226   maxIterations(100), maxFuncEvals(1000), convergenceTolerance(1e-4),
227   softConvergenceLimit(0), refineCVMetric("root_mean_square"), refineCVFolds(10)
228 {
229   // dace_iterator may be an empty envelope (local, multipoint approx),
230   // but actual_model must be defined.
231   if (actualModel.is_null()) {
232     Cerr << "Error: actualModel is empty envelope in alternate "
233 	 << "DataFitSurrModel constructor." << std::endl;
234     abort_handler(MODEL_ERROR);
235   }
236 
237   surrogateType = approx_type;
238 
239   bool import_pts = !importPointsFile.empty(),
240     export_pts = !exportPointsFile.empty() || !exportVarianceFile.empty();
241   if (pointReuse.empty()) // assign default
242     pointReuse = (import_pts) ? "all" : "none";
243 
244   // copy actualModel dist (keep distinct to allow for different active views).
245   // ref values for distribution params at construct time are updated at run
246   // time via pull_distribution_parameters().
247   mvDist = actualModel.multivariate_distribution().copy();
248 
249   // update constraint counts in userDefinedConstraints.
250   userDefinedConstraints.reshape(actualModel.num_nonlinear_ineq_constraints(),
251 				 actualModel.num_nonlinear_eq_constraints(),
252 				 actualModel.num_linear_ineq_constraints(),
253 				 actualModel.num_linear_eq_constraints());
254 
255   update_from_model(actualModel);
256   check_submodel_compatibility(actualModel);
257 
258   // for ApproximationInterface to be able to look up actualModel eval records
259   // within data_pairs, the actualModel must have an active evaluation cache
260   // and derivative estimation (which causes consolidation of Interface evals
261   // within Model evals, breaking Model eval lookups) must be off.
262   bool cache = ( actualModel.evaluation_cache(false) &&
263 		!actualModel.derivative_estimation() );
264   // assign the ApproximationInterface instance which manages the
265   // local/multipoint/global approximation.  By instantiating with assign_rep(),
266   // Interface::get_interface() does not need special logic for approximations.
267   approxInterface.assign_rep(std::make_shared<ApproximationInterface>(approx_type,
268     approx_order, actualModel.current_variables(), cache,
269     actualModel.interface_id(), numFns, data_order, outputLevel));
270 
271   if (!daceIterator.is_null()) // global DACE approximations
272     daceIterator.sub_iterator_flag(true);
273   //else { // local/multipoint approximation
274   //}
275 
276   // initialize the DiscrepancyCorrection instance
277   deltaCorr.initialize(*this, surrogateFnIndices, corr_type, corr_order);
278 
279   // to define derivative settings, we use incoming ASV to define requests
280   // and surrogate type to determine analytic derivative support.
281   const ShortArray& asv = set.request_vector();
282   size_t i, num_fns = asv.size();
283   bool grad_flag = false, hess_flag = false;
284   for (i=0; i<num_fns; ++i) {
285     if (asv[i] & 2) grad_flag = true;
286     if (asv[i] & 4) hess_flag = true;
287   }
288   if (grad_flag)
289     gradientType = (approx_type == "global_polynomial" ||
290       approx_type == "global_gaussian" || approx_type == "global_kriging" ||
291       approx_type == "global_moving_least_squares" ||
292       strends(approx_type, "_orthogonal_polynomial") ||
293       strends(approx_type, "_interpolation_polynomial") ||
294       strbegins(approx_type, "local_") ||
295       strbegins(approx_type, "multipoint_")) ? "analytic" : "numerical";
296   else
297     gradientType = "none";
298   if (hess_flag)
299     hessianType = ( strbegins(approx_type, "local_") ||
300       approx_type == "global_polynomial" || approx_type == "global_kriging" ||
301       strends(approx_type, "_orthogonal_polynomial"))
302     //strends(approx_type, "_interpolation_polynomial")) // TO DO
303       ? "analytic" : "numerical";
304   else
305     hessianType = "none";
306 
307   if (outputLevel > NORMAL_OUTPUT)
308     Cout << "DFS gradientType = " << gradientType
309 	 << " DFS hessianType = " << hessianType << std::endl;
310 
311   // Promote fdGradStepSize/fdHessByFnStepSize/fdHessByGradStepSize to
312   // defaults if needed.
313   if (gradientType == "numerical") { // mixed not supported for this Model
314     methodSource = "dakota"; intervalType = "central";
315     fdGradStepType = "relative";
316     fdGradStepSize.resize(1); fdGradStepSize[0] = 0.001;
317   }
318   if (hessianType == "numerical") { // mixed not supported for this Model
319     if (gradientType == "numerical") {
320       fdHessStepType = "relative";
321       fdHessByFnStepSize.resize(1); fdHessByFnStepSize[0] = 0.002;
322     }
323     else
324       { fdHessByGradStepSize.resize(1); fdHessByGradStepSize[0] = 0.001; }
325   }
326 
327   // ignore bounds when finite differencing on data fits, since the bounds are
328   // artificial in this case (and reflecting the stencil degrades accuracy)
329   ignoreBounds = true;
330 
331   // TODO: pass import_use_var_labels from lightweight DFSModel ctor
332   bool import_use_var_labels = false;
333   if (import_pts) import_points(import_build_format, import_use_var_labels,
334 				import_build_active_only);
335   if (export_pts) initialize_export();
336   if (import_pts || export_pts) manage_data_recastings();
337 }
338 
339 
check_submodel_compatibility(const Model & sub_model)340 void DataFitSurrModel::check_submodel_compatibility(const Model& sub_model)
341 {
342   bool error_flag = false;
343   // Check for compatible array sizing between sub_model and currentResponse.
344   // HierarchSurrModel creates aggregations and DataFitSurrModel consumes them.
345   // For now, allow either a factor of 2 or 1 from aggregation or not.  In the
346   // future, aggregations may span a broader model hierarchy (e.g., factor =
347   // orderedModels.size()).  In general, the fn count check needs to be
348   // specialized in the derived classes.
349   size_t sm_qoi = sub_model.qoi();
350   if ( numFns != sm_qoi) {
351     Cerr << "Error: incompatibility between approximate and actual model "
352 	 << "response function sets\n       within DataFitSurrModel: " << numFns
353 	 << " approximate and " << sm_qoi << " actual functions.\n       "
354 	 << "Check consistency of responses specifications." << std::endl;
355     error_flag = true;
356   }
357 
358   // check view-based variable counts:
359   SurrogateModel::check_submodel_compatibility(sub_model);
360   // cases not covered by the SurrogateModel check are disallowed for DFSModel
361   short active_view = currentVariables.view().first,
362      sm_active_view = sub_model.current_variables().view().first;
363   if ( !( active_view == sm_active_view ||
364 	( ( sm_active_view == RELAXED_ALL || sm_active_view == MIXED_ALL ) &&
365 	  active_view >= RELAXED_DESIGN ) ||
366 	( ( active_view == RELAXED_ALL || active_view == MIXED_ALL ) &&
367 	  sm_active_view >= RELAXED_DESIGN ) ) ) {
368     Cerr << "Error: unsupported variable view differences between "
369    	     << "approximate and actual models within DataFitSurrModel."
370    	     << std::endl;
371     error_flag = true;
372   }
373 
374   if (error_flag)
375     abort_handler(-1);
376 }
377 
378 
initialize_mapping(ParLevLIter pl_iter)379 bool DataFitSurrModel::initialize_mapping(ParLevLIter pl_iter)
380 {
381   Model::initialize_mapping(pl_iter);
382   actualModel.initialize_mapping(pl_iter);
383 
384   // push data that varies per iterator execution rather than per-evaluation
385   // from currentVariables and userDefinedConstraints into actualModel
386   init_model(actualModel);
387 
388   return false; // no change to problem size
389 }
390 
391 
392 /** Inactive variables must be propagated when a HierarchSurrModel
393     is employed by a sub-iterator (e.g., OUU with MLMC or MLPCE).
394     In current use cases, this can occur once per sub-iterator
395     execution within Model::initialize_mapping(). */
finalize_mapping()396 bool DataFitSurrModel::finalize_mapping()
397 {
398   actualModel.finalize_mapping();
399   Model::finalize_mapping();
400 
401   return false; // no change to problem size
402 }
403 
404 
405 /** asynchronous flags need to be initialized for the sub-models.  In addition,
406     max_eval_concurrency is the outer level iterator concurrency, not the
407     DACE concurrency that actualModel will see, and recomputing the
408     message_lengths on the sub-model is probably not a bad idea either.
409     Therefore, recompute everything on actualModel using init_communicators. */
410 void DataFitSurrModel::
derived_init_communicators(ParLevLIter pl_iter,int max_eval_concurrency,bool recurse_flag)411 derived_init_communicators(ParLevLIter pl_iter, int max_eval_concurrency,
412 			   bool recurse_flag)
413 {
414   // initialize approxInterface (for serial operations).
415   // Note: this is where max_eval_concurrency would be used.
416   //approxInterface.init_serial();
417 
418   // initialize actualModel for parallel operations
419   if (recurse_flag && !actualModel.is_null()) {
420 
421     // minimum_points() returns the minimum number of points needed to build
422     // approxInterface (global and local approximations) without any numerical
423     // derivatives multiplier.  Obtain the deriv multiplier from actualModel.
424     // min_points does not account for reuse_points or anchor, since these
425     // will vary, and min_points must remain constant among ctor/run/dtor.
426     int min_conc = approxInterface.minimum_points(false)
427                  * actualModel.derivative_concurrency();
428     // as for constructors, we recursively set and restore DB list nodes
429     // (initiated from the restored starting point following construction)
430     size_t model_index = probDescDB.get_db_model_node(); // for restoration
431     if (daceIterator.is_null()) {
432       // store within empty envelope for later use in derived_{set,free}_comms
433       daceIterator.maximum_evaluation_concurrency(min_conc);
434       daceIterator.iterated_model(actualModel);
435       // init comms for actualModel
436       probDescDB.set_db_model_nodes(actualModel.model_id());
437       actualModel.init_communicators(pl_iter, min_conc);
438     }
439     else {
440       // daceIterator.maximum_evaluation_concurrency() includes user-specified
441       // samples for building a global approx & any numerical deriv multiplier.
442       // Analyzer::maxEvalConcurrency must remain constant for ctor/run/dtor.
443 
444       // The concurrency for global/local surrogate construction is defined by
445       // the greater of the dace samples user-specification and the min_points
446       // approximation requirement.
447       if (min_conc > daceIterator.maximum_evaluation_concurrency())
448 	daceIterator.maximum_evaluation_concurrency(min_conc); // update
449 
450       // init comms for daceIterator
451       size_t method_index = probDescDB.get_db_method_node(); // for restoration
452       probDescDB.set_db_list_nodes(daceIterator.method_id());
453       daceIterator.init_communicators(pl_iter);
454       probDescDB.set_db_method_node(method_index); // restore method only
455     }
456     probDescDB.set_db_model_nodes(model_index); // restore all model nodes
457   }
458 }
459 
460 
461 /** This function constructs a new approximation, discarding any
462     previous data.  It constructs any required data for
463     SurrogateData::{vars,resp}Data and does not define an anchor point
464     for SurrogateData::anchor{Vars,Resp}, so is an unconstrained build. */
build_approximation()465 void DataFitSurrModel::build_approximation()
466 {
467   Cout << "\n>>>>> Building " << surrogateType << " approximations.\n";
468 
469   // update actualModel w/ variable values/bounds/labels
470   update_model(actualModel);
471 
472   // build a local, multipoint, or global data fit approximation.
473   if (strbegins(surrogateType, "local_") ||
474       strbegins(surrogateType, "multipoint_")) { // NOTE: branch used by TRMM
475     update_local_reference();
476     build_local_multipoint();
477   }
478   else { // global approximation.  NOTE: branch not used by TRMM.
479     update_global_reference();
480     clear_approx_interface();
481     build_global();
482   }
483 
484   Cout << "\n<<<<< " << surrogateType << " approximation builds completed.\n";
485 }
486 
487 
488 /** This function constructs a new approximation, discarding any
489     previous data.  It uses the passed data to populate
490     SurrogateData::anchor{Vars,Resp} and constructs any required data
491     points for SurrogateData::{vars,resp}Data. */
492 bool DataFitSurrModel::
build_approximation(const Variables & vars,const IntResponsePair & response_pr)493 build_approximation(const Variables& vars, const IntResponsePair& response_pr)
494 {
495   // Usage notes:
496   // > not used by SBLM local/multipoint
497   // > used by SBLM global *with* persistent center vars,response
498   // > used by NonDLocal *without* persistent vars,response
499 
500   Cout << "\n>>>>> Building " << surrogateType << " approximations.\n";
501 
502   // update actualModel w/ variable values/bounds/labels
503   update_model(actualModel);
504 
505   // build a local, multipoint, or global data fit approximation.
506   if (strbegins(surrogateType, "local_") ||
507       strbegins(surrogateType, "multipoint_")) {// NOTE: branch not used by TRMM
508     update_local_reference();
509     build_local_multipoint(vars, response_pr);
510   }
511   else { // global approximation.  NOTE: branch used by TRMM.
512     update_global_reference();
513     update_approx_interface(vars, response_pr);
514     build_global();
515   }
516 
517   Cout << "\n<<<<< " << surrogateType << " approximation builds completed.\n";
518 
519   // return a bool indicating whether the incoming data defines an embedded
520   // correction (hard constraint) or just another data point.  It would be
521   // preferable to flow this up from the surrogate, but keep it simple for now.
522   return (strbegins(surrogateType, "local_") ||
523 	  strbegins(surrogateType, "multipoint_") ||
524 	  surrogateType == "global_polynomial");
525 }
526 
527 
528 /** This function updates an existing approximation, by appending new data.
529     It does not define an anchor point, so is an unconstrained build. */
rebuild_approximation()530 void DataFitSurrModel::rebuild_approximation()
531 {
532   if (outputLevel >= NORMAL_OUTPUT)
533     Cout << "\n>>>>> Rebuilding " << surrogateType << " approximations.\n";
534 
535   // update actualModel w/ variable values/bounds/labels
536   update_model(actualModel);
537 
538   // rebuild a local, multipoint, or global data fit approximation
539   if (strbegins(surrogateType, "local_") ||
540       strbegins(surrogateType, "multipoint_")) {
541     //update_local_reference();//updates from build_approximation() remain valid
542     build_local_multipoint(); // no change for build vs. rebuild
543   }
544   else { // global approximation
545     //update_global_reference();// updates from build_approximation remain valid
546     rebuild_global();
547   }
548 
549   if (outputLevel >= NORMAL_OUTPUT)
550     Cout << "\n<<<<< "<< surrogateType <<" approximation rebuilds completed.\n";
551 }
552 
553 
554 /** This function populates/replaces SurrogateData::anchor{Vars,Resp}
555     and rebuilds the approximation, if requested.  It does not clear
556     other data (i.e., SurrogateData::{vars,resp}Data) and does not
557     update the actualModel with revised bounds, labels, etc.  Thus, it
558     updates data from a previous call to build_approximation(), and is
559     not intended to be used in isolation. */
update_approximation(bool rebuild_flag)560 void DataFitSurrModel::update_approximation(bool rebuild_flag)
561 {
562   if (outputLevel >= NORMAL_OUTPUT)
563     Cout << "\n>>>>> Updating " << surrogateType << " approximations.\n";
564 
565   // replace the current points for each approximation
566   //daceIterator.run(pl_iter);
567   const IntResponseMap& all_resp = daceIterator.all_responses();
568   if (daceIterator.compact_mode())
569     approxInterface.update_approximation(daceIterator.all_samples(),  all_resp);
570   else
571     approxInterface.update_approximation(daceIterator.all_variables(),all_resp);
572 
573   if (rebuild_flag) // update the coefficients for each approximation
574     rebuild_approximation(all_resp);
575 
576   if (outputLevel >= NORMAL_OUTPUT)
577     Cout << "\n<<<<< " << surrogateType
578 	 << " approximation updates completed.\n";
579 }
580 
581 
582 /** This function populates/replaces SurrogateData::anchor{Vars,Resp}
583     and rebuilds the approximation, if requested.  It does not clear
584     other data (i.e., SurrogateData::{vars,resp}Data) and does not
585     update the actualModel with revised bounds, labels, etc.  Thus, it
586     updates data from a previous call to build_approximation(), and is
587     not intended to be used in isolation. */
588 void DataFitSurrModel::
update_approximation(const Variables & vars,const IntResponsePair & response_pr,bool rebuild_flag)589 update_approximation(const Variables& vars, const IntResponsePair& response_pr,
590 		     bool rebuild_flag)
591 {
592   if (outputLevel >= NORMAL_OUTPUT)
593     Cout << "\n>>>>> Updating " << surrogateType << " approximations.\n";
594 
595   // populate/replace the anchor point for each approximation
596   approxInterface.update_approximation(vars, response_pr); // update anchor pt
597 
598   if (rebuild_flag)
599     rebuild_approximation(response_pr);
600 
601   if (outputLevel >= NORMAL_OUTPUT)
602     Cout << "\n<<<<< " << surrogateType
603 	 << " approximation updates completed.\n";
604 }
605 
606 
607 /** This function populates/replaces SurrogateData::{vars,resp}Data
608     and rebuilds the approximation, if requested.  It does not clear
609     other data (i.e., SurrogateData::anchor{Vars,Resp}) and does not
610     update the actualModel with revised bounds, labels, etc.  Thus, it
611     updates data from a previous call to build_approximation(), and is
612     not intended to be used in isolation. */
613 void DataFitSurrModel::
update_approximation(const VariablesArray & vars_array,const IntResponseMap & resp_map,bool rebuild_flag)614 update_approximation(const VariablesArray& vars_array,
615 		     const IntResponseMap& resp_map, bool rebuild_flag)
616 {
617   if (outputLevel >= NORMAL_OUTPUT)
618     Cout << "\n>>>>> Updating " << surrogateType << " approximations.\n";
619 
620   // populate/replace the current points for each approximation
621   approxInterface.update_approximation(vars_array, resp_map);
622 
623   if (rebuild_flag)
624     rebuild_approximation(resp_map);
625 
626   if (outputLevel >= NORMAL_OUTPUT)
627     Cout << "\n<<<<< " << surrogateType
628 	 << " approximation updates completed.\n";
629 }
630 
631 
632 /** This function populates/replaces SurrogateData::{vars,resp}Data
633     and rebuilds the approximation, if requested.  It does not clear
634     other data (i.e., SurrogateData::anchor{Vars,Resp}) and does not
635     update the actualModel with revised bounds, labels, etc.  Thus, it
636     updates data from a previous call to build_approximation(), and is
637     not intended to be used in isolation. */
638 void DataFitSurrModel::
update_approximation(const RealMatrix & samples,const IntResponseMap & resp_map,bool rebuild_flag)639 update_approximation(const RealMatrix& samples, const IntResponseMap& resp_map,
640 		     bool rebuild_flag)
641 {
642   if (outputLevel >= NORMAL_OUTPUT)
643     Cout << "\n>>>>> Updating " << surrogateType << " approximations.\n";
644 
645   // populate/replace the current points for each approximation
646   approxInterface.update_approximation(samples, resp_map);
647 
648   if (rebuild_flag)
649     rebuild_approximation(resp_map);
650 
651   if (outputLevel >= NORMAL_OUTPUT)
652     Cout << "\n<<<<< " << surrogateType
653 	 << " approximation updates completed.\n";
654 }
655 
656 
657 /** This function appends all{Samples,Variables,Responses} to
658     SurrogateData::{vars,resp}Data and rebuilds the approximation,
659     if requested. */
append_approximation(bool rebuild_flag)660 void DataFitSurrModel::append_approximation(bool rebuild_flag)
661 {
662   // append to the current points for each approximation
663   //daceIterator.run(pl_iter);
664   const IntResponseMap& all_resp = daceIterator.all_responses();
665   if (outputLevel >= NORMAL_OUTPUT)
666     Cout << "\n>>>>> Appending " << all_resp.size() << " points to "
667 	 << surrogateType << " approximations.\n";
668   if (daceIterator.compact_mode())
669     approxInterface.append_approximation(daceIterator.all_samples(),  all_resp);
670   else
671     approxInterface.append_approximation(daceIterator.all_variables(),all_resp);
672 
673   if (rebuild_flag)
674     rebuild_approximation(all_resp);
675 
676   if (outputLevel >= NORMAL_OUTPUT)
677     Cout << "\n<<<<< " << surrogateType
678 	 << " approximation updates completed.\n";
679 }
680 
681 
682 /** This function appends one point to SurrogateData::{vars,resp}Data
683     and rebuilds the approximation, if requested.  It does not modify
684     other data (i.e., SurrogateData::anchor{Vars,Resp}) and does not
685     update the actualModel with revised bounds, labels, etc.  Thus, it
686     appends to data from a previous call to build_approximation(), and
687     is not intended to be used in isolation. */
688 void DataFitSurrModel::
append_approximation(const Variables & vars,const IntResponsePair & response_pr,bool rebuild_flag)689 append_approximation(const Variables& vars, const IntResponsePair& response_pr,
690 		     bool rebuild_flag)
691 {
692   if (outputLevel >= NORMAL_OUTPUT)
693     Cout << "\n>>>>> Appending to " << surrogateType << " approximations.\n";
694 
695   // append to the current points for each approximation
696   approxInterface.append_approximation(vars, response_pr);
697 
698   if (rebuild_flag)
699     rebuild_approximation(response_pr);
700 
701   if (outputLevel >= NORMAL_OUTPUT)
702     Cout << "\n<<<<< " << surrogateType
703 	 << " approximation updates completed.\n";
704 }
705 
706 
707 /** This function appends multiple points to SurrogateData::{vars,resp}Data
708     and rebuilds the approximation, if requested.  It does not modify other
709     data (i.e., SurrogateData::anchor{Vars,Resp}) and does not update the
710     actualModel with revised bounds, labels, etc.  Thus, it appends to data
711     from a previous call to build_approximation(), and is not intended to
712     be used in isolation. */
713 void DataFitSurrModel::
append_approximation(const VariablesArray & vars_array,const IntResponseMap & resp_map,bool rebuild_flag)714 append_approximation(const VariablesArray& vars_array,
715 		     const IntResponseMap& resp_map, bool rebuild_flag)
716 {
717   if (outputLevel >= NORMAL_OUTPUT)
718     Cout << "\n>>>>> Appending to " << surrogateType << " approximations.\n";
719 
720   // append to the current points for each approximation
721   approxInterface.append_approximation(vars_array, resp_map);
722 
723   if (rebuild_flag)
724     rebuild_approximation(resp_map);
725 
726   if (outputLevel >= NORMAL_OUTPUT)
727     Cout << "\n<<<<< " << surrogateType
728 	 << " approximation updates completed.\n";
729 }
730 
731 
732 /** This function appends multiple points to SurrogateData::{vars,resp}Data
733     and rebuilds the approximation, if requested.  It does not modify other
734     data (i.e., SurrogateData::anchor{Vars,Resp}) and does not update the
735     actualModel with revised bounds, labels, etc.  Thus, it appends to data
736     from a previous call to build_approximation(), and is not intended to
737     be used in isolation. */
738 void DataFitSurrModel::
append_approximation(const RealMatrix & samples,const IntResponseMap & resp_map,bool rebuild_flag)739 append_approximation(const RealMatrix& samples, const IntResponseMap& resp_map,
740 		     bool rebuild_flag)
741 {
742   if (outputLevel >= NORMAL_OUTPUT)
743     Cout << "\n>>>>> Appending to " << surrogateType << " approximations.\n";
744 
745   // append to the current points for each approximation
746   approxInterface.append_approximation(samples, resp_map);
747 
748   if (rebuild_flag)
749     rebuild_approximation(resp_map);
750 
751   if (outputLevel >= NORMAL_OUTPUT)
752     Cout << "\n<<<<< " << surrogateType
753 	 << " approximation updates completed.\n";
754 }
755 
756 
pop_approximation(bool save_surr_data,bool rebuild_flag)757 void DataFitSurrModel::pop_approximation(bool save_surr_data, bool rebuild_flag)
758 {
759   if (outputLevel >= NORMAL_OUTPUT)
760     Cout << "\n>>>>> Popping data from " << surrogateType
761 	 << " approximations.\n";
762 
763   // append to the current points for each approximation
764   approxInterface.pop_approximation(save_surr_data);
765 
766   if (rebuild_flag) { // update the coefficients for each approximation
767     BitArray rebuild_fns; // empty: default rebuild of all fns
768     approxInterface.rebuild_approximation(rebuild_fns);
769     ++approxBuilds;
770   }
771 
772   if (outputLevel >= NORMAL_OUTPUT)
773     Cout << "\n<<<<< " << surrogateType
774          << " approximation data removal completed.\n";
775 }
776 
777 
push_approximation()778 void DataFitSurrModel::push_approximation()//(bool rebuild_flag)
779 {
780   if (outputLevel >= NORMAL_OUTPUT)
781     Cout << "\n>>>>> Retrieving " << surrogateType << " approximation data.\n";
782 
783   // append to the current points for each approximation
784   approxInterface.push_approximation();
785 
786   /*
787   if (rebuild_flag) { // update the coefficients for each approximation
788     BitArray rebuild_fns; // empty: default rebuild of all fns
789     approxInterface.rebuild_approximation(rebuild_fns);
790     ++approxBuilds;
791   }
792   */
793 
794   if (outputLevel >= NORMAL_OUTPUT)
795     Cout << "\n<<<<< " << surrogateType << " approximation data retrieved.\n";
796 }
797 
798 
finalize_approximation()799 void DataFitSurrModel::finalize_approximation()//(bool rebuild_flag)
800 {
801   if (outputLevel >= NORMAL_OUTPUT)
802     Cout << "\n>>>>> Finalizing " << surrogateType << " approximations.\n";
803 
804   // append to the current points for each approximation
805   approxInterface.finalize_approximation();
806 
807   /*
808   if (rebuild_flag) { // update the coefficients for each approximation
809     BitArray rebuild_fns; // empty: default rebuild of all fns
810     approxInterface.rebuild_approximation(rebuild_fns);
811     ++approxBuilds;
812   }
813   */
814 
815   if (outputLevel >= NORMAL_OUTPUT)
816     Cout << "\n<<<<< " << surrogateType << " approximation finalized.\n";
817 }
818 
819 
combine_approximation()820 void DataFitSurrModel::combine_approximation()
821 {
822   if (outputLevel >= NORMAL_OUTPUT)
823     Cout << "\n>>>>> Combining " << surrogateType << " approximations.\n";
824 
825   // Manage swap detection here or within Pecos::sharedPolyApproxData?
826   // Note: access to spec sequences are on Dakota side, but need to reach
827   // into Pecos driver levels (TPQ, SSG) and approx orders (regression) for
828   // access to final refinement levels (GSG starting levels could be the same).
829   //NonDIntegration* nond_int = (NonDIntegration*)daceIterator.iterator_rep();
830   //bool swap = !nond_int->maximal_grid();
831 
832   approxInterface.combine_approximation();
833 }
834 
835 
combined_to_active(bool clear_combined)836 void DataFitSurrModel::combined_to_active(bool clear_combined)
837 {
838   if (outputLevel >= NORMAL_OUTPUT)
839     Cout << "\n>>>>> Promoting combined " << surrogateType << " approximation "
840 	 << "to active approximation.\n";
841 
842   approxInterface.combined_to_active(clear_combined);
843 }
844 
845 
update_local_reference()846 void DataFitSurrModel::update_local_reference()
847 {
848   // Store the actualModel inactive variable values for use in force_rebuild()
849   // for determining whether an automatic approximation rebuild is required.
850 
851   // the actualModel data has been updated by update_model(), which precedes
852   // update_local_reference()
853 
854   const Variables& actual_vars = actualModel.current_variables();
855   if (actual_vars.view().first >= RELAXED_DESIGN) { // Distinct view
856     copy_data(actual_vars.inactive_continuous_variables(),    referenceICVars);
857     copy_data(actual_vars.inactive_discrete_int_variables(),  referenceIDIVars);
858     copy_data(actual_vars.inactive_discrete_real_variables(), referenceIDRVars);
859   }
860 }
861 
862 
update_global_reference()863 void DataFitSurrModel::update_global_reference()
864 {
865   // Store the actualModel active variable bounds and inactive variable values
866   // for use in force_rebuild() to determine whether an automatic approximation
867   // rebuild is required.
868 
869   // the actualModel data has been updated by update_model(), which precedes
870   // update_global_reference().
871 
872   const Variables& vars = (actualModel.is_null()) ? currentVariables :
873     actualModel.current_variables();
874   if (vars.view().first >= RELAXED_DESIGN) { // Distinct view
875     copy_data(vars.inactive_continuous_variables(),    referenceICVars);
876     copy_data(vars.inactive_discrete_int_variables(),  referenceIDIVars);
877     copy_data(vars.inactive_discrete_real_variables(), referenceIDRVars);
878   }
879 
880   if (!actualModel.is_null() && actualModel.model_type() == "recast") {
881     // dive through Model recursion to bypass recasting
882     Model sub_model = actualModel.subordinate_model();
883     while (sub_model.model_type() == "recast")
884       sub_model = sub_model.subordinate_model();
885     // update referenceCLBnds/referenceCUBnds/referenceDLBnds/referenceDUBnds
886     copy_data(sub_model.continuous_lower_bounds(),    referenceCLBnds);
887     copy_data(sub_model.continuous_upper_bounds(),    referenceCUBnds);
888     copy_data(sub_model.discrete_int_lower_bounds(),  referenceDILBnds);
889     copy_data(sub_model.discrete_int_upper_bounds(),  referenceDIUBnds);
890     copy_data(sub_model.discrete_real_lower_bounds(), referenceDRLBnds);
891     copy_data(sub_model.discrete_real_upper_bounds(), referenceDRUBnds);
892   }
893   else {
894     const Constraints& cons = (actualModel.is_null()) ? userDefinedConstraints :
895       actualModel.user_defined_constraints();
896     copy_data(cons.continuous_lower_bounds(),    referenceCLBnds);
897     copy_data(cons.continuous_upper_bounds(),    referenceCUBnds);
898     copy_data(cons.discrete_int_lower_bounds(),  referenceDILBnds);
899     copy_data(cons.discrete_int_upper_bounds(),  referenceDIUBnds);
900     copy_data(cons.discrete_real_lower_bounds(), referenceDRLBnds);
901     copy_data(cons.discrete_real_upper_bounds(), referenceDRUBnds);
902   }
903 }
904 
905 
clear_approx_interface()906 void DataFitSurrModel::clear_approx_interface()
907 {
908   // fresh build: clear out previous data, but preserve history if needed
909   // (multipoint preserves history for both {build,rebuild}_approximation())
910   approxInterface.clear_current_active_data();
911 }
912 
913 
914 void DataFitSurrModel::
update_approx_interface(const Variables & vars,const IntResponsePair & response_pr)915 update_approx_interface(const Variables& vars,
916 			const IntResponsePair& response_pr)
917 {
918   // fresh build: clear out previous data, but preserve history if needed
919   // (multipoint preserves history for both {build,rebuild}_approximation())
920   approxInterface.clear_current_active_data();
921   // populate/replace the anchor data.  When supported by the surrogate type
922   // (local, multipoint, equality-constrained global regression), this is
923   // enforced as a hard constraint. Otherwise, it is just another data point.
924   approxInterface.update_approximation(vars, response_pr);
925 }
926 
927 
build_approx_interface()928 void DataFitSurrModel::build_approx_interface()
929 {
930   if (actualModel.is_null())
931     approxInterface.build_approximation(
932       userDefinedConstraints.continuous_lower_bounds(),
933       userDefinedConstraints.continuous_upper_bounds(),
934       userDefinedConstraints.discrete_int_lower_bounds(),
935       userDefinedConstraints.discrete_int_upper_bounds(),
936       userDefinedConstraints.discrete_real_lower_bounds(),
937       userDefinedConstraints.discrete_real_upper_bounds());
938   else { // employ sub-model vars view, if available
939     approxInterface.build_approximation(
940       actualModel.continuous_lower_bounds(),
941       actualModel.continuous_upper_bounds(),
942       actualModel.discrete_int_lower_bounds(),
943       actualModel.discrete_int_upper_bounds(),
944       actualModel.discrete_real_lower_bounds(),
945       actualModel.discrete_real_upper_bounds());
946   }
947   if (exportSurrogate) {
948     // skip the ApproximationInterface layer and go directly to
949     // Approximations this could pass in the response name too,
950     // presumably, but the API for export_model requires all
951     // {resp_label, prefix, format} parameters or none to handle
952     // whether comes from shared data vs. Model...
953     for (auto approx : approximations())
954       approx.export_model(currentVariables);
955   }
956 }
957 
958 
959 /** Evaluate the value, gradient, and possibly Hessian needed for a
960     local or multipoint approximation using actualModel. */
build_local_multipoint()961 void DataFitSurrModel::build_local_multipoint()
962 {
963   // set DataFitSurrModel parallelism mode to actualModel
964   component_parallel_mode(TRUTH_MODEL_MODE);
965 
966   // Define the data requests
967   short asv_value = 3;
968   if (strbegins(surrogateType, "local_") &&
969       actualModel.hessian_type() != "none")
970     asv_value += 4;
971   ShortArray orig_asv(numFns), actual_asv;
972   ISIter it;
973   for (it=surrogateFnIndices.begin(); it!=surrogateFnIndices.end(); ++it)
974     orig_asv[*it] = asv_value;
975   asv_inflate_build(orig_asv, actual_asv);
976 
977   // Evaluate value and derivatives using actualModel
978   ActiveSet set = actualModel.current_response().active_set(); // copy
979   set.request_vector(actual_asv);
980   set.derivative_vector(actualModel.continuous_variable_ids());
981   actualModel.evaluate(set);
982 
983   // construct a new approximation using this actualModel evaluation
984   build_local_multipoint(actualModel.current_variables(),
985 			 IntResponsePair(actualModel.evaluation_id(),
986 					 actualModel.current_response()));
987 }
988 
989 
990 void DataFitSurrModel::
build_local_multipoint(const Variables & vars,const IntResponsePair & response_pr)991 build_local_multipoint(const Variables& vars,
992 		       const IntResponsePair& response_pr)
993 {
994   // push the anchor data to approxInterface
995   update_approx_interface(vars, response_pr);
996   // construct the new local/multipoint approximation
997   build_approx_interface();
998   ++approxBuilds;
999 }
1000 
1001 
1002 /** Determine points to use in building the approximation and then
1003     evaluate them on actualModel using daceIterator.  Any changes to
1004     the bounds should be performed by setting them at a higher level
1005     (e.g., SurrBasedOptStrategy). */
build_global()1006 void DataFitSurrModel::build_global()
1007 {
1008   // build_global() follows update_model() so we may use
1009   // actualModel.continuous_(lower/upper)_bounds() to avoid view
1010   // conversions and allow pass-by-reference.
1011 
1012   // **************************************************************************
1013   // Check data_pairs and importPointsFile for any existing evaluations to reuse
1014   // **************************************************************************
1015   size_t i, j, reuse_points = 0;
1016   int fn_index = *surrogateFnIndices.begin();
1017   const Pecos::SurrogateData& approx_data
1018     = approxInterface.approximation_data(fn_index);
1019   bool anchor = approx_data.anchor();
1020   if (pointReuse == "all" || pointReuse == "region") {
1021 
1022     size_t num_c_vars, num_di_vars, num_dr_vars;
1023     if (actualModel.is_null()) {
1024       num_c_vars  = currentVariables.cv();
1025       num_di_vars = currentVariables.div();
1026       num_dr_vars = currentVariables.drv();
1027     }
1028     else {
1029       num_c_vars  = actualModel.cv();
1030       num_di_vars = actualModel.div();
1031       num_dr_vars = actualModel.drv();
1032     }
1033 
1034     // Process PRPCache using default iterators (index 0 = ordered_non_unique).
1035     // This includes evals from current run, evals imported from restart, and
1036     // evals imported from a tabular file (DataFitSurrModel::import_points()).
1037     // Each of these data_pairs sources is in "user-space" (e.g., generated by
1038     // an ApplicationInterface).  To compare with DataFitSurrModel values and
1039     // bounds, any recastings within the model recursion must be managed.
1040     String am_interface_id;
1041     if (!actualModel.is_null()) am_interface_id = actualModel.interface_id();
1042     if(am_interface_id.empty()) am_interface_id = "NO_ID";
1043     ModelLRevIter ml_rit; PRPCacheCIter prp_iter;
1044     Variables db_vars; Response db_resp;
1045     bool map_to_iter_space = recastings();
1046     for (prp_iter=data_pairs.begin(); prp_iter!=data_pairs.end(); ++prp_iter) {
1047 
1048       const Variables& prp_vars = prp_iter->variables();
1049       const Response&  prp_resp = prp_iter->response();
1050       if (prp_iter->interface_id() == am_interface_id && consistent(prp_vars)) {
1051 	// apply any recastings below this level: we perform these recastings at
1052 	// run time (instead of once in import_points()) to support any updates
1053 	// to the transformations (e.g., distribution parameter updates).
1054 	if (map_to_iter_space)
1055 	  user_space_to_iterator_space(prp_vars, prp_resp, db_vars, db_resp);
1056 	else
1057 	  { db_vars = prp_vars; db_resp = prp_resp; }
1058 
1059 	// Note: since SurrBasedLocalMinimizer currently evaluates the trust
1060 	// region center first, we must take care to not include this point
1061 	// in the point reuse, since this would cause it to be used twice.
1062 	// Note: for NonD uses with u-space models, the global_bounds boolean
1063 	// in ProbabilityTransformModel ctor needs to be set in order to allow
1064 	// test of transformed bounds in "region" reuse case.  For "all" reuse
1065 	// case typically used with data import, this is not necessary.
1066 	if ( inside(db_vars) && !(anchor && // avoid anchor duplic
1067 	     active_vars_compare(db_vars, approx_data.anchor_variables())) ) {
1068 	  // Eval id definitions:
1069 	  //   id > 0 for unique evals from current execution
1070 	  //   id = 0 for evals from file import --> data_pairs
1071 	  //   id < 0 for non-unique evals from restart
1072 	  // update one point at a time since accumulation within an
1073 	  // IntResponseMap requires unique id's
1074 	  approxInterface.append_approximation(db_vars,
1075 	    std::make_pair(prp_iter->eval_id(), db_resp));
1076 	  ++reuse_points;
1077 
1078 	  if (outputLevel >= DEBUG_OUTPUT) {
1079 	    if (map_to_iter_space) Cout <<   "Transformed ";
1080 	    else                   Cout << "Untransformed ";
1081 	    Cout << "data for DB eval " << prp_iter->eval_id() << ":\n"
1082 		 << db_vars << db_resp;
1083 	  }
1084 	}
1085       }
1086     }
1087   }
1088 
1089   // *******************************************
1090   // Evaluate new data points using daceIterator
1091   // *******************************************
1092   int new_points = 0;
1093   if (daceIterator.is_null()) { // reused/imported data only (no new data)
1094     // check for sufficient data
1095     int min_points = approxInterface.minimum_points(true);
1096     if (reuse_points < min_points) {
1097       Cerr << "Error: a minimum of " << min_points << " points is required by "
1098 	   << "DataFitSurrModel::build_global.\n" << reuse_points
1099 	   << " were provided." << std::endl;
1100       abort_handler(MODEL_ERROR);
1101     }
1102   }
1103   else { // new data
1104 
1105     // set DataFitSurrModel parallelism mode to actualModel
1106     component_parallel_mode(TRUTH_MODEL_MODE);
1107 
1108     // daceIterator must generate at least diff_points samples, should
1109     // populate allData lists (allDataFlag = true), and should bypass
1110     // statistics computation (statsFlag = false).
1111     int diff_points = std::max(0, required_points() - (int)reuse_points);
1112     daceIterator.sampling_reset(diff_points, true, false);// update s.t. lwr bnd
1113     // The DACE iterator's samples{Spec,Ref} value provides a lower bound on
1114     // the number of samples generated: new_points = max(diff_points,reference).
1115     new_points = daceIterator.num_samples();
1116 
1117     // only run the iterator if work to do
1118     if (new_points) {
1119       run_dace();
1120       append_approximation(false); // append new data sets; defer build
1121     }
1122     else if (outputLevel >= DEBUG_OUTPUT)
1123       Cout << "DataFitSurrModel: No samples needed from DACE iterator."
1124 	   << std::endl;
1125   }
1126 
1127   //deltaCorr.compute(...need data...);
1128   // could add deltaCorr.compute() here and in HierarchSurrModel::
1129   // build_approximation if global approximations had easy access
1130   // to the truth/approx responses.  Instead, it is called from
1131   // SurrBasedLocalMinimizer using data from the trust region center.
1132 
1133   // **********************************
1134   // Now build the global approximation
1135   // **********************************
1136   String anchor_str = (anchor) ? "one" : "no";
1137   Cout << "Constructing global approximations with " << anchor_str
1138        << " anchor, " << new_points << " DACE samples, and " << reuse_points
1139        << " reused points.\n";
1140   if (autoRefine) refine_surrogate(); // BMA TODO: Move to an external refiner
1141   else            build_approx_interface();
1142   ++approxBuilds; // Note: auto-refined surrogate counts as 1 approx build
1143 }
1144 
1145 
1146 /** Determine points to use in rebuilding the approximation and
1147     then evaluate them on actualModel using daceIterator.  Assumes
1148     data imports/reuse have been handled previously within build_global(). */
rebuild_global()1149 void DataFitSurrModel::rebuild_global()
1150 {
1151   // rebuild_global() follows update_model() so we may use
1152   // actualModel.continuous_(lower/upper)_bounds() to avoid view
1153   // conversions and allow pass-by-reference.
1154 
1155   // *******************************************
1156   // Evaluate new data points using daceIterator
1157   // *******************************************
1158   size_t pts_i, curr_points = std::numeric_limits<size_t>::max();
1159   ISIter it;
1160   for (it=surrogateFnIndices.begin(); it!=surrogateFnIndices.end(); ++it) {
1161     pts_i = approxInterface.approximation_data(*it).points();
1162     if (pts_i < curr_points) curr_points = pts_i;
1163   }
1164   int new_points = 0;
1165   if (daceIterator.is_null()) { // reused/imported data only (no new data)
1166     // check for sufficient data
1167     int min_points = approxInterface.minimum_points(true);
1168     if (curr_points < min_points) {
1169       Cerr << "Error: a minimum of " << min_points << " points is required by "
1170 	   << "DataFitSurrModel::build_global.\n" << curr_points
1171 	   << " were provided." << std::endl;
1172       abort_handler(MODEL_ERROR);
1173     }
1174   }
1175   else { // new data
1176 
1177     // set DataFitSurrModel parallelism mode to actualModel
1178     component_parallel_mode(TRUTH_MODEL_MODE);
1179 
1180     int diff_points = std::max(0, required_points() - (int)curr_points);
1181     if (diff_points) { // only run the iterator if work to do
1182       // For a rebuild, we do not enforce a lower bound as in build_global():
1183       // daceIterator's samples{Spec,Ref} is intended to overlay minimum user
1184       // spec with imported/reqd data, by defining a lower bound on the number
1185       // of generated samples, e.g. new_points = max(diff_points, samplesRef)
1186       daceIterator.sampling_reference(0); // make new points = diff points
1187       // daceIterator generates diff_points samples, populates allData arrays
1188       // (allDataFlag = true), bypasses stats computation (statsFlag = false)
1189       daceIterator.sampling_reset(diff_points, true, false);
1190       run_dace();
1191       // append new data sets, rebuild approximation, increment approxBuilds
1192       append_approximation(true);
1193     }
1194     else if (approxInterface.formulation_updated()) {
1195       // Rebuild the approximation for updated formulation with existing data
1196 
1197       // This approach currently assumes an increment to data and coeffs:
1198       //BitArray rebuild_fns; // empty: default rebuild of all fns
1199       //approxInterface.rebuild_approximation(rebuild_fns);
1200 
1201       // Overwrite previous build without assumed increment to data/coeffs:
1202       build_approx_interface();
1203       ++approxBuilds; // Note: this is a replacement rather than an increment...
1204     }
1205     else if (outputLevel >= DEBUG_OUTPUT)
1206       Cout << "DataFitSurrModel: no rebuild as no new data and same surrogate "
1207 	   << "formulation." << std::endl;
1208   }
1209 }
1210 
1211 
run_dace()1212 void DataFitSurrModel::run_dace()
1213 {
1214   // Execute the daceIterator
1215 
1216   // daceIterator activeSet gets resized in DataFitSurrModel::
1217   // resize_from_subordinate_model(), but can be overwritten by top-level
1218   // Iterator (e.g., NonDExpansion::compute_expansion()
1219   const ShortArray& dace_asv = daceIterator.active_set_request_vector();
1220   if (dace_asv.size() != actualModel.response_size()) {
1221     ShortArray actual_asv;
1222     asv_inflate_build(dace_asv, actual_asv);
1223     daceIterator.active_set_request_vector(actual_asv);
1224   }
1225 
1226   // prepend hierarchical tag before running
1227   if (hierarchicalTagging) {
1228     String eval_tag = evalTagPrefix + '.' + std::to_string(surrModelEvalCntr+1);
1229     daceIterator.eval_tag_prefix(eval_tag);
1230   }
1231 
1232   // run the iterator
1233   ParLevLIter pl_iter = modelPCIter->mi_parallel_level_iterator(miPLIndex);
1234   daceIterator.run(pl_iter);
1235 }
1236 
1237 
refine_surrogate()1238 void DataFitSurrModel::refine_surrogate()
1239 {
1240   StringArray diag_metrics(1, refineCVMetric);
1241   int curr_iter = 0; // initial surrogate build is iteration 0
1242 
1243   // accumulate num_samples in each iteration in total_evals.
1244   int total_evals = 0, num_samples;
1245 
1246   // Disable the soft convergence limit by setting it to maxIterations if
1247   // the user didn't set it
1248   int soft_conv_limit = maxIterations;
1249   if(softConvergenceLimit != 0)
1250     soft_conv_limit = softConvergenceLimit;
1251 
1252   // accumulator for rolling average of length soft_conv_limit
1253   using namespace boost::accumulators;
1254   accumulator_set<Real, stats<tag::rolling_mean> > mean_err(
1255       tag::rolling_window::window_size = soft_conv_limit);
1256 
1257   num_samples = daceIterator.num_samples();
1258   total_evals += num_samples;
1259 
1260   // build surrogate from initial sample
1261   build_approx_interface();
1262   Real2DArray cv_diags =
1263     approxInterface.cv_diagnostics(diag_metrics, refineCVFolds);
1264   size_t resp_fns = currentResponse.num_functions();
1265   RealArray cv_per_fn(resp_fns);
1266   for (size_t i=0; i<resp_fns; ++i)
1267     cv_per_fn[i] = cv_diags[i][0];
1268   Real curr_err = *std::max_element(cv_per_fn.begin(), cv_per_fn.end());
1269   // keep prev_err to calculate improvement
1270   Real prev_err = curr_err;
1271   Cout << "\n------------\nAuto-refinement initial error (" << refineCVMetric
1272     << "): " << curr_err << std::endl;
1273   while (true) {
1274     // convergence conditions. messages will need to be fixed if we add
1275     // challenge error
1276     if(curr_err <= convergenceTolerance) {
1277       Cout << "\n------------\nAuto-refined surrogate(s) converged: "
1278         << "Cross-validation error criterion met.\n";
1279       break;
1280     } else if(curr_iter++ == maxIterations) {
1281       Cout << "\n------------\nAuto-refinment halted: Maximum iterations "
1282         << "met or exceeded.\n";
1283       break;
1284     } else if( curr_iter >= soft_conv_limit &&
1285         rolling_mean(mean_err) < convergenceTolerance) {
1286       Cout << "\n------------\nAuto-refinment halted: Average reduction in "
1287         << "cross-validation error over\nprevious " << soft_conv_limit
1288         << " iterations less than " << "convergence_tolerance ("
1289         << convergenceTolerance << ")\n";
1290       break;
1291     } else if(total_evals >= maxFuncEvals) {
1292       Cout << "\n------------\nAuto-refinment halted: Maximum function "
1293 	<< "evaluations met or exceeded.\n";
1294       break;
1295     }
1296 
1297     // sampling reset only resets the base numSamples; if there are
1298     // refinement samples, increment them here and add points
1299     daceIterator.sampling_increment();
1300     num_samples = daceIterator.num_samples();
1301     total_evals += num_samples;
1302     Cout << "\n------------\nRefining surrogate(s) with " << num_samples
1303 	 << " samples (iteration " << curr_iter << ")\n";
1304     run_dace();
1305     append_approximation(false); // append new data sets; don't rebuild
1306 
1307     // build and check diagnostics
1308     build_approx_interface();
1309     Real2DArray cv_diags =
1310       approxInterface.cv_diagnostics(diag_metrics, refineCVFolds);
1311     RealArray cv_per_fn(resp_fns);
1312     for (size_t i=0; i<resp_fns; ++i)
1313       cv_per_fn[i] = cv_diags[i][0];
1314     curr_err = *std::max_element(cv_per_fn.begin(), cv_per_fn.end());
1315     Cout << "\n------------\nAuto-refinement iteration " << curr_iter
1316       << " complete.\n";
1317     Cout << "Cross-validation error (" << refineCVMetric << "): " << curr_err
1318 	 << std::endl;
1319     mean_err(prev_err-curr_err);
1320     prev_err = curr_err;
1321     Cout << "Mean reduction in CV error: " << rolling_mean(mean_err)
1322 	 << std::endl;
1323   }
1324 }
1325 
1326 
consistent(const Variables & vars) const1327 bool DataFitSurrModel::consistent(const Variables& vars) const
1328 {
1329   size_t i, num_acv = vars.acv(), num_adiv = vars.adiv(),
1330     num_adsv = vars.adsv(), num_adrv = vars.adrv(), cv_start = vars.cv_start(),
1331     div_start = vars.div_start(), dsv_start = vars.dsv_start(),
1332     drv_start = vars.drv_start(), num_cv = vars.cv(), num_div = vars.div(),
1333     num_dsv = vars.dsv(), num_drv = vars.drv();
1334 
1335   const Variables& am_vars = (actualModel.is_null()) ?
1336     currentVariables : actualModel.current_variables();
1337 
1338   if (am_vars.acv()       != num_acv   || am_vars.adiv()      != num_adiv ||
1339       am_vars.adsv()      != num_adsv  || am_vars.adrv()      != num_adrv ||
1340       am_vars.cv_start()  != cv_start  || am_vars.div_start() != div_start ||
1341       am_vars.dsv_start() != dsv_start || am_vars.drv_start() != drv_start ||
1342       am_vars.cv()        != num_cv    || am_vars.div()       != num_div  ||
1343       am_vars.dsv()       != num_dsv   || am_vars.drv()       != num_drv ) {
1344     Cerr << "Warning: inconsistent variable counts in DataFitSurrModel::"
1345 	 << "consistent().  Excluding candidate data point.\n";
1346     return false;
1347   }
1348 
1349   size_t cv_end =  cv_start + num_cv,    div_end = div_start + num_div,
1350         dsv_end = dsv_start + num_dsv,   drv_end = drv_start + num_drv,
1351     num_post_cv = num_acv - cv_end, num_post_drv = num_adrv - drv_end;
1352 
1353   // This tolerance is important since imported data may lack full precision
1354   Real rel_tol = 1.e-10; // hardwired for now
1355 
1356   // complement of active cont vars must be identical (within rel tol)
1357   const RealVector&    acv =    vars.all_continuous_variables();
1358   const RealVector& am_acv = am_vars.all_continuous_variables();
1359   RealVector pre_cv(Teuchos::View, acv.values(),           cv_start),
1360             post_cv(Teuchos::View, acv.values()+cv_end,    num_post_cv),
1361           pre_am_cv(Teuchos::View, am_acv.values(),        cv_start),
1362          post_am_cv(Teuchos::View, am_acv.values()+cv_end, num_post_cv);
1363   if ( !nearby(pre_cv,  pre_am_cv,  rel_tol) ||
1364        !nearby(post_cv, post_am_cv, rel_tol) )
1365     return false;
1366   // for (i=0; i<cv_start; ++i)
1367   //   if (acv[i] != am_acv[i])
1368   //     return false;
1369   // for (i=cv_end; i<num_acv; ++i)
1370   //   if (acv[i] != am_acv[i])
1371   //     return false;
1372   // complement of active discrete int vars must be identical
1373   const IntVector& adiv = vars.all_discrete_int_variables();
1374   const IntVector& am_adiv = am_vars.all_discrete_int_variables();
1375   for (i=0; i<div_start; ++i)
1376     if (adiv[i] != am_adiv[i])
1377       return false;
1378   for (i=div_end; i<num_adiv; ++i)
1379     if (adiv[i] != am_adiv[i])
1380       return false;
1381   // complement of active discrete string vars must be identical
1382   StringMultiArrayConstView adsv = vars.all_discrete_string_variables();
1383   StringMultiArrayConstView am_adsv = am_vars.all_discrete_string_variables();
1384   for (i=0; i<dsv_start; ++i)
1385     if (adsv[i] != am_adsv[i])
1386       return false;
1387   for (i=dsv_end; i<num_adsv; ++i)
1388     if (adsv[i] != am_adsv[i])
1389       return false;
1390   // complement of active discrete real vars must be identical (within rel tol)
1391   const RealVector& adrv = vars.all_discrete_real_variables();
1392   const RealVector& am_adrv = am_vars.all_discrete_real_variables();
1393   RealVector pre_drv(Teuchos::View, adrv.values(),            drv_start),
1394             post_drv(Teuchos::View, adrv.values()+drv_end,    num_post_drv),
1395           pre_am_drv(Teuchos::View, am_adrv.values(),         drv_start),
1396          post_am_drv(Teuchos::View, am_adrv.values()+drv_end, num_post_drv);
1397   if ( !nearby(pre_drv,  pre_am_drv,  rel_tol) ||
1398        !nearby(post_drv, post_am_drv, rel_tol) )
1399     return false;
1400   // for (i=0; i<drv_start; ++i)
1401   //   if (adrv[i] != am_adrv[i])
1402   //     return false;
1403   // for (i=drv_end; i<num_adrv; ++i)
1404   //   if (adrv[i] != am_adrv[i])
1405   //     return false;
1406 
1407   return true;
1408 }
1409 
1410 
inside(const Variables & vars) const1411 bool DataFitSurrModel::inside(const Variables& vars) const
1412 {
1413   // additionally check if within current bounds for "region" case
1414   if (pointReuse == "region") {
1415 
1416     const Constraints& am_cons = (actualModel.is_null()) ?
1417       userDefinedConstraints : actualModel.user_defined_constraints();
1418     const RealVector&  cv = vars.continuous_variables();
1419     const IntVector&  div = vars.discrete_int_variables();
1420     const RealVector& drv = vars.discrete_real_variables();
1421     size_t i, num_cv = cv.length(), num_div = div.length(),
1422       num_drv = drv.length();
1423 
1424     const RealVector& c_l_bnds = am_cons.continuous_lower_bounds();
1425     const RealVector& c_u_bnds = am_cons.continuous_upper_bounds();
1426     for (i=0; i<num_cv; ++i)
1427       if (cv[i] < c_l_bnds[i] || cv[i] > c_u_bnds[i])
1428 	return false;
1429 
1430     const IntVector& di_l_bnds = am_cons.discrete_int_lower_bounds();
1431     const IntVector& di_u_bnds = am_cons.discrete_int_upper_bounds();
1432     for (i=0; i<num_div; ++i)
1433       if (div[i] < di_l_bnds[i] || div[i] > di_u_bnds[i])
1434 	return false;
1435 
1436     // No check for active string variable bounds
1437 
1438     const RealVector& dr_l_bnds = am_cons.discrete_real_lower_bounds();
1439     const RealVector& dr_u_bnds = am_cons.discrete_real_upper_bounds();
1440     for (i=0; i<num_drv; ++i)
1441       if (drv[i] < dr_l_bnds[i] || drv[i] > dr_u_bnds[i])
1442 	return false;
1443   }
1444 
1445   return true;
1446 }
1447 
1448 
1449 /** Compute the response synchronously using actualModel, approxInterface,
1450     or both (mixed case).  For the approxInterface portion, build the
1451     approximation if needed, evaluate the approximate response, and apply
1452     correction (if active) to the results. */
derived_evaluate(const ActiveSet & set)1453 void DataFitSurrModel::derived_evaluate(const ActiveSet& set)
1454 {
1455   ++surrModelEvalCntr;
1456 
1457   ShortArray actual_asv, approx_asv; bool actual_eval, approx_eval, mixed_eval;
1458   Response actual_response, approx_response; // empty handles
1459   switch (responseMode) {
1460   case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
1461     asv_split_eval(set.request_vector(), actual_asv, approx_asv);
1462     actual_eval = !actual_asv.empty(); approx_eval = !approx_asv.empty();
1463     mixed_eval = (actual_eval && approx_eval); break;
1464   case BYPASS_SURROGATE:
1465     actual_eval = true; approx_eval = false;   break;
1466   case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1467     actual_eval = approx_eval = true;          break;
1468   }
1469 
1470   if (hierarchicalTagging) {
1471     String eval_tag = evalTagPrefix + '.' + std::to_string(surrModelEvalCntr+1);
1472     if (actual_eval)
1473       actualModel.eval_tag_prefix(eval_tag);
1474   }
1475 
1476   // -----------------------------
1477   // Compute actual model response
1478   // -----------------------------
1479   if (actual_eval) {
1480     component_parallel_mode(TRUTH_MODEL_MODE);
1481     update_model(actualModel); // update variables/bounds/labels in actualModel
1482     switch (responseMode) {
1483     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE: {
1484       ActiveSet actual_set = set;
1485       actual_set.request_vector(actual_asv);
1486       actualModel.evaluate(actual_set);
1487       if (mixed_eval)
1488 	actual_response = actualModel.current_response(); // shared rep
1489       else {
1490 	currentResponse.active_set(actual_set);
1491 	currentResponse.update(actualModel.current_response());
1492       }
1493       break;
1494     }
1495     case BYPASS_SURROGATE:
1496       actualModel.evaluate(set);
1497       currentResponse.active_set(set);
1498       currentResponse.update(actualModel.current_response());
1499       // TODO: Add to surrogate build data
1500       //      add_datapoint(....)
1501       break;
1502     case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1503       actualModel.evaluate(set);
1504       break;
1505     }
1506   }
1507 
1508   // ---------------------------------
1509   // Compute approx interface response
1510   // ---------------------------------
1511   if (approx_eval) { // normal case: evaluation of approxInterface
1512     // pre-process
1513     switch (responseMode) {
1514     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
1515       // if build_approximation has not yet been called, call it now
1516       if (!approxBuilds || force_rebuild())
1517 	build_approximation();
1518       break;
1519     }
1520 
1521     // compute the approximate response
1522     //component_parallel_mode(SURROGATE_MODEL_MODE); // does not use parallelism
1523     //ParConfigLIter pc_iter = parallelLib.parallel_configuration_iterator();
1524     //parallelLib.parallel_configuration_iterator(modelPCIter);
1525     if(interfEvaluationsDBState == EvaluationsDBState::UNINITIALIZED)
1526       interfEvaluationsDBState = evaluationsDB.interface_allocate(modelId,
1527           approxInterface.interface_id(), "approximation", currentVariables, currentResponse,
1528           default_interface_active_set(), approxInterface.analysis_components());
1529 
1530     switch (responseMode) {
1531     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE: {
1532       ActiveSet approx_set = set;
1533       approx_set.request_vector(approx_asv);
1534       approx_response = (mixed_eval) ? currentResponse.copy() : currentResponse;
1535       approxInterface.map(currentVariables, approx_set, approx_response);
1536       if(interfEvaluationsDBState == EvaluationsDBState::ACTIVE) {
1537         evaluationsDB.store_interface_variables(modelId, approxInterface.interface_id(),
1538           approxInterface.evaluation_id(), approx_set, currentVariables);
1539         evaluationsDB.store_interface_response(modelId, approxInterface.interface_id(),
1540           approxInterface.evaluation_id(), approx_response);
1541       }
1542       break;
1543     }
1544     case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1545       approx_response = currentResponse.copy(); // TO DO
1546       approxInterface.map(currentVariables, set, approx_response);
1547       if(interfEvaluationsDBState == EvaluationsDBState::ACTIVE) {
1548         evaluationsDB.store_interface_variables(modelId, approxInterface.interface_id(),
1549           approxInterface.evaluation_id(), set, currentVariables);
1550         evaluationsDB.store_interface_response(modelId, approxInterface.interface_id(),
1551           approxInterface.evaluation_id(), approx_response);
1552       }
1553       break;
1554     }
1555 
1556     //parallelLib.parallel_configuration_iterator(pc_iter); // restore
1557 
1558 
1559     // export data (optional)
1560     if (!exportPointsFile.empty() || !exportVarianceFile.empty())
1561       export_point(surrModelEvalCntr, currentVariables, approx_response);
1562 
1563     // post-process
1564     switch (responseMode) {
1565     case AUTO_CORRECTED_SURROGATE: {
1566       bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1567       //if (!deltaCorr.computed())
1568       //  deltaCorr.compute(currentVariables, centerResponse, approx_response,
1569       //                    quiet_flag);
1570       deltaCorr.apply(currentVariables, approx_response, quiet_flag);
1571       break;
1572     }
1573     }
1574   }
1575 
1576   // --------------------------------------
1577   // perform any actual/approx aggregations
1578   // --------------------------------------
1579   switch (responseMode) {
1580   case MODEL_DISCREPANCY: {
1581     // don't update surrogate data within deltaCorr's Approximations; just
1582     // update currentResponse (managed as surrogate data at a higher level)
1583     bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1584     deltaCorr.compute(actualModel.current_response(), approx_response,
1585 		      currentResponse, quiet_flag);
1586     break;
1587   }
1588   case AGGREGATED_MODELS:
1589     aggregate_response(actualModel.current_response(), approx_response,
1590 		       currentResponse);
1591     break;
1592   case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
1593     if (mixed_eval) {
1594       currentResponse.active_set(set);
1595       response_combine(actual_response, approx_response, currentResponse);
1596     }
1597     break;
1598   }
1599 }
1600 
1601 
1602 /** Compute the response asynchronously using actualModel,
1603     approxInterface, or both (mixed case).  For the approxInterface
1604     portion, build the approximation if needed and evaluate the
1605     approximate response in a quasi-asynchronous approach
1606     (ApproximationInterface::map() performs the map synchronously and
1607     bookkeeps the results for return in derived_synchronize() below). */
derived_evaluate_nowait(const ActiveSet & set)1608 void DataFitSurrModel::derived_evaluate_nowait(const ActiveSet& set)
1609 {
1610   ++surrModelEvalCntr;
1611 
1612   ShortArray actual_asv, approx_asv; bool actual_eval, approx_eval;
1613   switch (responseMode) {
1614   case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
1615     asv_split_eval(set.request_vector(), actual_asv, approx_asv);
1616     actual_eval = !actual_asv.empty(); approx_eval = !approx_asv.empty(); break;
1617   case BYPASS_SURROGATE:
1618     actual_eval = true; approx_eval = false;                              break;
1619   case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1620     actual_eval = approx_eval = true;                                     break;
1621   }
1622 
1623   if (hierarchicalTagging) {
1624     String eval_tag = evalTagPrefix + '.' + std::to_string(surrModelEvalCntr+1);
1625     if (actual_eval)
1626       actualModel.eval_tag_prefix(eval_tag);
1627   }
1628 
1629   // -----------------------------
1630   // Compute actual model response
1631   // -----------------------------
1632   if (actual_eval) {
1633     // don't need to set component parallel mode since this only queues the job
1634     update_model(actualModel); // update variables/bounds/labels in actualModel
1635     switch (responseMode) {
1636     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE: {
1637       ActiveSet actual_set = set;
1638       actual_set.request_vector(actual_asv);
1639       actualModel.evaluate_nowait(actual_set); break;
1640     }
1641     case BYPASS_SURROGATE: case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1642       actualModel.evaluate_nowait(set);        break;
1643     }
1644     // store mapping from actualModel eval id to DataFitSurrModel id
1645     truthIdMap[actualModel.evaluation_id()] = surrModelEvalCntr;
1646   }
1647 
1648   // ---------------------------------
1649   // Compute approx interface response
1650   // ---------------------------------
1651   if (approx_eval) { // normal case: evaluation of approxInterface
1652     // pre-process
1653     switch (responseMode) {
1654     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
1655       // if build_approximation has not yet been called, call it now
1656       if (!approxBuilds || force_rebuild())
1657 	build_approximation();
1658       break;
1659     }
1660 
1661     if(interfEvaluationsDBState == EvaluationsDBState::ACTIVE)
1662       evaluationsDB.interface_allocate(modelId, approxInterface.interface_id(),
1663                                        "approximation", currentVariables, currentResponse,
1664                                        default_interface_active_set(),
1665                                        approxInterface.analysis_components());
1666 
1667     // compute the approximate response
1668     // don't need to set component parallel mode since this only queues the job
1669     switch (responseMode) {
1670     case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE: {
1671       ActiveSet approx_set = set;
1672       approx_set.request_vector(approx_asv);
1673       approxInterface.map(currentVariables, approx_set, currentResponse, true);
1674       if(interfEvaluationsDBState == EvaluationsDBState::ACTIVE)
1675         evaluationsDB.store_interface_variables(modelId, approxInterface.interface_id(),
1676           approxInterface.evaluation_id(), approx_set, currentVariables);
1677       break;
1678     }
1679     case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1680       approxInterface.map(currentVariables,        set, currentResponse, true);
1681       if(interfEvaluationsDBState == EvaluationsDBState::ACTIVE)
1682         evaluationsDB.store_interface_variables(modelId, approxInterface.interface_id(),
1683           approxInterface.evaluation_id(), set, currentVariables);
1684       break;
1685     }
1686 
1687     // post-process
1688     switch (responseMode) {
1689     case AUTO_CORRECTED_SURROGATE:
1690       rawVarsMap[surrModelEvalCntr] = currentVariables.copy(); break;
1691     default:
1692       if (!exportPointsFile.empty() || !exportVarianceFile.empty())
1693 	rawVarsMap[surrModelEvalCntr] = currentVariables.copy();
1694       break;
1695     }
1696     // store map from approxInterface eval id to DataFitSurrModel id
1697     surrIdMap[approxInterface.evaluation_id()] = surrModelEvalCntr;
1698   }
1699 }
1700 
1701 
1702 /** Blocking retrieval of asynchronous evaluations from actualModel,
1703     approxInterface, or both (mixed case).  For the approxInterface
1704     portion, apply correction (if active) to each response in the array.
1705     derived_synchronize() is designed for the general case where
1706     derived_evaluate_nowait() may be inconsistent in its use
1707     of actual evaluations, approximate evaluations, or both. */
derived_synchronize()1708 const IntResponseMap& DataFitSurrModel::derived_synchronize()
1709 {
1710   surrResponseMap.clear();
1711   bool actual_evals = !truthIdMap.empty(), approx_evals = !surrIdMap.empty(),
1712     block = true;
1713 
1714   // -----------------------------
1715   // synchronize actualModel evals
1716   // -----------------------------
1717   IntResponseMap actual_resp_map_rekey;
1718   if (actual_evals) {
1719     component_parallel_mode(TRUTH_MODEL_MODE);
1720 
1721     // update map keys to use surrModelEvalCntr
1722     if (approx_evals)
1723       rekey_synch(actualModel, block, truthIdMap, actual_resp_map_rekey);
1724     else {
1725       rekey_synch(actualModel, block, truthIdMap, surrResponseMap);
1726       return surrResponseMap; // if no approx evals, return actual results
1727     }
1728   }
1729 
1730   // ---------------------------------
1731   // synchronize approxInterface evals
1732   // ---------------------------------
1733   IntResponseMap approx_resp_map_rekey;
1734   if (approx_evals) {
1735     // derived_synchronize() and derived_synchronize_nowait() share code since
1736     // approx_resp_map is complete in both cases
1737     if (actual_evals)
1738       derived_synchronize_approx(block, approx_resp_map_rekey);
1739     else {
1740       derived_synchronize_approx(block, surrResponseMap);
1741       return surrResponseMap; // if no approx evals, return actual results
1742     }
1743   }
1744 
1745   // --------------------------------------
1746   // perform any actual/approx aggregations
1747   // --------------------------------------
1748   // Both actual and approx evals are present: {actual,approx}_resp_map_rekey
1749   // may be partial sets (partial surrogateFnIndices in
1750   // {UN,AUTO_}CORRECTED_SURROGATE) or full sets (MODEL_DISCREPANCY).
1751   Response empty_resp;
1752   IntRespMCIter act_it = actual_resp_map_rekey.begin(),
1753                 app_it = approx_resp_map_rekey.begin();
1754   switch (responseMode) {
1755   case MODEL_DISCREPANCY: {
1756     bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1757     for (; act_it != actual_resp_map_rekey.end() &&
1758 	   app_it != approx_resp_map_rekey.end(); ++act_it, ++app_it) {
1759       check_key(act_it->first, app_it->first);
1760       deltaCorr.compute(act_it->second, app_it->second,
1761 			surrResponseMap[act_it->first], quiet_flag);
1762     }
1763     break;
1764   }
1765   case AGGREGATED_MODELS:
1766     for (; act_it != actual_resp_map_rekey.end() &&
1767 	   app_it != approx_resp_map_rekey.end(); ++act_it, ++app_it) {
1768       check_key(act_it->first, app_it->first);
1769       aggregate_response(act_it->second, app_it->second,
1770 			 surrResponseMap[act_it->first]);
1771     }
1772     break;
1773   default: // {UN,AUTO_}CORRECTED_SURROGATE modes
1774     // process any combination of HF and LF completions
1775     while (act_it != actual_resp_map_rekey.end() ||
1776 	   app_it != approx_resp_map_rekey.end()) {
1777       int act_eval_id = (act_it == actual_resp_map_rekey.end()) ?
1778 	INT_MAX : act_it->first;
1779       int app_eval_id = (app_it == approx_resp_map_rekey.end()) ?
1780 	INT_MAX : app_it->first;
1781 
1782       if (act_eval_id < app_eval_id) // only HF available
1783 	{ response_combine(act_it->second, empty_resp,
1784 			   surrResponseMap[act_eval_id]); ++act_it; }
1785       else if (app_eval_id < act_eval_id) // only LF available
1786 	{ response_combine(empty_resp, app_it->second,
1787 			   surrResponseMap[app_eval_id]); ++app_it; }
1788       else // both LF and HF available
1789 	{ response_combine(act_it->second, app_it->second,
1790 			   surrResponseMap[act_eval_id]); ++act_it; ++app_it; }
1791     }
1792     break;
1793   }
1794 
1795   return surrResponseMap;
1796 }
1797 
1798 
1799 /** Nonblocking retrieval of asynchronous evaluations from
1800     actualModel, approxInterface, or both (mixed case).  For the
1801     approxInterface portion, apply correction (if active) to each
1802     response in the map.  derived_synchronize_nowait() is designed for
1803     the general case where derived_evaluate_nowait() may be
1804     inconsistent in its use of actual evals, approx evals, or both. */
derived_synchronize_nowait()1805 const IntResponseMap& DataFitSurrModel::derived_synchronize_nowait()
1806 {
1807   surrResponseMap.clear();
1808   bool actual_evals = !truthIdMap.empty(), approx_evals = !surrIdMap.empty(),
1809     block = false;
1810 
1811   // -----------------------------
1812   // synchronize actualModel evals
1813   // -----------------------------
1814   IntResponseMap actual_resp_map_rekey;
1815   if (actual_evals) {
1816     component_parallel_mode(TRUTH_MODEL_MODE);
1817 
1818     // update map keys to use surrModelEvalCntr
1819     if (approx_evals)
1820       rekey_synch(actualModel, block, truthIdMap, actual_resp_map_rekey);
1821     else {
1822       rekey_synch(actualModel, block, truthIdMap, surrResponseMap);
1823       return surrResponseMap; // if no approx evals, return actual results
1824     }
1825   }
1826 
1827   // ---------------------------------
1828   // synchronize approxInterface evals
1829   // ---------------------------------
1830   IntResponseMap approx_resp_map_rekey;
1831   if (approx_evals) {
1832     // derived_synchronize() and derived_synchronize_nowait() share code since
1833     // approx_resp_map is complete in both cases
1834     if (actual_evals)
1835       derived_synchronize_approx(block, approx_resp_map_rekey);
1836     else {
1837       derived_synchronize_approx(block, surrResponseMap);
1838       return surrResponseMap; // if no approx evals, return actual results
1839     }
1840   }
1841 
1842   // --------------------------------------
1843   // perform any approx/actual aggregations
1844   // --------------------------------------
1845   // Both actual and approx evals are present:
1846   // > actual_resp_map_rekey may be a partial set of evals (partial
1847   //   surrogateFnIndices in {UN,AUTO_}CORRECTED_SURROGATE modes) or
1848   //   full sets (MODEL_DISCREPANCY mode)
1849   // > approx_resp_map_rekey is a complete set of evals
1850   Response empty_resp;
1851   IntRespMCIter act_it = actual_resp_map_rekey.begin(),
1852                 app_it = approx_resp_map_rekey.begin();
1853   bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1854   // remaining values from truthIdMap key-value pairs
1855   IntSet remain_truth_ids; IntIntMIter im_it;
1856   for (im_it=truthIdMap.begin(); im_it!=truthIdMap.end(); ++im_it)
1857     remain_truth_ids.insert(im_it->second);
1858   // process any combination of actual and approx completions
1859   while (act_it != actual_resp_map_rekey.end() ||
1860 	 app_it != approx_resp_map_rekey.end()) {
1861     int act_eval_id = (act_it == actual_resp_map_rekey.end()) ?
1862       INT_MAX : act_it->first;
1863     int app_eval_id = (app_it == approx_resp_map_rekey.end()) ?
1864       INT_MAX : app_it->first;
1865     // process approx/actual results or cache them for next pass
1866     if (act_eval_id < app_eval_id) { // only actual available
1867       switch (responseMode) {
1868       case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1869 	Cerr << "Error: approx eval missing in DataFitSurrModel::"
1870 	     << "derived_synchronize_nowait()" << std::endl;
1871 	abort_handler(MODEL_ERROR); break;
1872       default: // {UN,AUTO_}CORRECTED_SURROGATE modes
1873 	// there is no approx component to this response
1874 	response_combine(act_it->second, empty_resp,
1875 			 surrResponseMap[act_eval_id]);
1876 	break;
1877       }
1878       ++act_it;
1879     }
1880     else if (app_eval_id < act_eval_id) { // only approx available
1881       switch (responseMode) {
1882       case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
1883 	// cache approx response since actual contribution not yet available
1884 	cachedApproxRespMap[app_eval_id] = app_it->second; break;
1885       default: // {UN,AUTO_}CORRECTED_SURROGATE modes
1886 	if (remain_truth_ids.find(app_eval_id) != remain_truth_ids.end())
1887 	  // cache approx response since actual contribution still pending
1888 	  cachedApproxRespMap[app_eval_id] = app_it->second;
1889 	else // response complete: there is no actual contribution
1890 	  response_combine(empty_resp, app_it->second,
1891 			   surrResponseMap[app_eval_id]);
1892 	break;
1893       }
1894       ++app_it;
1895     }
1896     else { // both approx and actual available
1897       switch (responseMode) {
1898       case MODEL_DISCREPANCY:
1899 	deltaCorr.compute(act_it->second, app_it->second,
1900 			  surrResponseMap[act_eval_id], quiet_flag); break;
1901       case AGGREGATED_MODELS:
1902 	aggregate_response(act_it->second, app_it->second,
1903 			   surrResponseMap[act_eval_id]);            break;
1904       default: // {UN,AUTO_}CORRECTED_SURROGATE modes
1905 	response_combine(act_it->second, app_it->second,
1906 			 surrResponseMap[act_eval_id]);              break;
1907       }
1908       ++act_it; ++app_it;
1909     }
1910   }
1911 
1912   return surrResponseMap;
1913 }
1914 
1915 
1916 void DataFitSurrModel::
derived_synchronize_approx(bool block,IntResponseMap & approx_resp_map_rekey)1917 derived_synchronize_approx(bool block, IntResponseMap& approx_resp_map_rekey)
1918 {
1919   bool actual_evals = !truthIdMap.empty();
1920 
1921   //component_parallel_mode(SURROGATE_MODEL_MODE); // does not use parallelism
1922   //ParConfigLIter pc_iter = parallelLib.parallel_configuration_iterator();
1923   //parallelLib.parallel_configuration_iterator(modelPCIter);
1924 
1925   // synchronize and rekey to use surrModelEvalCntr
1926   rekey_synch(approxInterface, block, surrIdMap, approx_resp_map_rekey);
1927 
1928   //parallelLib.parallel_configuration_iterator(pc_iter); // restore
1929 
1930   IntRespMIter r_it;
1931   bool export_pts = !exportPointsFile.empty() || !exportVarianceFile.empty();
1932   if (responseMode == AUTO_CORRECTED_SURROGATE && corrType) {
1933     // Interface::rawResponseMap can be corrected directly in the case of an
1934     // ApproximationInterface since data_pairs is not used (not true for
1935     // HierarchSurrModel::derived_synchronize()/derived_synchronize_nowait()).
1936     // The response map from ApproximationInterface's quasi-asynch mode is
1937     // complete and in order.
1938 
1939     bool quiet_flag = (outputLevel < NORMAL_OUTPUT);
1940     //if (!deltaCorr.computed() && !approx_resp_map_rekey.empty())
1941     //  deltaCorr.compute(rawVarsMap.begin()->second, ...,
1942     //                    approx_resp_map_rekey.begin()->second, quiet_flag);
1943     IntVarsMIter v_it;
1944     for (r_it  = approx_resp_map_rekey.begin(), v_it = rawVarsMap.begin();
1945 	 r_it != approx_resp_map_rekey.end(); ++r_it, ++v_it) {
1946       deltaCorr.apply(v_it->second,//rawVarsMap[r_it->first],
1947 		      r_it->second, quiet_flag);
1948       // decided to export auto-corrected approx response
1949       if (export_pts)
1950 	export_point(r_it->first, v_it->second, r_it->second);
1951     }
1952     rawVarsMap.clear();
1953   }
1954   else if (export_pts) {
1955     IntVarsMIter v_it;
1956     for (r_it  = approx_resp_map_rekey.begin(), v_it = rawVarsMap.begin();
1957 	 r_it != approx_resp_map_rekey.end(); ++r_it, ++v_it)
1958       export_point(r_it->first, v_it->second, r_it->second);
1959     rawVarsMap.clear();
1960   }
1961 
1962   // add cached evals (synchronized approx evals that could not be returned
1963   // since truth eval portions were still pending) for processing.  Do not
1964   // correct them a second time.
1965   for (r_it  = cachedApproxRespMap.begin();
1966        r_it != cachedApproxRespMap.end(); ++r_it)
1967     approx_resp_map_rekey[r_it->first] = r_it->second;
1968   cachedApproxRespMap.clear();
1969 }
1970 
1971 
1972 void DataFitSurrModel::
asv_inflate_build(const ShortArray & orig_asv,ShortArray & actual_asv)1973 asv_inflate_build(const ShortArray& orig_asv, ShortArray& actual_asv)
1974 {
1975   // DataFitSurrModel consumes replicates from any response aggregations
1976   // occurring in actualModel
1977   size_t num_orig = orig_asv.size(), num_actual = actualModel.response_size();
1978   if (num_actual < num_orig || num_actual % num_orig) {
1979     Cerr << "Error: ASV size mismatch in DataFitSurrModel::asv_inflate_build()."
1980 	 << std::endl;
1981     abort_handler(MODEL_ERROR);
1982   }
1983 
1984   if (surrogateFnIndices.size() == numFns) {
1985     if (num_actual > num_orig) { // inflate actual_asv if needed
1986       actual_asv.resize(num_actual);
1987       for (size_t i=0; i<num_actual; ++i)
1988 	actual_asv[i] = orig_asv[i % num_orig];
1989     }
1990     else
1991       actual_asv = orig_asv;
1992   }
1993   else { // mixed response set
1994     size_t i; int index; short orig_asv_val;
1995     actual_asv.assign(num_actual, 0);
1996     for (ISIter it=surrogateFnIndices.begin();
1997 	 it!=surrogateFnIndices.end(); ++it) {
1998       index = *it; orig_asv_val = orig_asv[index];
1999       if (orig_asv_val)
2000 	for (i=index; i<num_actual; i+=num_orig) // inflate actual_asv
2001 	  actual_asv[i] = orig_asv_val;
2002     }
2003   }
2004 }
2005 
2006 
2007 void DataFitSurrModel::
asv_split_eval(const ShortArray & orig_asv,ShortArray & actual_asv,ShortArray & approx_asv)2008 asv_split_eval(const ShortArray& orig_asv, ShortArray& actual_asv,
2009 	       ShortArray& approx_asv)
2010 {
2011   if (actualModel.is_null() || surrogateFnIndices.size() == numFns)
2012     { approx_asv = orig_asv; return; } // don't inflate approx_asv
2013   // else mixed response set
2014 
2015   // DataFitSurrModel consumes replicates from any response aggregations
2016   // occurring in actualModel
2017   size_t num_orig = orig_asv.size(), num_actual = actualModel.response_size();
2018   if (num_orig != numFns || num_actual < num_orig || num_actual % num_orig) {
2019     Cerr << "Error: ASV size mismatch in DataFitSurrModel::asv_split_eval()."
2020 	 << std::endl;
2021     abort_handler(MODEL_ERROR);
2022   }
2023   int index; short orig_asv_val;
2024   for (index=0; index<num_orig; ++index) {
2025     orig_asv_val = orig_asv[index];
2026     if (orig_asv_val) {
2027       if (surrogateFnIndices.count(index)) {
2028 	if (approx_asv.empty()) // keep empty if no active requests
2029 	  approx_asv.assign(num_orig, 0);
2030 	approx_asv[index] = orig_asv_val; // don't inflate approx_asv
2031       }
2032       else {
2033 	if (actual_asv.empty()) // keep empty if no active requests
2034 	  actual_asv.assign(num_actual, 0);
2035 	for (size_t i=index; i<num_actual; i+=num_orig) // inflate actual_asv
2036 	  actual_asv[i] = orig_asv_val;
2037       }
2038     }
2039   }
2040 }
2041 
2042 
2043 /** Constructor helper to read the points file once, if provided, and
2044     then reuse its data as appropriate within build_global().
2045     Surrogate data imports default to active/inactive variables, but
2046     user can override to active only */
2047 void DataFitSurrModel::
import_points(unsigned short tabular_format,bool use_var_labels,bool active_only)2048 import_points(unsigned short tabular_format, bool use_var_labels, bool active_only)
2049 {
2050   // Temporary objects to use to read correct size vars/resp; use copies
2051   // so that read_data_tabular() does not alter state of vars/resp objects
2052   // in Models (especially important for non-active variables).
2053   Variables vars = actualModel.is_null() ? currentVariables.copy() :
2054     actualModel.current_variables().copy();
2055   Response  resp = actualModel.is_null() ? currentResponse.copy() :
2056     actualModel.current_response().copy();
2057   size_t num_vars = active_only ? vars.total_active() : vars.tv();
2058 
2059   if (outputLevel >= NORMAL_OUTPUT)
2060     Cout << "Surrogate model retrieving points with " << num_vars
2061 	 << " variables and " << numFns << " response\nfunctions from file '"
2062 	 << importPointsFile << "'\n";
2063   // Preserves eval and interface ids in the PRPList, if annotated format
2064   // If no eval ID, will number successively from 1
2065   PRPList import_prp_list;
2066   bool verbose = (outputLevel > NORMAL_OUTPUT);
2067   String context_msg = "Surrogate model with id '" + model_id() +
2068     "' import_build_points";
2069   TabularIO::read_data_tabular(importPointsFile, context_msg, vars, resp,
2070 			       import_prp_list, tabular_format, verbose,
2071 			       use_var_labels, active_only);
2072   if (outputLevel >= NORMAL_OUTPUT)
2073     Cout << "Surrogate model retrieved " << import_prp_list.size()
2074 	 << " total points." << std::endl;
2075 
2076   // For consistency with restart read, import_points (as well as post_input)
2077   // should also promote imported data to current eval cache/restart file.
2078   PRPLIter prp_it; String am_iface_id;
2079   bool cache = true, restart = true; // default on (with empty id) if no Model
2080   if (!actualModel.is_null()) {
2081     am_iface_id = actualModel.interface_id(); // default (Note: no recurse!)
2082     cache       = actualModel.evaluation_cache(); // recurse_flag = true
2083     restart     = actualModel.restart_file();     // recurse_flag = true
2084   }
2085   if (cache || restart) {
2086     // For negated sequence that continues from most negative id
2087     //int cache_id = -1;
2088     //if (cache && !data_pairs.empty()) {
2089     //  int first_id = data_pairs.front().evaluation_id();
2090     //  if (first_id < 0) cache_id = first_id - 1;
2091     //}
2092     /// process arrays of data from TabularIO::read_data_tabular() above
2093     for (prp_it =import_prp_list.begin();
2094 	 prp_it!=import_prp_list.end(); ++prp_it) {
2095       ParamResponsePair& pr = *prp_it;
2096       //if ( (tabular_format & TABULAR_EVAL_ID) == 0 )  // not imported
2097       pr.eval_id(0); // always override eval id to 0 for imported data
2098       if ( (tabular_format & TABULAR_IFACE_ID) == 0  && !am_iface_id.empty()) {// not imported: dangerous!
2099           pr.interface_id(am_iface_id); // assign best guess / default
2100       }
2101 
2102       if (restart) parallelLib.write_restart(pr); // preserve eval id
2103       if (cache)   data_pairs.insert(pr); // duplicate ids OK for PRPCache
2104       //if (cache) // for negated sequence
2105       //  { pr.evaluation_id(cache_id); data_pairs.insert(pr); --cache_id; }
2106     }
2107   }
2108 }
2109 
2110 
2111 /** Constructor helper to export approximation-based evaluations to a file. */
initialize_export()2112 void DataFitSurrModel::initialize_export()
2113 {
2114   if (!exportPointsFile.empty()) {
2115     TabularIO::open_file(exportFileStream, exportPointsFile,
2116 			 "DataFitSurrModel export");
2117     TabularIO::write_header_tabular(exportFileStream, currentVariables,
2118 				    currentResponse, "eval_id", exportFormat);
2119   }
2120   if (!exportVarianceFile.empty()) {
2121     StringArray variance_labels;
2122     for (const auto& label : currentResponse.function_labels())
2123       variance_labels.push_back(label + "_variance");
2124     TabularIO::open_file(exportVarianceFileStream, exportVarianceFile,
2125 			 "DataFitSurrModel variance export");
2126     TabularIO::write_header_tabular(exportVarianceFileStream, currentVariables,
2127 				    variance_labels, "eval_id",
2128 				    exportVarianceFormat);
2129   }
2130 }
2131 
2132 
2133 /** Constructor helper to export approximation-based evaluations to a file. */
finalize_export()2134 void DataFitSurrModel::finalize_export()
2135 {
2136   if (!exportPointsFile.empty())
2137     TabularIO::close_file(exportFileStream, exportPointsFile,
2138 			  "DataFitSurrModel export");
2139   if (!exportVarianceFile.empty())
2140     TabularIO::close_file(exportVarianceFileStream, exportVarianceFile,
2141 			  "DataFitSurrModel variance export");
2142 }
2143 
2144 
2145 /** Constructor helper to export approximation-based evaluations to a
2146     file. Exports all variables, so it's clear at what values of
2147     inactive it was built at */
2148 void DataFitSurrModel::
export_point(int eval_id,const Variables & vars,const Response & resp)2149 export_point(int eval_id, const Variables& vars, const Response& resp)
2150 {
2151   Response response_variance;
2152   if (!exportVarianceFile.empty()) {
2153     RealVector approx_var = approximation_variances(vars);
2154     response_variance = resp.copy();
2155     response_variance.function_values(approx_var);
2156   }
2157 
2158   if (recastings()) {
2159     Variables export_vars; Response export_resp;
2160     iterator_space_to_user_space(vars, resp, export_vars, export_resp);
2161     if (!exportPointsFile.empty())
2162       TabularIO::write_data_tabular(exportFileStream, export_vars, interface_id(),
2163 				    export_resp, eval_id, exportFormat);
2164     if (!exportVarianceFile.empty()) {
2165       // BMA TODO WARN or SKIP: exported variance not transformed?!?
2166       // Can't in general map it through a recast model...
2167       TabularIO::write_data_tabular(exportVarianceFileStream, export_vars, interface_id(),
2168 				    response_variance, eval_id, exportVarianceFormat);
2169     }
2170 
2171   }
2172   else {
2173     if (!exportPointsFile.empty())
2174       TabularIO::write_data_tabular(exportFileStream, vars, interface_id(), resp,
2175 				    eval_id, exportFormat);
2176     if (!exportVarianceFile.empty()) {
2177       TabularIO::write_data_tabular(exportVarianceFileStream, vars, interface_id(),
2178 				    response_variance, eval_id, exportVarianceFormat);
2179     }
2180   }
2181 }
2182 
2183 
component_parallel_mode(short mode)2184 void DataFitSurrModel::component_parallel_mode(short mode)
2185 {
2186   // mode may be correct, but can't guarantee active parallel config is in sync
2187   //if (componentParallelMode == mode)
2188   //  return; // already in correct parallel mode
2189 
2190   /* Moved up a level so that config can be restored after optInterface usage
2191   //if (mode == TRUTH_MODEL_MODE) {
2192     // ParallelLibrary::currPCIter activation delegated to subModel
2193   //}
2194   //else
2195   if (mode == SURROGATE_MODEL_MODE)
2196     parallelLib.parallel_configuration_iterator(modelPCIter);
2197   //else if (mode == 0)
2198   */
2199 
2200   componentParallelMode = mode;
2201 }
2202 
2203 
2204 /** Update variables and constraints data within model using
2205     values and labels from currentVariables and bound/linear/nonlinear
2206     constraints from userDefinedConstraints. */
init_model(Model & model)2207 void DataFitSurrModel::init_model(Model& model)
2208 {
2209   if (model.is_null())
2210     return;
2211 
2212   // linear constraints
2213 
2214   if (userDefinedConstraints.num_linear_ineq_constraints()) {
2215     // the views don't necessarily have to be the same, but the number of
2216     // active continuous and active discrete variables have to be consistent.
2217     if (currentVariables.cv()  == model.cv()  &&
2218 	currentVariables.div() == model.div() &&
2219 	currentVariables.drv() == model.drv()) {
2220       model.linear_ineq_constraint_coeffs(
2221         userDefinedConstraints.linear_ineq_constraint_coeffs());
2222       model.linear_ineq_constraint_lower_bounds(
2223         userDefinedConstraints.linear_ineq_constraint_lower_bounds());
2224       model.linear_ineq_constraint_upper_bounds(
2225         userDefinedConstraints.linear_ineq_constraint_upper_bounds());
2226     }
2227     else {
2228       Cerr << "Error: cannot update linear inequality constraints in "
2229 	   << "DataFitSurrModel::init_model() due to inconsistent active "
2230 	   << "variables." << std::endl;
2231       abort_handler(MODEL_ERROR);
2232     }
2233   }
2234   if (userDefinedConstraints.num_linear_eq_constraints()) {
2235     // the views don't necessarily have to be the same, but the number of
2236     // active continuous and active discrete variables have to be consistent.
2237     if (currentVariables.cv()  == model.cv()  &&
2238 	currentVariables.div() == model.div() &&
2239 	currentVariables.drv() == model.drv()) {
2240       model.linear_eq_constraint_coeffs(
2241         userDefinedConstraints.linear_eq_constraint_coeffs());
2242       model.linear_eq_constraint_targets(
2243         userDefinedConstraints.linear_eq_constraint_targets());
2244     }
2245     else {
2246       Cerr << "Error: cannot update linear equality constraints in "
2247 	   << "DataFitSurrModel::init_model() due to inconsistent active "
2248 	   << "variables." << std::endl;
2249       abort_handler(MODEL_ERROR);
2250     }
2251   }
2252 
2253   // nonlinear constraints
2254 
2255   if (userDefinedConstraints.num_nonlinear_ineq_constraints()) {
2256     model.nonlinear_ineq_constraint_lower_bounds(
2257       userDefinedConstraints.nonlinear_ineq_constraint_lower_bounds());
2258     model.nonlinear_ineq_constraint_upper_bounds(
2259       userDefinedConstraints.nonlinear_ineq_constraint_upper_bounds());
2260   }
2261   if (userDefinedConstraints.num_nonlinear_eq_constraints())
2262     model.nonlinear_eq_constraint_targets(
2263       userDefinedConstraints.nonlinear_eq_constraint_targets());
2264 
2265   // labels: update model with current{Variables,Response} descriptors
2266 
2267   if (!approxBuilds) {
2268     model.response_labels(currentResponse.function_labels());
2269 
2270     short approx_active_view = currentVariables.view().first,
2271           actual_active_view = model.current_variables().view().first;
2272     if (approx_active_view == actual_active_view) {
2273       // update active model vars with active currentVariables data
2274       model.continuous_variable_labels(
2275         currentVariables.continuous_variable_labels());
2276       model.discrete_int_variable_labels(
2277         currentVariables.discrete_int_variable_labels());
2278       model.discrete_real_variable_labels(
2279         currentVariables.discrete_real_variable_labels());
2280       if (approx_active_view >= RELAXED_DESIGN) {
2281 	model.inactive_continuous_variable_labels(
2282           currentVariables.inactive_continuous_variable_labels());
2283 	model.inactive_discrete_int_variable_labels(
2284           currentVariables.inactive_discrete_int_variable_labels());
2285 	model.inactive_discrete_real_variable_labels(
2286           currentVariables.inactive_discrete_real_variable_labels());
2287       }
2288     }
2289     else if ( approx_active_view >= RELAXED_DESIGN &&
2290 	      ( actual_active_view == RELAXED_ALL ||
2291 		actual_active_view == MIXED_ALL ) ) {
2292       // update active model vars using "All" view of currentVariables data
2293       model.continuous_variable_labels(
2294         currentVariables.all_continuous_variable_labels());
2295       model.discrete_int_variable_labels(
2296         currentVariables.all_discrete_int_variable_labels());
2297       model.discrete_real_variable_labels(
2298         currentVariables.all_discrete_real_variable_labels());
2299     }
2300     else if ( actual_active_view >= RELAXED_DESIGN &&
2301 	      ( approx_active_view == RELAXED_ALL ||
2302 		approx_active_view == MIXED_ALL ) ) {
2303       // update "All" view of model vars using active currentVariables data
2304       model.all_continuous_variable_labels(
2305         currentVariables.continuous_variable_labels());
2306       model.all_discrete_int_variable_labels(
2307         currentVariables.discrete_int_variable_labels());
2308       model.all_discrete_real_variable_labels(
2309         currentVariables.discrete_real_variable_labels());
2310     }
2311   }
2312 }
2313 
2314 
2315 /** Update variables and constraints data within model using
2316     values and labels from currentVariables and bound/linear/nonlinear
2317     constraints from userDefinedConstraints. */
update_model(Model & model)2318 void DataFitSurrModel::update_model(Model& model)
2319 {
2320   if (model.is_null())
2321     return;
2322 
2323   // vars/bounds/labels
2324 
2325   short approx_active_view = currentVariables.view().first,
2326         actual_active_view = model.current_variables().view().first;
2327   // Update model variables, bounds, and labels in all view cases.
2328   // Note 1: bounds updating isn't strictly required for local/multipoint, but
2329   // is needed for global and could be relevant in cases where model
2330   // involves additional surrogates/nestings.
2331   // Note 2: label updating eliminates the need to replicate variable
2332   // descriptors, e.g., in SBOUU input files.  It only needs to be performed
2333   // once (as opposed to the update of vars and bounds).  However, performing
2334   // this updating in the constructor does not propagate properly for multiple
2335   // surrogates/nestings since the sub-model construction (and therefore any
2336   // sub-sub-model constructions) must finish before calling any set functions
2337   // on it.  That is, after-the-fact updating in constructors only propagates
2338   // one level, whereas before-the-fact updating in compute/build functions
2339   // propagates multiple levels.
2340   if (approx_active_view == actual_active_view) {
2341     // update active model vars/cons with active currentVariables data
2342     model.continuous_variables(currentVariables.continuous_variables());
2343     model.discrete_int_variables(
2344       currentVariables.discrete_int_variables());
2345     model.discrete_real_variables(
2346       currentVariables.discrete_real_variables());
2347     model.continuous_lower_bounds(
2348       userDefinedConstraints.continuous_lower_bounds());
2349     model.continuous_upper_bounds(
2350       userDefinedConstraints.continuous_upper_bounds());
2351     model.discrete_int_lower_bounds(
2352       userDefinedConstraints.discrete_int_lower_bounds());
2353     model.discrete_int_upper_bounds(
2354       userDefinedConstraints.discrete_int_upper_bounds());
2355     model.discrete_real_lower_bounds(
2356       userDefinedConstraints.discrete_real_lower_bounds());
2357     model.discrete_real_upper_bounds(
2358       userDefinedConstraints.discrete_real_upper_bounds());
2359   }
2360   else if ( approx_active_view >= RELAXED_DESIGN &&
2361 	    ( actual_active_view == RELAXED_ALL ||
2362 	      actual_active_view == MIXED_ALL ) ) {
2363     // update active model vars/cons using "All" view of
2364     // currentVariables/userDefinedConstraints data.
2365     model.continuous_variables(
2366       currentVariables.all_continuous_variables());
2367     model.discrete_int_variables(
2368       currentVariables.all_discrete_int_variables());
2369     model.discrete_real_variables(
2370       currentVariables.all_discrete_real_variables());
2371     model.continuous_lower_bounds(
2372       userDefinedConstraints.all_continuous_lower_bounds());
2373     model.continuous_upper_bounds(
2374       userDefinedConstraints.all_continuous_upper_bounds());
2375     model.discrete_int_lower_bounds(
2376       userDefinedConstraints.all_discrete_int_lower_bounds());
2377     model.discrete_int_upper_bounds(
2378       userDefinedConstraints.all_discrete_int_upper_bounds());
2379     model.discrete_real_lower_bounds(
2380       userDefinedConstraints.all_discrete_real_lower_bounds());
2381     model.discrete_real_upper_bounds(
2382       userDefinedConstraints.all_discrete_real_upper_bounds());
2383   }
2384   else if ( actual_active_view >= RELAXED_DESIGN &&
2385 	    ( approx_active_view == RELAXED_ALL ||
2386 	      approx_active_view == MIXED_ALL ) ) {
2387     // update "All" view of model vars/cons using active
2388     // currentVariables/userDefinedConstraints data.
2389     model.all_continuous_variables(
2390       currentVariables.continuous_variables());
2391     model.all_discrete_int_variables(
2392       currentVariables.discrete_int_variables());
2393     model.all_discrete_real_variables(
2394       currentVariables.discrete_real_variables());
2395     model.all_continuous_lower_bounds(
2396       userDefinedConstraints.continuous_lower_bounds());
2397     model.all_continuous_upper_bounds(
2398       userDefinedConstraints.continuous_upper_bounds());
2399     model.all_discrete_int_lower_bounds(
2400       userDefinedConstraints.discrete_int_lower_bounds());
2401     model.all_discrete_int_upper_bounds(
2402       userDefinedConstraints.discrete_int_upper_bounds());
2403     model.all_discrete_real_lower_bounds(
2404       userDefinedConstraints.discrete_real_lower_bounds());
2405     model.all_discrete_real_upper_bounds(
2406       userDefinedConstraints.discrete_real_upper_bounds());
2407   }
2408   // TO DO: extend for aleatory/epistemic uncertain views
2409   else {
2410     Cerr << "Error: unsupported variable view differences in "
2411 	 << "DataFitSurrModel::update_model()" << std::endl;
2412     abort_handler(MODEL_ERROR);
2413   }
2414 
2415   // uncertain variable distribution data (dependent on label updates above)
2416 
2417   // Note: Variables instances defined from the same variablesId are not shared
2418   // (see ProblemDescDB::get_variables()), so we propagate any distribution
2419   // updates (e.g., NestedModel insertions) up/down the Model recursion.  For
2420   // differing variablesId, we assume that the distribution information can be
2421   // mapped when a variable label is matched, but this precludes the case
2422   // where the distribution for the same variable differs between that used to
2423   // build and that used to evaluate.   More careful logic could involve
2424   // matching both variable label and distribution type (presumably the dist
2425   // params would be consistent when the dist type is the same), and this could
2426   // be implemented at the lower (MultivariateDistribution) level.
2427   // > currentVariables may have different active view from incoming model
2428   //   vars, but MultivariateDistribution updates can be performed for all
2429   //   vars (independent of view)
2430   // > when model is a ProbabilityTransformModel, its mvDist is in u-space.
2431   //   DataFit operates in and pushes updates to this transformed space for
2432   //   parameterized std distribs (e.g. {JACOBI,GEN_LAGUERE,NUM_GEN}_ORTHOG).
2433   // > it is sufficient to pull parameters at initialize_mapping() time, as
2434   //   this data varies per iterator execution rather than per-evaluation
2435   const SharedVariablesData&   svd =          currentVariables.shared_data();
2436   const SharedVariablesData& m_svd = model.current_variables().shared_data();
2437   if (svd.id() == m_svd.id()) // same set of variables
2438     model.multivariate_distribution().pull_distribution_parameters(mvDist);
2439   else { // map between related sets of variables based on labels
2440     StringArray pull_labels;    svd.assemble_all_labels(pull_labels);
2441     StringArray push_labels;  m_svd.assemble_all_labels(push_labels);
2442     model.multivariate_distribution().
2443       pull_distribution_parameters(mvDist, pull_labels, push_labels);
2444   }
2445 }
2446 
2447 
2448 /** Update values and labels in currentVariables and
2449     bound/linear/nonlinear constraints in userDefinedConstraints from
2450     variables and constraints data within model. */
update_from_model(const Model & model)2451 void DataFitSurrModel::update_from_model(const Model& model)
2452 {
2453   // vars/bounds/labels
2454 
2455   // update vars/bounds/labels with model data using All view for both
2456   // (since approx arrays are sized but otherwise uninitialized)
2457   currentVariables.all_continuous_variables(
2458     model.all_continuous_variables());
2459   currentVariables.all_discrete_int_variables(
2460     model.all_discrete_int_variables());
2461   currentVariables.all_discrete_string_variables(
2462     model.all_discrete_string_variables());
2463   currentVariables.all_discrete_real_variables(
2464     model.all_discrete_real_variables());
2465   userDefinedConstraints.all_continuous_lower_bounds(
2466     model.all_continuous_lower_bounds());
2467   userDefinedConstraints.all_continuous_upper_bounds(
2468     model.all_continuous_upper_bounds());
2469   userDefinedConstraints.all_discrete_int_lower_bounds(
2470     model.all_discrete_int_lower_bounds());
2471   userDefinedConstraints.all_discrete_int_upper_bounds(
2472     model.all_discrete_int_upper_bounds());
2473   userDefinedConstraints.all_discrete_real_lower_bounds(
2474     model.all_discrete_real_lower_bounds());
2475   userDefinedConstraints.all_discrete_real_upper_bounds(
2476     model.all_discrete_real_upper_bounds());
2477   if (!approxBuilds) {
2478     currentVariables.all_continuous_variable_labels(
2479       model.all_continuous_variable_labels());
2480     currentVariables.all_discrete_int_variable_labels(
2481       model.all_discrete_int_variable_labels());
2482     currentVariables.all_discrete_string_variable_labels(
2483       model.all_discrete_string_variable_labels());
2484     currentVariables.all_discrete_real_variable_labels(
2485       model.all_discrete_real_variable_labels());
2486     currentResponse.function_labels(model.response_labels());
2487   }
2488 
2489   // uncertain variable distribution data (dependent on label updates above)
2490 
2491   // See notes in init_model() above, with the difference that these
2492   // updates are performed once at lightweight construct time.
2493   const SharedVariablesData&   svd =          currentVariables.shared_data();
2494   const SharedVariablesData& m_svd = model.current_variables().shared_data();
2495   if (svd.id() == m_svd.id()) // same variables specification
2496     mvDist.pull_distribution_parameters(model.multivariate_distribution());
2497   else{ // map between related sets of variables based on labels
2498     StringArray pull_labels;  m_svd.assemble_all_labels(pull_labels);
2499     StringArray push_labels;    svd.assemble_all_labels(push_labels);
2500     mvDist.pull_distribution_parameters(model.multivariate_distribution(),
2501 					pull_labels, push_labels);
2502   }
2503 
2504   // linear constraints
2505 
2506   if (model.num_linear_ineq_constraints()) {
2507     // the views don't necessarily have to be the same, but the number of
2508     // active continuous and active discrete variables have to be consistent.
2509     if (model.cv()  == currentVariables.cv()  &&
2510 	model.div() == currentVariables.div() &&
2511 	model.drv() == currentVariables.drv()) {
2512       userDefinedConstraints.linear_ineq_constraint_coeffs(
2513         model.linear_ineq_constraint_coeffs());
2514       userDefinedConstraints.linear_ineq_constraint_lower_bounds(
2515         model.linear_ineq_constraint_lower_bounds());
2516       userDefinedConstraints.linear_ineq_constraint_upper_bounds(
2517         model.linear_ineq_constraint_upper_bounds());
2518     }
2519     else {
2520       Cerr << "Error: cannot update linear inequality constraints in "
2521 	   << "DataFitSurrModel::update_from_model() due to inconsistent "
2522 	   << "active variables." << std::endl;
2523       abort_handler(MODEL_ERROR);
2524     }
2525   }
2526   if (model.num_linear_eq_constraints()) {
2527     // the views don't necessarily have to be the same, but the number of
2528     // active continuous and active discrete variables have to be consistent.
2529     if (model.cv()  == currentVariables.cv()  &&
2530 	model.div() == currentVariables.div() &&
2531 	model.drv() == currentVariables.drv()) {
2532       userDefinedConstraints.linear_eq_constraint_coeffs(
2533         model.linear_eq_constraint_coeffs());
2534       userDefinedConstraints.linear_eq_constraint_targets(
2535         model.linear_eq_constraint_targets());
2536     }
2537     else {
2538       Cerr << "Error: cannot update linear equality constraints in "
2539 	   << "DataFitSurrModel::update_from_model() due to inconsistent "
2540 	   << "active variables." << std::endl;
2541       abort_handler(MODEL_ERROR);
2542     }
2543   }
2544 
2545   // weights and sense for primary response functions
2546 
2547   primaryRespFnWts   = model.primary_response_fn_weights();
2548   primaryRespFnSense = model.primary_response_fn_sense();
2549 
2550   // nonlinear constraints
2551 
2552   if (model.num_nonlinear_ineq_constraints()) {
2553     userDefinedConstraints.nonlinear_ineq_constraint_lower_bounds(
2554       model.nonlinear_ineq_constraint_lower_bounds());
2555     userDefinedConstraints.nonlinear_ineq_constraint_upper_bounds(
2556       model.nonlinear_ineq_constraint_upper_bounds());
2557   }
2558   if (model.num_nonlinear_eq_constraints())
2559     userDefinedConstraints.nonlinear_eq_constraint_targets(
2560       model.nonlinear_eq_constraint_targets());
2561 }
2562 
2563 
declare_sources()2564 void DataFitSurrModel::declare_sources()
2565 {
2566   switch (responseMode) {
2567   case UNCORRECTED_SURROGATE: case AUTO_CORRECTED_SURROGATE:
2568     if(actualModel.is_null() || surrogateFnIndices.size() == numFns) {
2569       evaluationsDB.declare_source(modelId, "surrogate", approxInterface.interface_id(),
2570         "approximation");
2571     } else if(surrogateFnIndices.empty()) { // don't know if this can happen.
2572       evaluationsDB.declare_source(modelId, "surrogate", actualModel.model_id(),
2573         actualModel.model_type());
2574     } else {
2575       evaluationsDB.declare_source(modelId, "surrogate", approxInterface.interface_id(),
2576         "approximation");
2577       evaluationsDB.declare_source(modelId, "surrogate", actualModel.model_id(),
2578         actualModel.model_type());
2579     }
2580     break;
2581   case BYPASS_SURROGATE:
2582     evaluationsDB.declare_source(modelId, "surrogate", actualModel.model_id(),
2583         actualModel.model_type());
2584     break;
2585   case MODEL_DISCREPANCY: case AGGREGATED_MODELS:
2586     evaluationsDB.declare_source(modelId, "surrogate", actualModel.model_id(),
2587         actualModel.model_type());
2588     evaluationsDB.declare_source(modelId, "surrogate", approxInterface.interface_id(),
2589         "approximation");
2590     break;
2591   }
2592 }
2593 
2594 
default_interface_active_set()2595 ActiveSet DataFitSurrModel::default_interface_active_set() {
2596   // The ApproximationInterface may provide just a subset
2597   // of the responses, with the balance coming from the
2598   // actualModel.
2599   ActiveSet set;
2600   set.derivative_vector(currentVariables.all_continuous_variable_ids());
2601   bool has_deriv_vars = set.derivative_vector().size() != 0;
2602   ShortArray asv(numFns);
2603   const bool has_gradients = gradientType != "none" && has_deriv_vars &&
2604     (gradientType == "analytic" || supportsEstimDerivs);
2605   const bool has_hessians = hessianType != "none" &&  has_deriv_vars &&
2606     (hessianType == "analytic" || supportsEstimDerivs);
2607   // Most frequent case: build surrogates for all responses
2608   if (responseMode == MODEL_DISCREPANCY || responseMode == AGGREGATED_MODELS ||
2609       actualModel.is_null() || surrogateFnIndices.size() == numFns) {
2610     std::fill(asv.begin(), asv.end(), 1);
2611     if(has_gradients)
2612       for(auto &a : asv)
2613         a |=  2;
2614     if(has_hessians)
2615        for(auto &a : asv)
2616          a |=  4;
2617   } else {
2618     std::fill(asv.begin(), asv.end(), 0);
2619     for(int i = 0; i < numFns; ++i) {
2620       if(surrogateFnIndices.count(i)) {
2621         asv[i] = 1;
2622         if(has_gradients)
2623           asv[i] |= 2;
2624         if(has_hessians)
2625           asv[i] |= 4;
2626       }
2627     }
2628   }
2629   set.request_vector(asv);
2630   return set;
2631 }
2632 
2633 } // namespace Dakota
2634