1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 #include "ActiveSubspaceModel.hpp"
10 #include "ProbabilityTransformModel.hpp"
11 #include "NonDLHSSampling.hpp"
12 #include "BootstrapSampler.hpp"
13 #include "dakota_linear_algebra.hpp"
14 #include "ParallelLibrary.hpp"
15 #include "DataFitSurrModel.hpp"
16 #include "MarginalsCorrDistribution.hpp"
17 #include "dakota_mersenne_twister.hpp"
18 #include <boost/random/uniform_int.hpp>
19 #include <boost/random/variate_generator.hpp>
20 
21 namespace Dakota {
22 
23 /// initialization of static needed by RecastModel callbacks
24 ActiveSubspaceModel* ActiveSubspaceModel::asmInstance(NULL);
25 
26 // BMA TODO: Consider whether a DACE Iterator is justified; don't need
27 // the modularity yet, but a lot of the build controls better belong
28 // in a helper iterator specification.
29 
ActiveSubspaceModel(ProblemDescDB & problem_db)30 ActiveSubspaceModel::ActiveSubspaceModel(ProblemDescDB& problem_db):
31   SubspaceModel(problem_db, get_sub_model(problem_db)),
32   initialSamples(problem_db.get_int("model.initial_samples")),
33   subspaceIdBingLi(
34     probDescDB.get_bool("model.active_subspace.truncation_method.bing_li")),
35   subspaceIdConstantine(
36     probDescDB.get_bool("model.active_subspace.truncation_method.constantine")),
37   subspaceIdEnergy(
38     probDescDB.get_bool("model.active_subspace.truncation_method.energy")),
39   subspaceIdCV(
40     probDescDB.get_bool("model.active_subspace.truncation_method.cv")),
41   numReplicates(problem_db.get_int("model.active_subspace.bootstrap_samples")),
42   totalSamples(0), gradientScaleFactors(RealArray(numFns, 1.)),
43   truncationTolerance(probDescDB.get_real(
44     "model.active_subspace.truncation_method.energy.truncation_tolerance")),
45   buildSurrogate(probDescDB.get_bool("model.active_subspace.build_surrogate")),
46   refinementSamples(0),
47   subspaceNormalization(
48     probDescDB.get_ushort("model.active_subspace.normalization")),
49   cvIncremental(probDescDB.get_bool("model.active_subspace.cv.incremental")),
50   cvIdMethod(probDescDB.get_ushort("model.active_subspace.cv.id_method")),
51   cvRelTolerance(
52     probDescDB.get_real("model.active_subspace.cv.relative_tolerance")),
53   cvDecreaseTolerance(
54     probDescDB.get_real("model.active_subspace.cv.decrease_tolerance")),
55   cvMaxRank(problem_db.get_int("model.active_subspace.cv.max_rank"))
56 {
57   modelType = "active_subspace";
58   modelId = RecastModel::recast_model_id(root_model_id(), "ACTIVE_SUBSPACE");
59   // Set seed of bootstrap sampler:
60   BootstrapSamplerBase<RealMatrix>::set_seed(randomSeed);
61 
62   validate_inputs();
63 
64   offlineEvalConcurrency = initialSamples * subModel.derivative_concurrency();
65 
66   // initialize the fullspace derivative sampler; this
67   // will configure it to perform initialSamples
68   init_fullspace_sampler(
69     probDescDB.get_ushort("model.active_subspace.sample_type"));
70 
71   const IntVector& db_refine_samples =
72     problem_db.get_iv("model.refinement_samples");
73   if (db_refine_samples.length() == 1)
74     refinementSamples = db_refine_samples[0];
75   else if (db_refine_samples.length() > 1) {
76     Cerr << "\nError (subspace model): refinement_samples must be "
77          << "length 1 if specified." << std::endl;
78     abort_handler(PARSE_ERROR);
79   }
80 }
81 
82 
83 /** An ActiveSubspaceModel will be built over all functions, without
84     differentiating primary vs. secondary constraints.  However the
85     associated RecastModel has to differentiate. Currently identifies
86     subspace for continuous variables only, but carries other active
87     variables along for the ride. */
88 ActiveSubspaceModel::
ActiveSubspaceModel(const Model & sub_model,unsigned int dimension,const RealMatrix & rotation_matrix,short output_level)89 ActiveSubspaceModel(const Model& sub_model, unsigned int dimension,
90                     const RealMatrix &rotation_matrix, short output_level) :
91   SubspaceModel(sub_model, dimension, output_level),
92   gradientScaleFactors(RealArray(numFns, 1.)), buildSurrogate(false),
93   refinementSamples(0), subspaceNormalization(SUBSPACE_NORM_DEFAULT)
94 {
95   modelType = "active_subspace";
96   modelId = RecastModel::recast_model_id(root_model_id(), "ACTIVE_SUBSPACE");
97 
98   SubspaceModel::validate_inputs();
99 
100   //offlineEvalConcurrency = initialSamples * subModel.derivative_concurrency();
101 
102   RealMatrix reduced_basis_W1(Teuchos::View, rotation_matrix,
103                               numFullspaceVars, reducedRank);
104   reducedBasis = reduced_basis_W1;
105 
106   RealMatrix reduced_basis_W2(Teuchos::View, rotation_matrix, numFullspaceVars,
107 			      numFullspaceVars - reducedRank, 0, reducedRank);
108   inactiveBasis = reduced_basis_W2;
109 
110   initialize_subspace();
111   mappingInitialized = true; // no need to initialize_mapping() for this ctor
112 }
113 
114 
~ActiveSubspaceModel()115 ActiveSubspaceModel::~ActiveSubspaceModel()
116 { }
117 
118 
get_sub_model(ProblemDescDB & problem_db)119 Model ActiveSubspaceModel::get_sub_model(ProblemDescDB& problem_db)
120 {
121   const String& actual_model_pointer
122     = problem_db.get_string("model.surrogate.actual_model_pointer");
123   size_t model_index = problem_db.get_db_model_node(); // for restoration
124   problem_db.set_db_model_nodes(actual_model_pointer);
125 
126   transformVars = true; // hardwired for now
127 
128   Model sub_model;
129   if (transformVars)
130     sub_model.assign_rep(std::make_shared<ProbabilityTransformModel>
131 			 (problem_db.get_model(), STD_NORMAL_U));
132   else
133     sub_model = problem_db.get_model();
134 
135   problem_db.set_db_model_nodes(model_index); // restore
136 
137   return sub_model;
138 }
139 
140 
validate_inputs()141 void ActiveSubspaceModel::validate_inputs()
142 {
143   SubspaceModel::validate_inputs();
144 
145   bool error_flag = false;
146 
147   // set default initialSamples equal to 2
148   int min_initial_samples = 2;
149   if (initialSamples < min_initial_samples) {
150     initialSamples = min_initial_samples;
151     Cout << "\nWarning (subspace model): resetting samples to minimum "
152          << "allowed = " << initialSamples << ". Note that the accuracy of the "
153          << "subspace may be poor with this few samples.\n" << std::endl;
154   }
155 
156   // validate response data
157   if (subModel.gradient_type() == "none") {
158     error_flag = true;
159     Cerr << "\nError (subspace model): gradients are required;"
160          << "\n                        Please select numerical, analytic "
161          << "(recommended), or mixed gradients.\n" << std::endl;
162   }
163 
164   if (error_flag)
165     abort_handler(-1);
166 }
167 
168 
compute_subspace()169 void ActiveSubspaceModel::compute_subspace()
170 {
171   // determine number of points to add
172   totalSamples += initialSamples;
173   if (outputLevel >= NORMAL_OUTPUT)
174     Cout << "\nSubspace Model: Adding " << initialSamples
175          << " full-space samples." << std::endl;
176 
177   // evaluate samples with fullspaceSampler
178   Cout << "\nSubspace Model: Performing sampling to build reduced space"
179        << std::endl;
180   generate_fullspace_samples(initialSamples);
181 
182   // populate the derivative and vars matrices with fullspaceSampler samples
183   populate_matrices(initialSamples);
184 
185   // factor the derivative matrix using singular value decomposition
186   compute_svd();
187 
188   // use the SVD results and the truncation methods to determine the size of the
189   // active subspace
190   truncate_subspace();
191 
192   // update the reducedBasis
193   // the reduced basis is dimension N x r and stored in the first r
194   // cols of leftSingularVectors; extract it instead of using BLAS directly
195   RealMatrix reduced_basis_W1(Teuchos::View, leftSingularVectors,
196                               numFullspaceVars, reducedRank);
197   reducedBasis = reduced_basis_W1;
198   if (outputLevel >= DEBUG_OUTPUT)
199     Cout << "\nSubspace Model: Active basis is:\n" << reducedBasis;
200 
201   RealMatrix reduced_basis_W2(Teuchos::View, leftSingularVectors,
202                               numFullspaceVars, numFullspaceVars - reducedRank,
203                               0, reducedRank);
204   inactiveBasis = reduced_basis_W2;
205 
206   Cout << "\n**************************************************************"
207        << "************\nSubspace Model: Build Statistics"
208        << "\nbuild samples: " << totalSamples
209        << "\nsubspace size: " << reducedRank << "\n************************"
210        << "**************************************************\n";
211 }
212 
213 
initialize_subspace()214 void ActiveSubspaceModel::initialize_subspace()
215 {
216   SubspaceModel::initialize_subspace();
217 
218   if (buildSurrogate)
219     build_surrogate();
220 
221   if (outputLevel >= NORMAL_OUTPUT)
222     Cout << "\nActiveSubspaceModel: Initialization of subspace is complete."
223          << std::endl;
224 }
225 
226 
227 /** Convert the user-specified normal random variables to the
228     appropriate reduced space variables, based on the orthogonal
229     transformation.
230 
231     TODO: Generalize to convert other random variable types (non-normal)
232 
233     TODO: The translation of the correlations from full to reduced
234     space is likely wrong for rank correlations; should be correct for
235     covariance.
236 */
uncertain_vars_to_subspace()237 void ActiveSubspaceModel::uncertain_vars_to_subspace()
238 {
239   // ----------------------------------
240   // initialization of base RecastModel
241   // ----------------------------------
242   initialize_base_recast(variables_mapping, SubspaceModel::set_mapping,
243 			 SubspaceModel::response_mapping);
244 
245   // -------------
246   // Resize mvDist
247   // -------------
248   SubspaceModel::uncertain_vars_to_subspace();
249 
250   // -----------------------
251   // Extract full space data
252   // -----------------------
253   std::shared_ptr<Pecos::MarginalsCorrDistribution> native_dist_rep =
254     std::static_pointer_cast<Pecos::MarginalsCorrDistribution>
255     (subModel.multivariate_distribution().multivar_dist_rep());
256   std::shared_ptr<Pecos::MarginalsCorrDistribution> reduced_dist_rep =
257     std::static_pointer_cast<Pecos::MarginalsCorrDistribution>
258     (mvDist.multivar_dist_rep());
259   const ShortArray& native_rv_types = native_dist_rep->random_variable_types();
260   const BitArray&   active_vars     = native_dist_rep->active_variables();
261   size_t i, num_rv = native_rv_types.size();
262 
263   // native space characterization
264   RealVector mu_x(numFullspaceVars), sd_x(numFullspaceVars);  size_t cntr = 0;
265   for (i=0; i<num_rv; ++i)
266     if (active_vars[i]) {
267       switch (native_rv_types[i]) {
268       case Pecos::STD_NORMAL:// ActiveSubspace(ProbabilityTransform(Simulation))
269 	mu_x[cntr] = 0.;  sd_x[cntr] = 1.;  break;
270       case     Pecos::NORMAL:// ActiveSubspace(Simulation))
271 	native_dist_rep->pull_parameter(i, Pecos::N_MEAN,    mu_x[cntr]);
272 	native_dist_rep->pull_parameter(i, Pecos::N_STD_DEV, sd_x[cntr]);
273 	break;
274       default:
275 	Cerr << "Error: unsupported native distribution type ("
276 	     << native_rv_types[i] << ")." << std::endl;
277 	abort_handler(MODEL_ERROR);  break;
278       }
279       ++cntr;
280     }
281 
282   const RealSymMatrix& correl_x = native_dist_rep->correlation_matrix();
283   bool native_correl = correl_x.empty() ? false : true;
284   if (native_correl && correl_x.numRows() != numFullspaceVars) {
285     Cerr << "\nError (subspace model): Wrong correlation size." << std::endl;
286     abort_handler(MODEL_ERROR);
287   }
288 
289   if (outputLevel >= DEBUG_OUTPUT)
290     Cout << "\nSubspace Model: mu_x = \n" << mu_x
291 	 << "\nSubspace Model: sd_x = \n" << sd_x
292 	 << "\nSubspace Model: correl_x = \n" << correl_x;
293 
294   // -------------------------
295   // Define reduced space data
296   // -------------------------
297 
298   // reduced space characterization: mean mu, std dev sd
299   RealVector mu_y(reducedRank), sd_y(reducedRank),
300              mu_z(inactiveBasis.numCols());
301 
302   // mu_y = reducedBasis^T * mu_x
303   int  m = reducedBasis.numRows(), n = reducedBasis.numCols(),
304     incx = 1, incy = 1;
305   Real alpha = 1., beta = 0.;
306 
307   // y <-- alpha*A*x + beta*y
308   // mu_y <-- 1.0 * reducedBasis^T * mu_x + 0.0 * mu_y
309   Teuchos::BLAS<int, Real> teuchos_blas;
310   teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, reducedBasis.values(), m,
311                     mu_x.values(), incx, beta, mu_y.values(), incy);
312 
313   // convert the correlations C_x to variance V_x
314   // V_x <-- diag(sd_x) * C_x * diag(sd_x)
315   // not using symmetric so we can multiply() below
316   RealMatrix V_x(reducedBasis.numRows(), reducedBasis.numRows(), false);
317   if (native_correl) {
318     for (int row=0; row<reducedBasis.numRows(); ++row)
319       for (int col=0; col<reducedBasis.numRows(); ++col)
320         V_x(row, col) = sd_x(row)*correl_x(row,col)*sd_x(col);
321   }
322   else {
323     V_x = 0.;
324     for (int row=0; row<reducedBasis.numRows(); ++row)
325       V_x(row, row) = sd_x(row)*sd_x(row);
326   }
327   if (outputLevel >= DEBUG_OUTPUT)
328     Cout << "\nSubspace Model: activeBasis = \n" << reducedBasis
329 	 << "\nSubspace Model: V_x =\n" << V_x;
330 
331   // compute V_y = U^T * V_x * U
332   alpha = 1.0;  beta = 0.0;
333   RealMatrix UTVx(n, m, false);
334   UTVx.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, alpha,
335 		reducedBasis, V_x, beta);
336   RealMatrix V_y(reducedRank, reducedRank, false);
337   V_y.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, alpha,
338 	       UTVx, reducedBasis, beta);
339   if (outputLevel >= DEBUG_OUTPUT)
340     Cout << "\nSubspace Model: V_y = \n" << V_y;
341 
342   // compute the standard deviations in reduced space
343   for (int i=0; i<reducedRank; ++i)
344     sd_y(i) = std::sqrt(V_y(i,i));
345 
346   if (outputLevel >= DEBUG_OUTPUT)
347     Cout << "\nSubspace Model: mu_y = \n" << mu_y
348 	 << "\nSubspace Model: sd_y = \n" << sd_y;
349 
350   reduced_dist_rep->push_parameters(Pecos::NORMAL, Pecos::N_MEAN,    mu_y);
351   reduced_dist_rep->push_parameters(Pecos::NORMAL, Pecos::N_STD_DEV, sd_y);
352 
353   // compute the correlations in reduced space
354   // TODO: fix symmetric access to not loop over whole matrix
355 
356   // Unless the native correl was alpha*I, the reduced variables will
357   // be correlated in general, so always set the correltions
358   RealSymMatrix correl_y(reducedRank, false);
359   for (int row=0; row<reducedRank; ++row)
360     for (int col=0; col<reducedRank; ++col)
361       correl_y(row, col) = V_y(row,col)/sd_y(row)/sd_y(col);
362   if (outputLevel >= DEBUG_OUTPUT)
363     Cout << "\nSubspace Model: correl_y = \n" << correl_y;
364   reduced_dist_rep->correlation_matrix(correl_y);
365   reduced_dist_rep->initialize_correlations();
366 
367   // Set inactive subspace variables
368   // mu_z = inactiveBasis^T * mu_x
369   m = inactiveBasis.numRows();
370   n = inactiveBasis.numCols();
371   alpha = 1.0;  beta = 0.0;
372 
373   incx = 1;
374   int incz = 1;
375   teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, inactiveBasis.values(), m,
376                     mu_x.values(), incx, beta, mu_z.values(), incz);
377   inactiveVars = mu_z;
378 
379   // Set continuous variable types:
380   UShortMultiArray cv_types(boost::extents[reducedRank]);
381   for (int i = 0; i < reducedRank; i++)
382     cv_types[i] = NORMAL_UNCERTAIN;
383   currentVariables.continuous_variable_types(
384     cv_types[boost::indices[idx_range(0, reducedRank)]]);
385 
386   // Set currentVariables to means of active variables:
387   continuous_variables(mu_y);
388 }
389 
390 
generate_fullspace_samples(unsigned int diff_samples)391 void ActiveSubspaceModel::generate_fullspace_samples(unsigned int diff_samples)
392 {
393   // Rank-revealing phase requires derivatives (for now)
394   // TODO: always gradients only; no functions
395   //       analysis_driver needs to parse active_set
396   fullspaceSampler.active_set_request_values(3);
397 
398   // Generate the samples.  Have to adjust the base number of samples
399   // with sampling_reference() since the number of samples may go down
400   // from intialSamples to batchSamples
401   fullspaceSampler.sampling_reference(diff_samples);
402   fullspaceSampler.sampling_reset(diff_samples, true, false); // all_data, stats
403 
404   // and generate the additional samples
405   ParLevLIter pl_iter = modelPCIter->mi_parallel_level_iterator(miPLIndex);
406   fullspaceSampler.run(pl_iter);
407 
408   //if (outputLevel >= DEBUG_OUTPUT) {
409   //  std::shared_ptr<NonDLHSSampling> lhs_sampler_rep =
410   //    std::static_pointer_cast<NonDLHSSampling>(
411   //    fullspaceSampler.iterator_rep());
412   //  lhs_sampler_rep->compute_moments(fullspaceSampler.all_responses());
413   //  lhs_sampler_rep->print_moments(Cout);
414   //}
415 }
416 
417 
populate_matrices(unsigned int diff_samples)418 void ActiveSubspaceModel::populate_matrices(unsigned int diff_samples)
419 {
420   // extract into a matrix
421   // all_samples vs. all_variables
422   const RealMatrix& all_vars = fullspaceSampler.all_samples();
423   const IntResponseMap& all_responses = fullspaceSampler.all_responses();
424 
425   // TODO: could easily filter NaN/Inf responses and omit
426   if (outputLevel >= DEBUG_OUTPUT) {
427     Cout << "\nSubspace Model: DACE iterator returned "
428          << all_responses.size() << " samples. (expected "
429          << diff_samples << " samples)" << std::endl;
430   }
431 
432   int sample_insert_point = varsMatrix.numCols();
433   derivativeMatrix.reshape(numFullspaceVars, totalSamples*numFns);
434   varsMatrix.reshape(numFullspaceVars, totalSamples);
435 
436   unsigned int diff_sample_ind = 0;
437   IntRespMCIter resp_it = all_responses.begin(), resp_end = all_responses.end();
438 
439   // Compute gradient scaling factors if more than 1 response function
440   if(numFns > 1) {
441     for ( ; resp_it != resp_end ; ++resp_it, ++diff_sample_ind) {
442       if (subspaceNormalization == SUBSPACE_NORM_MEAN_VALUE) {
443         const RealVector& resp_vector = resp_it->second.function_values();
444         for (unsigned int fn_ind = 0; fn_ind < numFns; ++fn_ind) {
445           gradientScaleFactors[fn_ind] += resp_vector(fn_ind) /
446                                           static_cast<Real>(diff_samples);
447         }
448       } // The SUBSPACE_NORM_MEAN_GRAD and SUBSPACE_NORM_DEFAULT cases will be
449         // handled later
450       else if (subspaceNormalization == SUBSPACE_NORM_MEAN_GRAD) {
451         const RealMatrix& resp_matrix = resp_it->second.function_gradients();
452         for (unsigned int fn_ind = 0; fn_ind < numFns; ++fn_ind) {
453           RealVector grad(numFullspaceVars);
454           for (size_t ii = 0; ii < numFullspaceVars; ++ii)
455             grad[ii] = resp_matrix(ii,fn_ind);
456 
457           Real norm_grad = std::sqrt(grad.dot(grad));
458 
459           gradientScaleFactors[fn_ind] += norm_grad /
460                                           static_cast<Real>(diff_samples);
461         }
462       }
463     }
464   }
465 
466   // Reset iterators and indices
467   diff_sample_ind = 0;
468   resp_it = all_responses.begin();
469   resp_end = all_responses.end();
470 
471   for ( ; resp_it != resp_end ; ++resp_it, ++diff_sample_ind) {
472     // the absolute sample number to insert into
473     unsigned int sample_ind = sample_insert_point + diff_sample_ind;
474     // matrix of num_variables x num_functions
475     const RealMatrix& resp_matrix = resp_it->second.function_gradients();
476     for (unsigned int fn_ind = 0; fn_ind < numFns; ++fn_ind) {
477       unsigned int col_ind = sample_ind * numFns + fn_ind;
478       for (unsigned int var_ind = 0; var_ind < numFullspaceVars; ++var_ind) {
479         Real scale = 1.0;
480         if (numFns > 1 &&
481             (subspaceNormalization == SUBSPACE_NORM_DEFAULT ||
482              subspaceNormalization == SUBSPACE_NORM_LOCAL_GRAD)) {
483           RealVector grad(numFullspaceVars);
484           for (size_t ii = 0; ii < numFullspaceVars; ++ii)
485             grad[ii] = resp_matrix(ii,fn_ind);
486 
487           scale = 1.0 / std::sqrt(grad.dot(grad));
488         }
489 
490         derivativeMatrix(var_ind, col_ind) =
491           scale * resp_matrix(var_ind, fn_ind) / gradientScaleFactors[fn_ind];
492       }
493     }
494     for (unsigned int var_ind = 0; var_ind < numFullspaceVars; ++var_ind) {
495       varsMatrix(var_ind, sample_ind) = all_vars(var_ind, diff_sample_ind);
496     }
497   }
498 
499   if (outputLevel >= DEBUG_OUTPUT)
500     Cout << "\nSubspace Model: Compiled derivative matrix is:\n"
501 	 << derivativeMatrix;
502 }
503 
504 
compute_svd()505 void ActiveSubspaceModel::compute_svd()
506 {
507   // Want eigenvalues of derivMatrix*derivMatrix^T, so perform SVD of
508   // derivMatrix and square them
509 
510   RealMatrix V_transpose;
511   leftSingularVectors = derivativeMatrix;
512   svd(leftSingularVectors, singularValues, V_transpose);
513 
514   // TODO: Analyze whether we need to worry about this
515   if(singularValues.length() == 0) {
516     Cerr << "\nError (subspace model): No computed singular values available!"
517          << std::endl;
518     abort_handler(-1);
519   }
520 
521   int num_singular_values = singularValues.length();
522 
523   if (outputLevel >= NORMAL_OUTPUT) {
524     Cout << "\nSubspace Model: Singular values are:\n[ ";
525     for (unsigned int i=0; i<num_singular_values; ++i)
526       Cout << singularValues[i] << " ";
527     Cout << "]" << std::endl;
528   }
529 }
530 
531 
truncate_subspace()532 void ActiveSubspaceModel::truncate_subspace()
533 {
534   unsigned int bing_li_rank = compute_bing_li_criterion(singularValues),
535     constantine_rank = compute_constantine_metric(singularValues),
536     energy_rank      = compute_energy_criterion(singularValues),
537     cv_rank          = (subspaceIdCV) ? compute_cross_validation_metric() : 0;
538 
539   if (reducedRank > 0 && reducedRank <= singularValues.length()) {
540     if (outputLevel >= NORMAL_OUTPUT)
541       Cout << "\nSubspace Model: Subspace size has been specified as dimension"
542            << " = " << reducedRank << "." << std::endl;
543   }
544   else {
545     reducedRank = 1; // initialize reducedRank
546 
547     if (subspaceIdBingLi) {
548       if (outputLevel >= NORMAL_OUTPUT)
549         Cout << "\nSubspace Model: Bing Li truncation method is active."
550              << std::endl;
551 
552       if (reducedRank < bing_li_rank)
553         reducedRank = bing_li_rank;
554     }
555 
556     if (subspaceIdConstantine) {
557       if (outputLevel >= NORMAL_OUTPUT)
558         Cout << "\nSubspace Model: Constantine truncation method is active."
559              << std::endl;
560 
561       if (reducedRank < constantine_rank)
562         reducedRank = constantine_rank;
563     }
564 
565     if (subspaceIdEnergy) {
566       if (outputLevel >= NORMAL_OUTPUT)
567         Cout << "\nSubspace Model: Eigenvalue energy truncation method is "
568              << "active." << std::endl;
569 
570       if (reducedRank < energy_rank)
571         reducedRank = energy_rank;
572     }
573 
574     if (subspaceIdCV) {
575       if (outputLevel >= NORMAL_OUTPUT)
576         Cout << "\nSubspace Model: Cross validation truncation method is "
577              << "active." << std::endl;
578 
579       if (reducedRank < cv_rank)
580         reducedRank = cv_rank;
581     }
582 
583     // Default case:
584     if (!(subspaceIdBingLi || subspaceIdConstantine || subspaceIdEnergy
585           || subspaceIdCV)) {
586       if (outputLevel >= NORMAL_OUTPUT)
587         Cout << "\nSubspace Model: Determining subspace size with Constantine "
588              << "metric." << std::endl;
589 
590       reducedRank = constantine_rank;
591     }
592   }
593 
594   // Check to make sure subspace size is smaller than numerical rank of the
595   // derivative matrix:
596   double inf_norm = derivativeMatrix.normInf();
597   double mach_svtol = inf_norm * std::numeric_limits<Real>::epsilon();
598   if (singularValues[reducedRank-1] < mach_svtol) {
599     Cout << "\nWarning (subspace model): Computed subspace size is greater than"
600          << " numerical rank. Changing subspace size to numerical rank."
601          << std::endl;
602     for (unsigned int i=0; i<reducedRank; ++i) {
603       if (singularValues[i] < mach_svtol) {
604         reducedRank = i;
605         break;
606       }
607     }
608 
609     if (reducedRank < 1) {
610       Cerr << "\nError (subspace model): Derivative matrix has numerical rank "
611            << "of 0. Something may be wrong with the gradient calculations."
612            << std::endl;
613       abort_handler(-1);
614     }
615 
616     Cout << "\nSubspace Model: New subspace size is dimension = "
617          << reducedRank << "." << std::endl;
618   }
619 
620   int k_temp = std::ceil(initialSamples /
621                          (2.0*std::log10((double)numFullspaceVars)));
622 
623   if (reducedRank >= k_temp) {
624     Cout << "\nWarning (subspace model): Computed subspace may be inaccurate. "
625          << "Consider increasing the number of samples to satisfy: "
626          << "N > 2*k*log(m), where N is the number of samples, k is the "
627          << "subspace size, and m is the dimension of the original model."
628          << std::endl;
629   }
630 }
631 
632 
633 unsigned int ActiveSubspaceModel::
compute_bing_li_criterion(RealVector & singular_values)634 compute_bing_li_criterion(RealVector& singular_values)
635 {
636   int num_vars = derivativeMatrix.numRows();
637   int num_vals;
638   if (derivativeMatrix.numCols() < num_vars)
639     num_vals = derivativeMatrix.numCols();
640   else
641     num_vals = num_vars;
642 
643   // Stores Bing Li's criterion
644   std::vector<RealMatrix::scalarType> bing_li_criterion(num_vals, 0);
645 
646   // Compute part 1 of criterion: relative energy in next eigenvalue in the
647   // spectrum
648 
649   RealMatrix::scalarType eigen_sum = 0.0;
650   for(size_t i = 0; i < num_vals; ++i) {
651     RealMatrix::scalarType eigen_val = singular_values[i] * singular_values[i];
652     bing_li_criterion[i] = eigen_val;
653     eigen_sum += eigen_val;
654   }
655 
656   for(size_t i = 0; i < num_vals; ++i)
657     bing_li_criterion[i] /= eigen_sum;
658 
659   // Compute part 2 of criterion: bootstrapped determinant metric
660 
661   RealMatrix bootstrapped_sample(num_vars, derivativeMatrix.numCols());
662   RealVector sample_sing_vals;
663   RealMatrix sample_sing_vectors;
664 
665   Teuchos::LAPACK<RealMatrix::ordinalType, RealMatrix::scalarType> lapack;
666 
667   std::vector<Real> bootstrapped_det(bing_li_criterion.size());
668 
669   BootstrapSampler<RealMatrix> bootstrap_sampler(derivativeMatrix,numFns);
670 
671   for (size_t i = 0; i < numReplicates; ++i) {
672     bootstrap_sampler(bootstrapped_sample);
673 
674     svd(bootstrapped_sample, sample_sing_vals, sample_sing_vectors);
675 
676     // Overwrite bootstrap replicate with singular matrix product
677     RealMatrix bootstrapped_sample_copy = bootstrapped_sample;
678     bootstrapped_sample.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0,
679                                  leftSingularVectors, bootstrapped_sample_copy,
680                                  0.0);
681 
682     for(size_t j = 1; j < bootstrapped_det.size(); ++j) {
683       size_t num_sing_vec = j;
684 
685       RealMatrix submatrix(Teuchos::Copy, bootstrapped_sample, num_sing_vec,
686                            num_sing_vec);
687 
688       // Get determinant from LU decomposition
689 
690       Teuchos::SerialDenseVector<RealMatrix::ordinalType,
691                                  RealMatrix::ordinalType> pivot(num_sing_vec);
692       RealMatrix::ordinalType info;
693 
694       lapack.GETRF(num_sing_vec, num_sing_vec, submatrix.values(),
695                    num_sing_vec, pivot.values(), &info);
696 
697       RealMatrix::scalarType det = 1.0;
698       for (size_t i = 0; i < j; ++i) {
699         det *= submatrix(i,i);
700       }
701 
702       bootstrapped_det[j] += std::abs(det);
703     }
704   }
705 
706   RealMatrix::scalarType det_sum = 0.0;
707   bootstrapped_det[0] = 0.0;
708   for (size_t i = 1; i < bootstrapped_det.size(); ++i) {
709     bootstrapped_det[i] = 1.0 - bootstrapped_det[i] /
710                           static_cast<RealMatrix::scalarType>(numReplicates);
711     det_sum += bootstrapped_det[i];
712   }
713 
714   for (size_t i = 0; i < bootstrapped_det.size(); ++i) {
715     bing_li_criterion[i] += bootstrapped_det[i] / det_sum;
716   }
717 
718   if (outputLevel >= NORMAL_OUTPUT) {
719     Cout << "\nSubspace Model: Bing Li Criterion values are:\n[ ";
720     for (size_t i = 0; i < bing_li_criterion.size(); ++i) {
721       Cout << bing_li_criterion[i] << " ";
722     }
723     Cout << "]" << std::endl;
724   }
725 
726   // Cutoff is 1st minimum of the criterion
727   unsigned int rank = 0;
728   for (unsigned int i = 1; i < bing_li_criterion.size(); ++i) {
729     if(bing_li_criterion[i-1] < bing_li_criterion[i]) {
730       rank = i-1;
731       break;
732     }
733   }
734 
735   if (outputLevel >= NORMAL_OUTPUT)
736     Cout << "\nSubspace Model: Bing Li metric subspace size estimate = "
737          << rank << std::endl;
738 
739   return rank;
740 }
741 
742 
743 unsigned int ActiveSubspaceModel::
compute_constantine_metric(RealVector & singular_values)744 compute_constantine_metric(RealVector& singular_values)
745 {
746   int num_vars = derivativeMatrix.numRows();
747   int num_vals;
748   if (derivativeMatrix.numCols() < num_vars)
749     num_vals = derivativeMatrix.numCols();
750   else
751     num_vals = num_vars;
752 
753   // Stores Constantine's metric
754   RealArray constantine_metric((num_vals < num_vars-1) ? num_vals : num_vars-1,
755                                0);
756 
757   // Compute bootstrapped subspaces
758   RealMatrix bootstrapped_sample(num_vars, derivativeMatrix.numCols());
759   RealMatrix dist_mat(num_vars, num_vars);
760   RealVector sample_sing_vals;
761   RealMatrix sample_sing_vectors;
762   RealVector dist_sing_vals;
763   RealMatrix dist_sing_vectors;
764 
765   Teuchos::LAPACK<RealMatrix::ordinalType, RealMatrix::scalarType> lapack;
766 
767   BootstrapSampler<RealMatrix> bootstrap_sampler(derivativeMatrix, numFns);
768 
769   for (size_t i = 0; i < numReplicates; ++i) {
770     bootstrap_sampler(bootstrapped_sample);
771 
772     svd(bootstrapped_sample, sample_sing_vals, sample_sing_vectors);
773 
774     for(size_t j = 0; j < constantine_metric.size(); ++j) {
775       size_t num_sing_vec = j+1;
776 
777       RealMatrix submatrix(Teuchos::View, leftSingularVectors, num_vars,
778                            num_sing_vec);
779 
780       RealMatrix submatrix_bootstrap(Teuchos::View, bootstrapped_sample,
781                                      num_vars, num_sing_vec);
782 
783       dist_mat.multiply(Teuchos::NO_TRANS, Teuchos::TRANS, 1.0,
784                         submatrix, submatrix, 0.0);
785 
786       dist_mat.multiply(Teuchos::NO_TRANS, Teuchos::TRANS, -1.0,
787                         submatrix_bootstrap, submatrix_bootstrap, 1.0);
788 
789       // The spectral norm is slow, let's use the Frobenius norm instead.
790       // Compute the spectral norm of dist_mat (largest singular value):
791       //svd(dist_mat, dist_sing_vals, dist_sing_vectors);
792       //constantine_metric[j] += dist_sing_vals(0) / numReplicates;
793 
794       constantine_metric[j] += dist_mat.normFrobenius() / numReplicates;
795     }
796   }
797 
798   if (outputLevel >= NORMAL_OUTPUT) {
799     Cout << "\nSubspace Model: Constantine metric values are:\n[ ";
800     for (size_t i = 0; i < constantine_metric.size(); ++i) {
801       Cout << constantine_metric[i] << " ";
802     }
803     Cout << "]" << std::endl;
804   }
805 
806   // Cutoff is global minimum of metric
807   unsigned int rank = 0;
808   Real min_val = 0;
809   for (unsigned int i = 0; i < constantine_metric.size(); ++i) {
810     if(constantine_metric[i] < min_val || i == 0) {
811       min_val = constantine_metric[i];
812       rank = i+1;
813     }
814   }
815 
816   if (outputLevel >= NORMAL_OUTPUT)
817     Cout << "\nSubspace Model: Constantine metric subspace size estimate = "
818          << rank << std::endl;
819 
820   return rank;
821 }
822 
823 
824 unsigned int ActiveSubspaceModel::
compute_energy_criterion(RealVector & singular_values)825 compute_energy_criterion(RealVector& singular_values)
826 {
827   int num_vars = derivativeMatrix.numRows();
828   int num_vals;
829   if (derivativeMatrix.numCols() < num_vars)
830     num_vals = derivativeMatrix.numCols();
831   else
832     num_vals = num_vars;
833 
834   Real total_energy = 0.0;
835   for (size_t i = 0; i < num_vals; ++i) {
836     // eigenvalue = (singular_value)^2
837     total_energy += std::pow(singular_values[i],2);
838   }
839 
840   RealVector energy_metric(num_vals);
841   energy_metric[0] = std::pow(singular_values[0],2)/total_energy;
842   for (size_t i = 1; i < num_vals; ++i) {
843     energy_metric[i] = std::pow(singular_values[i],2)/total_energy
844                        + energy_metric[i-1];
845   }
846 
847   if (outputLevel >= NORMAL_OUTPUT) {
848     Cout << "\nSubspace Model: Energy criterion values are:\n[ ";
849     for (size_t i = 0; i < num_vals; ++i) {
850       Cout << energy_metric[i] << " ";
851     }
852     Cout << "]" << std::endl;
853   }
854 
855   unsigned int rank = 0;
856   for (unsigned int i = 0; i < num_vals; ++i) {
857     if(std::abs(1.0 - energy_metric[i]) < truncationTolerance) {
858       rank = i+1;
859       break;
860     }
861   }
862 
863   if (outputLevel >= NORMAL_OUTPUT)
864     Cout << "\nSubspace Model: Eigenvalue energy metric subspace size estimate "
865          << "= " << rank << ". (truncation_tolerance = " << truncationTolerance
866          << ")" << std::endl;
867 
868   return rank;
869 }
870 
871 
compute_cross_validation_metric()872 unsigned int ActiveSubspaceModel::compute_cross_validation_metric()
873 {
874   // Compute max_rank, that is the maximum dimension that we have enough samples
875   // to build a surrogate for
876 
877   if (outputLevel >= NORMAL_OUTPUT)
878     Cout << "\nSubspace Model: Beginning cross validation subspace id "
879          << "method.\n" << std::endl;
880 
881   // TODO: replace with C++11 std::mt19937 and related components
882   boost::mt19937 rnum_generator(randomSeed);
883   boost::uniform_int<> uniform_dist;
884   boost::variate_generator<boost::mt19937&, boost::uniform_int<> >
885     rnum_variate_gen(rnum_generator, uniform_dist);
886 
887   int num_folds = 10, poly_degree = 2; // quadratic bases
888 
889   const RealMatrix&     all_x = fullspaceSampler.all_samples();
890   const IntResponseMap& all_y = fullspaceSampler.all_responses();
891   int n_samps = all_x.numCols();
892 
893   int num_samples_req = 1, max_rank = 1;
894   while (n_samps >= num_samples_req && max_rank < numFullspaceVars) {
895     num_samples_req = 1;
896     for (int ii = max_rank + poly_degree; ii > max_rank; ii--) {
897       num_samples_req *= ii; // Numerator
898     }
899     for (int ii = poly_degree; ii > 0; ii--) {
900       num_samples_req /= ii; // Denomenator
901     }
902 
903     if (n_samps < num_samples_req) {
904       max_rank--; // Too big by one
905       break;
906     } else
907       max_rank++; // Increment for next loop iteration
908   }
909 
910   if (cvMaxRank >= 0 && max_rank > cvMaxRank)
911     max_rank = cvMaxRank;
912 
913   // Loop over all feasible subspace sizes
914   std::vector<Real> cv_error;
915   bool found_acceptable_subspace = false,
916     rel_tol_met = false, decrease_tol_met = false;
917   for (unsigned int ii = 1; ii <= max_rank; ii++) {
918     if (found_acceptable_subspace && cvIncremental)
919       break;
920 
921     // Add element to the list of metrics:
922     cv_error.push_back(0.0);
923 
924     // Create a local active subspace model using the light-weight constructor:
925     Model asm_model_tmp;
926     asm_model_tmp.assign_rep(std::make_shared<ActiveSubspaceModel>
927 			     (subModel, ii, leftSingularVectors, QUIET_OUTPUT));
928 
929     String sample_reuse = "", approx_type = "global_moving_least_squares";
930     ActiveSet surr_set = current_response().active_set(); // copy
931 
932     UShortArray approx_order(reducedRank, poly_degree);
933     short corr_order = -1, corr_type = NO_CORRECTION, data_order = 1;
934     Iterator dace_iterator;
935 
936     Model cv_surr_model;
937     cv_surr_model.assign_rep(std::make_shared<DataFitSurrModel>(dace_iterator,
938       asm_model_tmp, surr_set, approx_type, approx_order, corr_type, corr_order,
939       data_order, QUIET_OUTPUT,sample_reuse));
940 
941     Teuchos::BLAS<int, Real> teuchos_blas;
942 
943     //  Project fullspace samples onto active directions
944     //  Calculate x_active = W1^T*x
945     Real alpha = 1.0, beta = 0.0;
946     RealMatrix all_x_active(ii, all_x.numCols()),
947       W1(Teuchos::Copy, leftSingularVectors, numFullspaceVars, ii);
948     int m = W1.numCols(), k = W1.numRows(), n = all_x.numCols();
949 
950     teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, k, alpha,
951                       W1.values(), k, all_x.values(), k, beta,
952                       all_x_active.values(), m);
953 
954     // Randomly partion sample matrix into k folds
955 
956     std::vector<int> random_index_vec;
957     // set sequential index values:
958     for (int ind = 0; ind < n; ++ind)
959       random_index_vec.push_back(ind);
960     // shuffle these indices:
961     std::random_shuffle(random_index_vec.begin(), random_index_vec.end(),
962 			rnum_variate_gen);
963 
964     // Compute the size of each fold:
965     std::vector<int> fold_size(num_folds, n / num_folds);
966     for (int jj = 0; jj < n % num_folds; jj++)
967       fold_size[jj]++;
968 
969     // Loop over k folds holding one fold out at a time as a test set and using
970     // the rest to build the surrogate
971     for (int jj = 0; jj < num_folds; jj++) {
972       // Get kth fold and put into test sets, put the rest into training sets:
973 
974       int fold_start_ind = 0;
975       for (int kk = 0; kk < jj; kk++)
976         fold_start_ind += fold_size[kk];
977 
978       RealMatrix training_x(all_x_active.numRows(), n - fold_size[jj]),
979 	             test_x(all_x_active.numRows(), fold_size[jj]);
980 
981       IntResponseMap training_y, test_y;
982 
983       IntRespMCIter resp_it = all_y.begin(), resp_end = all_y.end();
984 
985       // Fill test_x, test_y, training_x, and training_y
986       int training_count = 0, test_count = 0;
987       for (int kk = 0; kk < all_x_active.numCols(); kk++) {
988         if (fold_start_ind <= random_index_vec[kk] &&
989             (fold_start_ind + fold_size[jj]) > random_index_vec[kk]) {
990           test_y[test_count] = resp_it->second;
991           for (int mm = 0; mm < all_x_active.numRows(); mm++)
992             test_x(mm, test_count) = all_x_active(mm, kk);
993 
994           test_count++;
995         } else {
996           training_y[training_count] = resp_it->second;
997           for (int mm = 0; mm < all_x_active.numRows(); mm++)
998             training_x(mm, training_count) = all_x_active(mm, kk);
999 
1000           training_count++;
1001         }
1002 
1003         resp_it++;
1004       }
1005 
1006       cv_error[ii-1] += build_cv_surrogate(cv_surr_model, training_x,
1007                                            training_y,test_x, test_y)/num_folds;
1008     }
1009 
1010     if (cvIdMethod == RELATIVE_TOLERANCE) {
1011       tolerance_met_index(cv_error, cvRelTolerance, rel_tol_met);
1012       if (rel_tol_met)
1013         found_acceptable_subspace = true;
1014     }
1015 
1016     if (cvIdMethod == DECREASE_TOLERANCE) {
1017       tolerance_met_index(negative_diff(cv_error), cvDecreaseTolerance,
1018                           decrease_tol_met);
1019       if (decrease_tol_met)
1020         found_acceptable_subspace = true;
1021     }
1022   }
1023 
1024   return determine_rank_cv(cv_error);
1025 }
1026 
1027 
1028 unsigned int ActiveSubspaceModel::
determine_rank_cv(const std::vector<Real> & cv_error)1029 determine_rank_cv(const std::vector<Real> &cv_error)
1030 {
1031   if (outputLevel >= NORMAL_OUTPUT) {
1032     Cout << "\nSubspace Model: Cross validation metric values are:\n[ ";
1033     for (size_t i = 0; i < cv_error.size(); ++i) {
1034       Cout << cv_error[i] << " ";
1035     }
1036     Cout << "]" << std::endl;
1037   }
1038 
1039   bool rel_tol_met = false, decrease_tol_met = false;
1040 
1041   unsigned int rank_min_metric = min_index(cv_error) + 1;
1042   unsigned int rank_rel_tol = tolerance_met_index(cv_error, cvRelTolerance,
1043                               rel_tol_met) + 1;
1044   unsigned int rank_decrease_tol = tolerance_met_index(negative_diff(cv_error),
1045                                    cvDecreaseTolerance, decrease_tol_met) + 1;
1046 
1047   unsigned int rank = 0;
1048   bool fallback = false;
1049   switch (cvIdMethod) {
1050   case MINIMUM_METRIC:
1051     rank = rank_min_metric;
1052     break;
1053   case RELATIVE_TOLERANCE:
1054   case CV_ID_DEFAULT:
1055     if (rel_tol_met)
1056       rank = rank_rel_tol;
1057     else {
1058       rank = rank_min_metric;
1059       fallback = true;
1060     }
1061     break;
1062   case DECREASE_TOLERANCE:
1063     if (decrease_tol_met)
1064       rank = rank_decrease_tol;
1065     else {
1066       rank = rank_min_metric;
1067       fallback = true;
1068     }
1069     break;
1070   }
1071 
1072   if (outputLevel >= NORMAL_OUTPUT) {
1073     Cout << "\nSubspace Model: Cross validation metric: minimum metric subspace"
1074          << " size estimate = " << rank_min_metric << ".";
1075     if (cvIdMethod == MINIMUM_METRIC)
1076       Cout << " (active)";
1077     else if (fallback)
1078       Cout << " (active as a fallback)";
1079     else
1080       Cout << " (inactive)";
1081     Cout << std::endl;
1082 
1083     Cout << "\nSubspace Model: Cross validation metric: relative tolerance "
1084          << "subspace size estimate = " << rank_rel_tol << ".";
1085     if (cvIdMethod == RELATIVE_TOLERANCE || cvIdMethod == CV_ID_DEFAULT)
1086       Cout << " (active, ";
1087     else
1088       Cout << " (inactive, ";
1089     Cout << "tolerance = " << cvRelTolerance << ")" << std::endl;
1090 
1091     Cout << "\nSubspace Model: Cross validation metric: decrease tolerance "
1092          << "subspace size estimate = " << rank_decrease_tol << ".";
1093     if (cvIdMethod == DECREASE_TOLERANCE)
1094       Cout << " (active, ";
1095     else
1096       Cout << " (inactive, ";
1097     Cout << "tolerance = " << cvDecreaseTolerance << ")" << std::endl;
1098   }
1099 
1100   return rank;
1101 }
1102 
1103 
1104 /** Build surrogate over active subspace: initialize surrogateModel. */
build_surrogate()1105 void ActiveSubspaceModel::build_surrogate()
1106 {
1107   // Prior to the use of std::shared_ptr, this used *this within assign_rep().
1108   // Rather than using std::enable_shared_from_this<ActiveSubspaceModel> to
1109   // allow std::shared_ptr, we resort to creating a new ActiveSubspaceModel
1110   // instance, reusing the lightweight ctor developed for cross-validating
1111   // different reduced ranks.
1112   // > Note: A potential concern with this approach is the loss of config
1113   //   information stemming from the difference in ctor initializations,
1114   //   but this seems unimportant following final subspace creation (and
1115   //   additional data could be added to the lightweight ctor if/when needed).
1116   Model asm_model;
1117   asm_model.assign_rep(std::make_shared<ActiveSubspaceModel>(subModel,
1118     reducedRank, leftSingularVectors, QUIET_OUTPUT)); // partitioned to W1,W2
1119 
1120   String sample_reuse = "", approx_type = "global_moving_least_squares";
1121   ActiveSet surr_set = current_response().active_set(); // copy
1122   int poly_degree = 2; // quadratic bases
1123   UShortArray approx_order(reducedRank, poly_degree);
1124   short corr_order = -1, corr_type = NO_CORRECTION, data_order = 1;
1125   Iterator dace_iterator;
1126 
1127   surrogateModel.assign_rep(std::make_shared<DataFitSurrModel>(dace_iterator,
1128     asm_model, surr_set, approx_type, approx_order, corr_type, corr_order,
1129     data_order, outputLevel, sample_reuse));
1130 
1131   const RealMatrix& all_vars_x = fullspaceSampler.all_samples();
1132   const IntResponseMap& all_responses = fullspaceSampler.all_responses();
1133 
1134   Teuchos::BLAS<int, Real> teuchos_blas;
1135 
1136   //  Project fullspace samples onto active directions
1137   //  Calculate y = W1^T*x
1138   Real alpha = 1., beta = 0.;
1139 
1140   RealMatrix all_vars_y(reducedRank, all_vars_x.numCols());
1141 
1142   const RealMatrix& W1 = reducedBasis;
1143   int m = W1.numCols();
1144   int k = W1.numRows();
1145   int n = all_vars_x.numCols();
1146 
1147   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, k, alpha,
1148                     W1.values(), k, all_vars_x.values(), k, beta,
1149                     all_vars_y.values(), m);
1150 
1151   // Check to make sure we have enough samples to build a moving least squares
1152   // surrogate. If not, add them using additional refinement samples.
1153   // First calculate the number of samples needed.
1154   // num_samples_req = (reducedRank + poly_degree)!/(reducedRank!*poly_degree!)
1155   // Below is a simplified form of the equation above: (avoids large factorials)
1156   int num_samples_req = 1;
1157   for (int ii = reducedRank + poly_degree; ii > reducedRank; ii--) {
1158     num_samples_req *= ii; // Numerator
1159   }
1160   for (int ii = poly_degree; ii > 0; ii--) {
1161     num_samples_req /= ii; // Denomenator
1162   }
1163 
1164   if ((n + refinementSamples) < num_samples_req) {
1165     int num_new_samples = num_samples_req - (n + refinementSamples);
1166     refinementSamples += num_new_samples;
1167     if (outputLevel >= NORMAL_OUTPUT) {
1168       Cout << "\nWarning (subspace model):  Moving least squares surrogate "
1169            << "needs at least " << num_samples_req << " samples. Adding "
1170            << num_new_samples << " additional refinement_samples for building "
1171            << "surrogate." << std::endl;
1172     }
1173   }
1174 
1175   surrogateModel.append_approximation(all_vars_y, all_responses,
1176                                       (refinementSamples == 0));
1177 
1178   // If user requested refinement_samples for building the surrogate evaluate
1179   // them here. Since moving least squares doesn't use gradients only request
1180   // function values here.
1181   if (refinementSamples > 0) {
1182     if (outputLevel >= DEBUG_OUTPUT) {
1183       Cout << "\nSubspace Model: adding " << refinementSamples
1184            << " refinement_samples for building surrogate." << std::endl;
1185     }
1186 
1187     fullspaceSampler.active_set_request_values(1);
1188     fullspaceSampler.sampling_reference(refinementSamples);
1189     fullspaceSampler.sampling_reset(refinementSamples, true, false);
1190 
1191     // and generate the additional samples
1192     ParLevLIter pl_iter = modelPCIter->mi_parallel_level_iterator(miPLIndex);
1193     fullspaceSampler.run(pl_iter);
1194 
1195     const RealMatrix& all_vars_x_ref = fullspaceSampler.all_samples();
1196     const IntResponseMap& all_responses_ref = fullspaceSampler.all_responses();
1197 
1198     RealMatrix all_vars_y_ref(reducedRank, all_vars_x_ref.numCols());
1199 
1200     n = all_vars_x_ref.numCols();
1201 
1202     teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, k, alpha,
1203                       W1.values(), k, all_vars_x_ref.values(), k, beta,
1204                       all_vars_y_ref.values(), m);
1205 
1206     surrogateModel.append_approximation(all_vars_y_ref, all_responses_ref,true);
1207   }
1208 }
1209 
1210 
1211 /// Build global moving least squares surrogate model to use in cross validation
1212 /// to estimate active subspace size
1213 Real ActiveSubspaceModel::
build_cv_surrogate(Model & cv_surr_model,RealMatrix training_x,IntResponseMap training_y,RealMatrix test_x,IntResponseMap test_y)1214 build_cv_surrogate(Model &cv_surr_model, RealMatrix training_x,
1215                    IntResponseMap training_y,
1216                    RealMatrix test_x, IntResponseMap test_y)
1217 {
1218   cv_surr_model.update_approximation(training_x, training_y, true);
1219 
1220   int num_test_points = test_x.numCols();
1221 
1222   // evaluate surrogate at test points:
1223   IntResponseMap test_y_surr;
1224   ActiveSet surr_set = current_response().active_set(); // copy
1225   for (int ii = 0; ii < num_test_points; ii++) {
1226     cv_surr_model.continuous_variables(
1227       Teuchos::getCol(Teuchos::Copy, test_x, ii));
1228 
1229     cv_surr_model.evaluate(surr_set);
1230 
1231     test_y_surr[ii] = cv_surr_model.current_response().copy();
1232   }
1233 
1234   // compute L2 norm of error between test_prediction and actual response:
1235   IntRespMCIter resp_it = test_y.begin(), resp_end = test_y.end(),
1236     resp_surr_it = test_y_surr.begin(), resp_surr_end = test_y_surr.end();
1237   RealVector error_norm(numFns), mean(numFns), stdev(numFns);
1238   for (; resp_it != resp_end
1239        && resp_surr_it != resp_surr_end; resp_it++, resp_surr_it++) {
1240     const RealVector& resp_vector = resp_it->second.function_values();
1241     const RealVector& resp_surr_vector = resp_surr_it->second.function_values();
1242 
1243     for (size_t ii = 0; ii < numFns; ii++) {
1244       error_norm[ii] += std::pow(resp_vector[ii] - resp_surr_vector[ii], 2);
1245       mean[ii] += resp_vector[ii] / test_y.size();
1246     }
1247   }
1248 
1249   resp_it = test_y.begin();  resp_end = test_y.end();
1250   for (; resp_it != resp_end; resp_it++) {
1251     const RealVector& resp_vector = resp_it->second.function_values();
1252     for (size_t ii = 0; ii < numFns; ii++)
1253       stdev[ii] += std::pow(resp_vector[ii] - mean[ii], 2) / test_y.size();
1254   }
1255   for (size_t ii = 0; ii < numFns; ii++)
1256     stdev[ii] = std::sqrt(stdev[ii]);
1257 
1258   Real max_error_norm = 0;
1259   for (size_t ii = 0; ii < numFns; ii++) {
1260     Real norm_rms_error = std::sqrt(error_norm[ii] / test_y.size()) / stdev[ii];
1261 
1262     if (norm_rms_error > max_error_norm)
1263       max_error_norm = norm_rms_error;
1264   }
1265 
1266   return max_error_norm;
1267 }
1268 
1269 
derived_evaluate(const ActiveSet & set)1270 void ActiveSubspaceModel::derived_evaluate(const ActiveSet& set)
1271 {
1272   if (!mappingInitialized) {
1273     Cerr << "\nError (subspace model): model has not been initialized."
1274          << std::endl;
1275     abort_handler(-1);
1276   }
1277 
1278   component_parallel_mode(ONLINE_PHASE);
1279 
1280   if (buildSurrogate) {
1281     ++recastModelEvalCntr;
1282 
1283     surrogateModel.active_variables(currentVariables);
1284     surrogateModel.evaluate(set);
1285 
1286     currentResponse.active_set(set);
1287     currentResponse.update(surrogateModel.current_response());
1288   }
1289   else
1290     RecastModel::derived_evaluate(set);
1291 }
1292 
1293 
derived_evaluate_nowait(const ActiveSet & set)1294 void ActiveSubspaceModel::derived_evaluate_nowait(const ActiveSet& set)
1295 {
1296   if (!mappingInitialized) {
1297     Cerr << "\nError (subspace model): model has not been initialized."
1298          << std::endl;
1299     abort_handler(-1);
1300   }
1301 
1302   component_parallel_mode(ONLINE_PHASE);
1303 
1304   if (buildSurrogate) {
1305     ++recastModelEvalCntr;
1306 
1307     surrogateModel.active_variables(currentVariables);
1308     surrogateModel.evaluate_nowait(set);
1309 
1310     // store map from surrogateModel eval id to ActiveSubspaceModel id
1311     surrIdMap[surrogateModel.evaluation_id()] = recastModelEvalCntr;
1312   }
1313   else
1314     RecastModel::derived_evaluate_nowait(set);
1315 }
1316 
1317 
derived_synchronize()1318 const IntResponseMap& ActiveSubspaceModel::derived_synchronize()
1319 {
1320   if (!mappingInitialized) {
1321     Cerr << "\nError (subspace model): model has not been initialized."
1322          << std::endl;
1323     abort_handler(-1);
1324   }
1325 
1326   component_parallel_mode(ONLINE_PHASE);
1327 
1328   if (buildSurrogate) {
1329     surrResponseMap.clear();
1330     rekey_synch(surrogateModel, true, surrIdMap, surrResponseMap);
1331     return surrResponseMap;
1332   }
1333   else
1334     return RecastModel::derived_synchronize();
1335 }
1336 
1337 
derived_synchronize_nowait()1338 const IntResponseMap& ActiveSubspaceModel::derived_synchronize_nowait()
1339 {
1340   if (!mappingInitialized) {
1341     Cerr << "\nError (subspace model): model has not been initialized."
1342          << std::endl;
1343     abort_handler(-1);
1344   }
1345 
1346   component_parallel_mode(ONLINE_PHASE);
1347 
1348   if (buildSurrogate) {
1349     surrResponseMap.clear();
1350     rekey_synch(surrogateModel, false, surrIdMap, surrResponseMap);
1351     return surrResponseMap;
1352   }
1353   else
1354     return RecastModel::derived_synchronize_nowait();
1355 }
1356 
1357 
1358 /**  This specialization is because the model is used in multiple
1359      contexts in this iterator, depending on build phase.  Note that
1360      this overrides the default behavior at Iterator which recurses
1361      into any submodels. */
1362 void ActiveSubspaceModel::
derived_init_communicators(ParLevLIter pl_iter,int max_eval_concurrency,bool recurse_flag)1363 derived_init_communicators(ParLevLIter pl_iter, int max_eval_concurrency,
1364                            bool recurse_flag)
1365 {
1366   // The inbound subModel concurrency accounts for any finite differences
1367 
1368   onlineEvalConcurrency = max_eval_concurrency;
1369 
1370   if (recurse_flag) {
1371     if (!mappingInitialized)
1372       fullspaceSampler.init_communicators(pl_iter);
1373 
1374     subModel.init_communicators(pl_iter, max_eval_concurrency);
1375   }
1376 }
1377 
1378 
1379 void ActiveSubspaceModel::
derived_set_communicators(ParLevLIter pl_iter,int max_eval_concurrency,bool recurse_flag)1380 derived_set_communicators(ParLevLIter pl_iter, int max_eval_concurrency,
1381                           bool recurse_flag)
1382 {
1383   miPLIndex = modelPCIter->mi_parallel_level_index(pl_iter);// run time setting
1384 
1385   if (recurse_flag) {
1386     if (!mappingInitialized)
1387       fullspaceSampler.set_communicators(pl_iter);
1388 
1389     subModel.set_communicators(pl_iter, max_eval_concurrency);
1390 
1391     // RecastModels do not utilize default set_ie_asynchronous_mode() as
1392     // they do not define the ie_parallel_level
1393     asynchEvalFlag = subModel.asynch_flag();
1394     evaluationCapacity = subModel.evaluation_capacity();
1395   }
1396 }
1397 
1398 
1399 void ActiveSubspaceModel::
derived_free_communicators(ParLevLIter pl_iter,int max_eval_concurrency,bool recurse_flag)1400 derived_free_communicators(ParLevLIter pl_iter, int max_eval_concurrency,
1401                            bool recurse_flag)
1402 {
1403   if (recurse_flag) {
1404     fullspaceSampler.free_communicators(pl_iter);
1405 
1406     subModel.free_communicators(pl_iter, max_eval_concurrency);
1407   }
1408 }
1409 
1410 
init_fullspace_sampler(unsigned short sample_type)1411 void ActiveSubspaceModel::init_fullspace_sampler(unsigned short sample_type)
1412 {
1413   std::string rng; // use default random number generator
1414 
1415   if (sample_type == SUBMETHOD_DEFAULT)
1416     sample_type = SUBMETHOD_RANDOM; // default to Monte Carlo sampling
1417 
1418   // configure this sampler initially to work with initialSamples
1419   auto ndlhss =
1420     std::make_shared<NonDLHSSampling>(subModel, sample_type, initialSamples,
1421 				      randomSeed, rng, true);
1422   fullspaceSampler.assign_rep(ndlhss);
1423 
1424   // TODO: review whether this is needed
1425   fullspaceSampler.sub_iterator_flag(true);
1426 }
1427 
1428 
1429 /** Perform the variables mapping from recast reduced dimension
1430     variables y to original model x variables via linear transformation.
1431     Maps only continuous variables. */
1432 void ActiveSubspaceModel::
variables_mapping(const Variables & recast_y_vars,Variables & sub_model_x_vars)1433 variables_mapping(const Variables& recast_y_vars, Variables& sub_model_x_vars)
1434 {
1435   const RealVector& y =    recast_y_vars.continuous_variables();
1436   RealVector&       x = sub_model_x_vars.continuous_variables_view();
1437 
1438   //  Calculate x = reducedBasis*y + inactiveBasis*inactiveVars via matvec
1439   //  directly into x cv in submodel
1440 
1441   Teuchos::BLAS<int, Real> teuchos_blas;
1442 
1443   const RealMatrix& W1 = asmInstance->reducedBasis;
1444   int m = W1.numRows(), n = W1.numCols(), incx = 1, incy = 1, incz = 1;
1445   Real alpha = 1., beta = 0.;
1446   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, W1.values(), m,
1447                     y.values(), incy, beta, x.values(), incx);
1448 
1449   // Now add the inactive variable's contribution:
1450   const RealMatrix& W2 = asmInstance->inactiveBasis;
1451   const RealVector&  z = asmInstance->inactiveVars;
1452   m = W2.numRows();  n = W2.numCols();
1453   alpha = beta = 1.;
1454   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, W2.values(), m,
1455                     z.values(), incz, beta, x.values(), incx);
1456 
1457   if (asmInstance->output_level() >= DEBUG_OUTPUT)
1458     Cout <<   "\nSubspace Model: Subspace vars are\n"  << recast_y_vars
1459 	 << "\n\nSubspace Model: Fullspace vars are\n" << sub_model_x_vars
1460 	 << std::endl;
1461 }
1462 
1463 }  // namespace Dakota
1464 
1465 
1466