1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 //- Class:        ExperimentData
10 //- Description:
11 //-
12 //-
13 //- Owner:        Laura Swiler
14 //- Version: $Id$
15 
16 #ifndef EXPERIMENT_DATA_H
17 #define EXPERIMENT_DATA_H
18 
19 #include "dakota_system_defs.hpp"
20 #include "dakota_data_types.hpp"
21 #include "dakota_tabular_io.hpp"
22 #include "ExperimentResponse.hpp"
23 #include "ExperimentDataUtils.hpp"
24 #include "SharedResponseData.hpp"
25 #include "SimulationResponse.hpp"
26 #include <boost/filesystem/operations.hpp>
27 #include "boost/filesystem/path.hpp"
28 
29 namespace Dakota {
30 
31 using boost::multi_array;
32 using boost::extents;
33 
34 /// special values for sigmaType
35 enum sigtype { NO_SIGMA, SCALAR_SIGMA, DIAGONAL_SIGMA, MATRIX_SIGMA };
36 /// special values for experimental data type
37 enum edtype { SCALAR_DATA, FUNCTIONAL_DATA } ;
38 
39 
40   /// Interpolation method for interpolating between experimental and model data.
41   /// I need to work on inputs/outputs to this method.  For now, this assumes
42   /// interpolation of functional data.
43   /* void interpolate(RealVector2DArray& functionalCoordinates, RealVectorArray&
44      experimentFunctionalData, RealVector2DArray& modelCoordinates, RealVectorArray&
45      modelFunctionalData, RealVectorArray& interpolatedResults);
46   */
47   /// As Brian suggested, thsi class has the experimental data (coordinates and
48   // functional data) and can store the interpolated results.  So, we may want a
49   // simpler interpolate definition given in the line below:
50   /*void interpolate(RealVector2DArray& modelCoordinates, RealVectorArray&
51     modelFunctionalData);
52   */
53   /// RealVectorArray interpolatedResults;
54 
55 
56 /** The ExperimentData class is used to read and populate data
57     (currently from user-specified files and/or the input spec)
58     relating to experimental (physical observations) data for
59     the purposes of calibration.  Such data may include (for example):
60     number of experiments, configuration variables,
61     type of data (scalar vs. functional), treatment of sigma (experimental
62     uncertainties).  This class also provides an interpolation capability to
63     interpolate between simulation or experimental data so that the
64     differencing between simulation and experimental data may
65     be performed properly. */
66 
67 class ExperimentData
68 {
69 
70 public:
71 
72   //
73   //- Heading: Constructors, destructor, operators
74   //
75 
76   /// default constructor
77   ExperimentData();
78 
79   /// typical DB-based constructor
80   ExperimentData(const ProblemDescDB& prob_desc_db,
81                  const SharedResponseData& srd, short output_level);
82 
83   /// temporary? constructor for testing
84   ExperimentData(size_t num_experiments, size_t num_config_vars,
85                  const boost::filesystem::path& data_prefix,
86                  const SharedResponseData& srd,
87                  const StringArray& variance_types,
88                  short output_level,
89                  std::string scalarDataFilename = "");
90 
91   ExperimentData(size_t num_experiments, const SharedResponseData& srd,
92                  const RealMatrix& configVars,
93                  const IntResponseMap& all_responses, short output_level);
94 
95   //ExperimentData(const ExperimentData&);            ///< copy constructor
96   //~ExperimentData();                               ///< destructor
97   //ExperimentData& operator=(const ExperimentData&);  ///< assignment operator
98 
99   //
100   //- Heading: Member methods
101   //
102 
103   /// Load experiments from data files (simple scalar or field)
104   void load_data(const std::string& context_message);
105   /// Add one data point to the experimental data set
106   void add_data(const RealVector& one_configvars, const Response& one_response);
107 
108   /// retrieve the number of experiments
num_experiments() const109   size_t num_experiments() const
110     { return allExperiments.size(); }
111 
112   /// retrieve the total number of experimental data points
113   /// over all experiments
114   size_t num_total_exppoints() const;
115 
116   /// retrieve the number of scalars (applies to all experiments)
117   size_t num_scalar_primary() const;
118 
119   /// retrieve the number of fields (applies to all experiments)
120   size_t num_fields() const;
121 
122   /// number of onfiguration variables
123   size_t num_config_vars() const;
124 
125   /// values of the configuration variables, 1 RealVector per experiment
126   const std::vector<RealVector>& config_vars() const;
127 
128   /// return contiguous vector of all data (scalar, followed by field)
129   /// for the specified experiment
130   const RealVector& all_data(size_t experiment);
131 
132   /// return response for the specified experiment
133   const Response& response(size_t experiment);
134 
135   /// return the individual sizes of the experimental data lengths
136   /// (all function values, scalar and field)
137   void per_exp_length(IntVector& per_length) const;
138 
139   /// return the field lengths for specified experiment index
140   const IntVector& field_lengths(size_t experiment) const;
141 
142   /// retrieve the data value for the given response, for the given
143   /// experiment
144   Real scalar_data(size_t response, size_t experiment);
145 
146   /// retrieve a view of the field data for the given response, for the given
147   /// experiment
148   RealVector field_data_view(size_t response, size_t experiment) const;
149 
150   /// retrieve a view of the field data coordinates for the given response, for the given
151   /// experiment
152   RealMatrix field_coords_view(size_t response, size_t experiment) const;
153 
154   /// whether the specified variance type (enum value) is present and active
155   bool variance_type_active(short variance_type) const;
156 
157   /// whether any variance type is active
158   bool variance_active() const;
159 
160   /// apply the covariance responses to compute the triple product
161   /// v'*inv(C)*v for the given experiment
162   Real apply_covariance(const RealVector& residuals, size_t experiment) const;
163   /// apply inverse sqrt of the covariance to compute weighted residuals
164   void apply_covariance_inv_sqrt(const RealVector& residuals,
165 				 size_t experiment,
166 				 RealVector& weighted_residuals) const;
167   /// apply inverse sqrt of the covariance to compute weighted gradients
168   void apply_covariance_inv_sqrt(const RealMatrix& gradients,
169 				 size_t experiment,
170 				 RealMatrix& weighted_gradients) const;
171   /// apply inverse sqrt of the covariance to compute weighted Hessians
172   void apply_covariance_inv_sqrt(const RealSymMatrixArray& hessians,
173 				 size_t experiment,
174 				 RealSymMatrixArray& weighted_hessians) const;
175   /// apply simulation error to experiment data
176   void apply_simulation_error(const RealVector& simulation_error,
177                               size_t experiment);
178 
179   /// return a (copy) vector containing the main diagonal entries of a specified
180   /// experimental covariance matrix
181   void get_main_diagonal( RealVector &diagonal, size_t experiment ) const;
182 
183   /// get the standard deviation of the observation error process, one
184   /// vector per experiment
185   void cov_std_deviation(RealVectorArray& std_deviation) const;
186 
187   /// get the observation error covariance as a correlation matrix, one
188   /// vector per experiment
189   void cov_as_correlation(RealSymMatrixArray& corr_matrix) const;
190 
191   /// retrieve an individual covariance entry as a dense matrix
192   void covariance(int exp_ind, RealSymMatrix& cov_mat) const;
193 
194   /// form residuals for all experiments, interpolating if necessary;
195   /// one simulation response maps to all experiments
196   void form_residuals(const Response& sim_resp, Response& residual_resp) const;
197 
198   /// Populate the portion of residual_resp corresponding to
199   /// experiment curr_exp; the passed simulation response maps only to
200   /// the specified experiment
201   void form_residuals(const Response& sim_resp, const size_t curr_exp,
202 		      Response& residual_resp) const;
203 
204   /// form residuals for an individual experiment, interpolating if necessary
205   void form_residuals(const Response& sim_resp, size_t exp_num,
206 		      const ShortArray &total_asv, size_t residual_resp_offset,
207 		      Response &residual_resp) const;
208 
209   /// recover original model from the first experiment block in a full
210   /// set of residuals; works in no interpolation case only (sizes same)
211   void recover_model(size_t num_pri_fns, RealVector& model_fns) const;
212 
213   /// flag for interpolation.  If 0, no interpolation.
214   /// If 1, interpolate.
215   bool interpolate_flag() const;
216 
217   /// Interpolate simulation data (values, gradients and hessians) onto
218   /// the coordinates of the experimental data
219   void interpolate_simulation_data( const Response &sim_resp,
220 				    size_t exp_num,
221 				    const ShortArray &total_asv,
222 				    size_t exp_offset,
223 				    Response &interp_resp ) const;
224 
225   /// Apply the experiment data covariance to the residual data (scale
226   /// functions by Gamma_d^{-1/2}), returning in scaled_residuals
227   void scale_residuals(const Response& residual_response,
228 		       RealVector& scaled_residuals) const;
229 
230   /// Apply the experiment data covariance to the residual data in-place
231   /// (scale functions, gradients, and Hessians by Gamma_d^{-1/2})
232   void scale_residuals(Response& residual_response) const;
233 
234   // All the following now assume any covariance scaling is already applied
235 
236   /// Build the gradient of the ssr from residuals and function gradients
237   /// based on the response's active set request vector
238   void build_gradient_of_sum_square_residuals(const Response& resp,
239 					      RealVector &ssr_gradient);
240   /// Build the gradient of the ssr from residuals and function gradients
241   /// using the passed active set request vector (overrides the response's
242   /// request vector)
243   void build_gradient_of_sum_square_residuals(const Response& resp,
244 					      const ShortArray& asrv,
245 					      RealVector &ssr_gradient);
246 
247   /// Update the gradient of ssr with the values from the gradient associated
248   /// with a single experiment
249   void build_gradient_of_sum_square_residuals_from_response(
250 						    const Response& resp,
251 						    const ShortArray& asrv,
252 						    int exp_ind,
253 						    RealVector &ssr_gradient);
254 
255   /** \brief Construct the gradient of the sum of squares of residuals
256    *
257    * \param func_gradients A matrix containing the gradients of the
258    * residual vector
259    * \param residuals A vector of residuals (mismatch between experimental data
260    * and the corresponding function values
261    * \param asrv The active set request vector
262    */
263   void build_gradient_of_sum_square_residuals_from_function_data(
264     const RealMatrix &func_gradients, const RealVector &residuals,
265     RealVector &ssr_gradient, const ShortArray& asrv);
266 
267   /// Build the hessian of the ssr from residuals, function gradients and
268   /// function hessians based on the response's active set request vector
269   void build_hessian_of_sum_square_residuals(const Response& resp,
270 					     RealSymMatrix &ssr_hessian);
271   /// Build the hessian of the ssr from residuals, function gradients and
272   /// function hessians using the passed active set request vector (overrides
273   /// the response's request vector)
274   void build_hessian_of_sum_square_residuals(const Response& resp,
275 					     const ShortArray& asrv,
276 					     RealSymMatrix &ssr_hessian);
277 
278   /// Update the hessian of ssr with the values from the hessian associated
279   /// with a single experiment
280   void build_hessian_of_sum_square_residuals_from_response(
281 						    const Response& resp,
282 						    const ShortArray& asrv,
283 						    int exp_ind,
284 						    RealSymMatrix &ssr_hessian);
285 
286   /** \brief Construct the hessian of the sum of squares of residuals
287    *
288    * \param func_hessians A list of matrices containing the Hessians of the
289    * function elements in the residual vector
290    * \param func_gradients A matrix containing the gradients of the
291    * residual vector
292    * \param residuals A vector of residuals (mismatch between experimental data
293    * and the corresponding function values
294    * \param asrv The active set request vector
295    */
296   void build_hessian_of_sum_square_residuals_from_function_data(
297     const RealSymMatrixArray &func_hessians,
298     const RealMatrix &func_gradients, const RealVector &residuals,
299     RealSymMatrix &ssr_hessian, const ShortArray& asrv);
300 
301   /// in-place scale the residual response (functions, gradients,
302   /// Hessians) by sqrt(multipliers), according to blocks indicated by
303   /// multiplier mode
304   void scale_residuals(const RealVector& multipliers,
305 		       unsigned short multiplier_mode, size_t num_calib_params,
306 		       Response& residual_response) const;
307 
308 
309   /// returns the determinant of (covariance block-scaled by the
310   /// passed multipliers)
311   Real cov_determinant(const RealVector& multipliers,
312                        unsigned short multiplier_mode) const;
313 
314   /// returns the log of the determinant of (covariance block-scaled
315   /// by the passed multipliers)
316   Real half_log_cov_determinant(const RealVector& multipliers,
317 				unsigned short multiplier_mode) const;
318 
319   /// populated the passed gradient with derivatives w.r.t. the
320   /// hyper-parameter multipliers, starting at hyper_offset (must be
321   /// sized)
322   void half_log_cov_det_gradient(const RealVector& multipliers,
323 				 unsigned short multiplier_mode,
324 				 size_t hyper_offset,
325 				 RealVector& gradient) const;
326 
327   /// populated the passed Hessian with derivatives w.r.t. the
328   /// hyper-parameter multipliers, starting at hyper_offset (must be
329   /// sized)
330   void half_log_cov_det_hessian(const RealVector& multipliers,
331 				unsigned short multiplier_mode,
332 				size_t hyper_offset,
333 				RealSymMatrix& hessian) const;
334 
335   /// generate variable labels for the covariance (error) multiplier hyperparams
336   StringArray hyperparam_labels(unsigned short multiplier_mode) const;
337 
338 protected:
339 
340   /// Perform check on the active request vector to make sure
341   /// it is amenable to interpolation of simulation data and application
342   /// of apply covariance
343   ShortArray determine_active_request(const Response& resid_resp) const;
344 
345   /// count the number of residuals influenced by each multiplier
346   SizetArray residuals_per_multiplier(unsigned short multiplier_mode) const;
347 
348   /// Generate a set of multipliers commensurate with the residual
349   /// size for the total experiment data set.  Instead of repeating
350   /// the loops all over the place, generate an expanded set of
351   /// multipliers; the conditionals get too complicated otherwise
352   void generate_multipliers(const RealVector& multipliers,
353                             unsigned short multiplier_mode,
354                             RealVector& expanded_multipliers) const;
355 
356   void resid2mult_map(unsigned short multiplier_mode,
357 		      IntVector& resid2mult_indices) const;
358 
359 private:
360 
361   // initialization helpers
362 
363   /// shared body of constructor initialization
364   void initialize(const StringArray& variance_types,
365 		  const SharedResponseData& srd);
366 
367   /// parse user-provided sigma type strings and populate enums
368   void parse_sigma_types(const StringArray& sigma_types);
369 
370   // data loading helpers
371 
372   /// Load a single experiment exp_index into exp_resp
373   void load_experiment(size_t exp_index, std::ifstream& scalar_data_stream,
374 		       size_t num_field_sigma_matrices,
375 		       size_t num_field_sigma_diagonals,
376 		       size_t num_field_sigma_scalars,
377 		       size_t num_field_sigma_none, Response& exp_resp);
378 
379   /// read or default populate the scalar sigma
380   void read_scalar_sigma(std::ifstream& scalar_data_stream,
381 			 RealVector& sigma_scalars,
382 			 IntVector& scalar_map_indices);
383 
384 
385   /// Return a view (to allowing updaing in place) of the residuals associated
386   /// with a given experiment, from a vector contaning residuals from
387   /// all experiments
388   RealVector residuals_view(const RealVector& residuals, size_t experiment) const;
389 
390   /// Return a view (to allowing updaing in place) of the gradients associated
391   /// with a given experiment, from a matrix contaning gradients from
392   /// all experiments
393   RealMatrix gradients_view( const RealMatrix &gradients, size_t experiment) const;
394 
395   /// Return a view (to allowing updaing in place) of the hessians associated
396   /// with a given experiment, from an array contaning the hessians from
397   /// all experiments
398   RealSymMatrixArray hessians_view(const RealSymMatrixArray &hessians,
399 				   size_t experiment) const;
400 
401   //
402   //- Heading: Data
403   //
404 
405 
406   // configuration and sizing information
407 
408   /// whether the user specified a calibration data block
409   bool calibrationDataFlag;
410 
411   /// the total number of experiments
412   size_t numExperiments;
413 
414   /// number of configuration (state) variables to read for each experiment
415   size_t numConfigVars;
416 
417   /// type of variance specified for each variable, one per response
418   /// group; empty varianceType indicates none specified by user
419   UShortArray varianceTypes;
420 
421   /// cached product of each experiment covariance's determinant
422   Real covarianceDeterminant;
423 
424   /// cached sum of each experiment covariance's log determinant
425   Real logCovarianceDeterminant;
426 
427   /// path to prepend to any data file names
428   boost::filesystem::path dataPathPrefix;
429 
430   // scalar data config information
431 
432   /// the user-specied scalar data filename
433   String scalarDataFilename;
434 
435   /// tabular format of the simple scalar data file; supports
436   /// TABULAR_NONE, TABULAR_HEADER, TABULAR_EVAL_ID, TABULAR_EXPER_ANNOT
437   unsigned short scalarDataFormat;
438 
439   /// number of sigma values to read from each row in simple data file
440   /// format (calculated from variance types strings
441   size_t scalarSigmaPerRow;
442 
443   /// whether to read coordinate data files for simulation fields
444   bool readSimFieldCoords;
445 
446   /// archived shared data for use in sizing fields, total functions
447   /// (historically we read all functions, including constraints,
448   /// which might not be correct)
449   SharedResponseData simulationSRD;
450 
451   /// flag for interpolation.
452   bool interpolateFlag;
453   /// output verbosity level
454   short outputLevel;
455 
456   // core data storage
457 
458   /// Vector of numExperiments ExperimentResponses, holding the
459   /// observed data and error (sigma/covariance) for each experiment.
460   std::vector<Response> allExperiments;
461 
462   // TODO: migrate this to a vector of Variables?
463   /// Vector of numExperiments configurations at which data were
464   /// gathered; empty if no configurations specified.
465   std::vector<RealVector> allConfigVars;
466 
467   /// Length of each experiment
468   IntVector experimentLengths;
469   /// function index offsets for individual experiment data sets
470   IntVector expOffsets;
471 };
472 
473 
474 inline void ExperimentData::
build_gradient_of_sum_square_residuals(const Response & resp,RealVector & ssr_gradient)475 build_gradient_of_sum_square_residuals( const Response& resp,
476 					RealVector &ssr_gradient )
477 {
478   // default to asrv in resp (no override)
479   build_gradient_of_sum_square_residuals( resp,
480 					  resp.active_set_request_vector(),
481 					  ssr_gradient);
482 }
483 
484 inline void ExperimentData::
build_hessian_of_sum_square_residuals(const Response & resp,RealSymMatrix & ssr_hessian)485 build_hessian_of_sum_square_residuals( const Response& resp,
486 				       RealSymMatrix &ssr_hessian )
487 {
488   // default to asrv in resp (no override)
489   build_hessian_of_sum_square_residuals( resp, resp.active_set_request_vector(),
490 					 ssr_hessian);
491 }
492 
493 } // namespace Dakota
494 
495 #endif
496