1 /*  _______________________________________________________________________
2 
3     DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4     Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5     This software is distributed under the GNU Lesser General Public License.
6     For more information, see the README file in the top Dakota directory.
7     _______________________________________________________________________ */
8 
9 #ifndef DAKOTA_TABULAR_IO_H
10 #define DAKOTA_TABULAR_IO_H
11 
12 #include "dakota_data_types.hpp"
13 #include "dakota_global_defs.hpp"
14 
15 /** \file dakota_tabular_io.hpp
16     \brief Utility functions for reading and writing tabular data files
17     Emerging utilities for tabular file I/O.  For now, just extraction
    of capability from separate contexts to facilitate rework.  These
19     augment (and leverage) those in data_util.h
20 
21     Design/capability goals:
22     * Ability to read / write data with row/col headers or in free-form
23     * Detect premature end of file, report if extra data
24     * More consistent and reliable checks for file open errors
25     * Require right number of cols in header mode; only total data
26       checking in free-form (likely)
27     * Allow comment character for header rows or even in data?
28     * variables vs. variables/responses for both read and write
29     * Should we support CSV? delimiter = ','; other?
30     * Verify treatment of trailing newline without reading a zero
31     * Allow reading into the transpose of the data structure
32 */
33 
34 /*
35    Could consider class with attributes
36    * filename
37    * read/write
38    * context message
39    * reference to istream
40 
41    For design and review consider:
42    * write Array[Variables]      (allVariables in DakotaAnalyzer)
43    * write RealMatrix transposed (allSamples in DakotaAnalyzer)
44    * allow specifying lsq data in input file!
45    * truly separate functions that take a file name from those using a stream
46    * Verify: pre-run for active vs. all case; pre-run precision and formatting
47    * should readers or clients size data?  If sized could reduce args.
48 
49    GOALS:
50     * avoid creating dangling variables with no refs
51     * avoid temp vectors that aren't needed (copy overhead)
52 */
53 
54 
55 namespace Dakota {
56 
/// Forward declaration only; no declaration visible in this header
/// references this type (NOTE(review): possibly retained for the
/// implementation file or historical reasons -- confirm before removing).
class SharedVariablesData;
58 
59 namespace TabularIO {
60 
61 //
62 //- Utilities for status messages
63 //
64 
/// Translate tabular_format into a user-friendly name
/// \param tabular_format  format specifier to describe (presumably a set
///        of tabular-format option flags -- TODO confirm where defined)
/// \return the descriptive name, returned by value as a String
String format_name(unsigned short tabular_format);
67 
/// Describe the expected data file format based on passed parameters
/// \param s               output stream passed in by the caller (presumably
///                        where the description is printed)
/// \param tabular_format  format specifier of the file being described
/// \param num_rows        row count to report (presumably the expected
///                        number of data rows -- confirm)
/// \param num_cols        column count to report (presumably the expected
///                        number of data columns -- confirm)
void print_expected_format(std::ostream& s, unsigned short tabular_format,
			   size_t num_rows, size_t num_cols);
71 
/// Print a warning if there's extra data in the file
/// \param s                output stream passed in by the caller (presumably
///                         where the warning is printed)
/// \param filename         name of the data file the warning refers to
/// \param context_message  caller-supplied text identifying the context
/// \param tabular_format   format specifier the file was read with
/// NOTE(review): this function takes no input stream, so detection of the
/// extra data presumably happens in the caller (see exists_extra_data) --
/// confirm against the implementation.
void print_unexpected_data(std::ostream& s, const String& filename,
			   const String& context_message,
			   unsigned short tabular_format);
76 
77 //
78 //- Utilities for opening and closing tabular files
79 //
80 
/// open the file specified by name for reading, using passed input
/// stream, presenting context-specific error on failure
/// \param data_file        caller-owned input file stream to open
/// \param input_filename   name of the file to open for reading
/// \param context_message  caller-supplied text used in the error message
/// NOTE(review): returns void, so failure is not reported through a return
/// value; how the error is raised is defined in the implementation.
void open_file(std::ifstream& data_file, const std::string& input_filename,
	       const std::string& context_message);
85 
/// open the file specified by name for writing, using passed output
/// stream, presenting context-specific error on failure
/// \param data_file        caller-owned output file stream to open
/// \param output_filename  name of the file to open for writing
/// \param context_message  caller-supplied text used in the error message
/// NOTE(review): returns void, so failure is not reported through a return
/// value; how the error is raised is defined in the implementation.
void open_file(std::ofstream& data_file, const std::string& output_filename,
	       const std::string& context_message);
90 
/// close the file specified by name after reading, using passed input
/// stream, presenting context-specific error on failure
/// \param data_file        caller-owned input file stream to close
/// \param input_filename   name of the file associated with the stream
/// \param context_message  caller-supplied text used in the error message
void close_file(std::ifstream& data_file, const std::string& input_filename,
		const std::string& context_message);
95 
/// close the file specified by name after writing, using passed output
/// stream, presenting context-specific error on failure
/// \param data_file        caller-owned output file stream to close
/// \param output_filename  name of the file associated with the stream
/// \param context_message  caller-supplied text used in the error message
void close_file(std::ofstream& data_file, const std::string& output_filename,
		const std::string& context_message);
100 
101 //
102 //- Utilities for tabular write
103 //
104 
/// Output the header row (labels) for a tabular data file for
/// variables and responses, with variables in input spec order.
/// Conditionally include interface ID.  Primary uses: environment
/// tabular data, pre-run output, surrogate approx evals
/// \param tabular_ostream  stream the header row is written to
/// \param vars             variables object supplying the variable labels
/// \param response         response object supplying the response labels
/// \param counter_label    label for the leading counter column
///                         (presumably the evaluation ID column -- confirm)
/// \param tabular_format   format specifier (presumably controls which
///                         leading columns, such as the interface ID, appear)
void write_header_tabular(std::ostream& tabular_ostream, const Variables& vars,
			  const Response& response,
			  const std::string& counter_label,
			  unsigned short tabular_format);
/// Output the header row (labels) for a tabular data file for
/// variables and additional labels not tied to a response.  Variables
/// are in input spec order.  Conditionally include interface ID.
/// Primary uses: MCMC chain export, including calibration sigmas.
/// \param tabular_ostream  stream the header row is written to
/// \param vars             variables object supplying the variable labels
/// \param addtnl_labels    extra column labels (presumably written after
///                         the variable labels -- confirm)
/// \param counter_label    label for the leading counter column
/// \param tabular_format   format specifier (presumably controls which
///                         leading columns, such as the interface ID, appear)
void write_header_tabular(std::ostream& tabular_ostream, const Variables& vars,
			  const StringArray& addtnl_labels,
			  const std::string& counter_label,
			  unsigned short tabular_format);
121 
/// Write the leading column with eval ID and conditionally, the interface ID
/// \param tabular_ostream  stream the leading column(s) are written to
/// \param eval_id          evaluation ID for the row
/// \param iface_id         interface ID, written only conditionally
/// \param tabular_format   format specifier (presumably decides whether
///                         each leading column is emitted -- confirm)
void write_leading_columns(std::ostream& tabular_ostream, size_t eval_id,
			   const String& iface_id,
			   unsigned short tabular_format);
126 
/// Output a row of tabular data from a variables object.  All active/inactive
/// variables written in input spec order.  Conditionally include interface ID.
/// Primary uses: output of sampling sets.
/// \param tabular_ostream  stream the data row is written to
/// \param vars             variables whose values form the row
/// \param iface            interface ID, included only conditionally
/// \param counter          row counter (presumably written as the leading
///                         eval ID column -- confirm)
/// \param tabular_format   format specifier for the output file
void write_data_tabular(std::ostream& tabular_ostream,
			const Variables& vars, const String& iface,
			size_t counter, unsigned short tabular_format);
133 
/// Output a row of tabular data from variables and response objects.
/// All active/inactive variables written in input spec order.
/// Conditionally include interface ID.  Primary uses: environment
/// tabular data, pre-run output, surrogate approx evals.
/// \param tabular_ostream  stream the data row is written to
/// \param vars             variables whose values form the first part of the row
/// \param iface            interface ID, included only conditionally
/// \param response         response whose data is written with the variables
///                         (presumably after them -- confirm)
/// \param counter          row counter (presumably written as the leading
///                         eval ID column -- confirm)
/// \param tabular_format   format specifier for the output file
void write_data_tabular(std::ostream& tabular_ostream,
			const Variables& vars, const String& iface,
			const Response& response, size_t counter,
			unsigned short tabular_format);
142 
/// PCE export: write freeform format file with whitespace-separated
/// data where each row has num_fns reals from coeffs, followed
/// by num_vars unsigned shorts from indices
/// \param output_filename  name of the file to write; unlike the
///                         stream-based overloads, this one takes a file name
/// \param context_message  caller-supplied text identifying the context
///                         (presumably used in file-open error messages)
/// \param output_coeffs    coefficient data supplying the reals in each row
/// \param output_indices   index data supplying the unsigned shorts in each row
/// NOTE(review): num_fns and num_vars are not parameters here; presumably
/// they are implied by the sizes of the passed arrays -- confirm.
void write_data_tabular(const std::string& output_filename,
			const std::string& context_message,
			const RealVectorArray& output_coeffs,
			const UShort2DArray& output_indices);
150 
151 
152 //
153 //- Utilities for tabular read
154 //
155 
/// Check if an input stream contains unexpected additional data
/// \param tabular_file  input stream to inspect; taken by non-const
///                      reference, so the check may consume from or alter
///                      the state of the stream
/// \return bool result of the check (presumably true when extra data is
///         found -- confirm against the implementation)
bool exists_extra_data(std::istream& tabular_file);
158 
/// read and discard header line from the stream
/// \param input_stream    stream positioned at the header line
/// \param tabular_format  format specifier (presumably determines whether a
///                        header line is expected at all -- confirm)
/// \return a StringArray.  NOTE(review): the summary above says the header
///         is discarded, yet a StringArray is returned; presumably it holds
///         the header labels that were read -- confirm and update the summary.
StringArray read_header_tabular(std::istream& input_stream,
				unsigned short tabular_format);
162 
/// read leading columns [ int eval_id [ String iface_id ] ]
/// This overload has no output parameters, so the values read are not
/// returned to the caller.
/// \param input_stream    stream positioned at the start of a data row
/// \param tabular_format  format specifier (presumably determines which
///                        leading columns are present -- confirm)
void read_leading_columns(std::istream& input_stream,
			  unsigned short tabular_format);
/// read leading columns [ int eval_id [ String iface_id ] ]
/// This overload passes the values read back through reference parameters.
/// \param input_stream    stream positioned at the start of a data row
/// \param tabular_format  format specifier (presumably determines which
///                        leading columns are present -- confirm)
/// \param eval_id         output: evaluation ID read from the row
/// \param iface_id        output: interface ID read from the row
/// NOTE(review): the values left in eval_id / iface_id when the format has
/// no such column are defined by the implementation -- confirm.
void read_leading_columns(std::istream& input_stream,
			  unsigned short tabular_format,
			  int& eval_id, String& iface_id);
170 
171 // TODO: The following need review, rework, and consolidation
172 
173 //
174 // Uses: LeastSq data import (to be transitioned to Bayesian case
175 //       where each row will denote an experiment)
176 //
/// read possibly header-annotated whitespace-separated data into a
/// vector of length num_entries; if annotated then it's a column
/// vector for now
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
///                         (presumably used in error messages)
/// \param input_data       output: vector receiving the values
/// \param num_entries      number of values to read
/// \param tabular_format   format specifier of the input file
/// NOTE(review): whether input_data is resized here or must be pre-sized by
/// the caller is not visible from the declaration -- confirm.
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       RealVector& input_data, size_t num_entries,
		       unsigned short tabular_format);
184 
/// Tabular read for ApproximationInterface challenge data: read
/// possibly header-annotated whitespace-separated data of possibly mixed
/// Variables, followed by num_fns, each into RealMatrix with minimal
/// error checking
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param vars             Variables object taken by value (presumably a
///                         prototype describing the variables in each row
///                         -- confirm)
/// \param num_fns          number of function values following the variables
/// \param vars_matrix      output: matrix receiving the variable values
/// \param resp_matrix      output: matrix receiving the function values
/// \param tabular_format   format specifier of the input file
/// \param verbose          defaults to false (presumably enables extra
///                         diagnostic output)
/// \param use_var_labels   defaults to false (presumably uses header labels
///                         to match or reorder variables -- confirm)
/// \param active_only      defaults to false (presumably restricts the read
///                         to active variables -- confirm)
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       Variables vars, size_t num_fns,
		       RealMatrix& vars_matrix, RealMatrix& resp_matrix,
                       unsigned short tabular_format,
		       bool verbose=false, bool use_var_labels=false,
		       bool active_only=false);
195 
/// Tabular read for PCE import: read possibly header-annotated
/// whitespace-separated data of unknown length where each row has
/// num_fns reals followed by num_vars unsigned shorts; append data to
/// arrays passed by reference
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param input_coeffs     in/out: array the reals from each row are appended to
/// \param input_indices    in/out: array the unsigned shorts from each row
///                         are appended to
/// \param tabular_format   format specifier of the input file
/// \param num_vars         number of unsigned shorts per row
/// \param num_fns          number of reals per row
/// Note the parameter order: num_vars precedes num_fns, although the reals
/// come first within each row.
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       RealVectorArray& input_coeffs,
		       UShort2DArray& input_indices,
		       unsigned short tabular_format,
		       size_t num_vars, size_t num_fns);
206 
/// Tabular read for DataFitSurrModel (build points): read
/// whitespace-separated data with optional row and column headers
/// into lists of Variables and Responses until out of data
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param vars             Variables object taken by value (presumably a
///                         prototype for each record read -- confirm)
/// \param resp             Response object taken by value (presumably a
///                         prototype for each record read -- confirm)
/// \param input_prp        output: list receiving the records read
/// \param tabular_format   format specifier of the input file
/// \param verbose          defaults to false (presumably enables extra
///                         diagnostic output)
/// \param use_var_labels   defaults to false (presumably uses header labels
///                         to match or reorder variables -- confirm)
/// \param active_only      defaults to false (presumably restricts the read
///                         to active variables -- confirm)
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       Variables vars, Response resp, PRPList& input_prp,
		       unsigned short tabular_format,
		       bool verbose=false, bool use_var_labels=false,
		       bool active_only=false);
216 
/// Tabular read for import_approx_points_file: read
/// whitespace-separated data with optional row and column headers
/// into a single matrix, with length of record as specified and
/// number of records to be determined by file content.  The matrix is
/// stored as record_len rows by num_records columns.
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param input_matrix     output: matrix receiving the data, one record
///                         per column
/// \param record_len       number of values in each record
/// \param tabular_format   format specifier of the input file
/// \param verbose          defaults to false (presumably enables extra
///                         diagnostic output)
/// Contrast with the (num_rows, num_cols) overload, whose summary describes
/// one experiment per row.
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       RealMatrix& input_matrix, size_t record_len,
		       unsigned short tabular_format, bool verbose=false);
226 
227 // BMA: Probably retire in favor of new data readers, rather than
228 // propagating to other Bayesian calibration and deterministic least
229 // squares
/// Tabular read for GPMSA data: read whitespace-separated data with
/// optional row and column headers into a single matrix, with size as
/// specified (one experiment per row)
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param input_matrix     output: matrix receiving the data
/// \param num_rows         number of rows to read (one experiment per row)
/// \param num_cols         number of columns to read per row
/// \param tabular_format   format specifier of the input file
/// \param verbose          defaults to false (presumably enables extra
///                         diagnostic output)
/// NOTE(review): whether input_matrix is resized here or must be pre-sized
/// by the caller is not visible from the declaration -- confirm.
void read_data_tabular(const std::string& input_filename,
		       const std::string& context_message,
		       RealMatrix& input_matrix,
		       size_t num_rows, size_t num_cols,
		       unsigned short tabular_format, bool verbose=false);
238 
239 // special reader for list parameter studies: probably move back to ParamStudy
/// Tabular read for ParamStudy: read specified input data file into
/// arrays with sizes specified by the passed vc_totals array
/// NOTE(review): no parameter named vc_totals exists in this signature;
/// presumably the sizes are now derived from the passed vars object --
/// confirm and update this summary.
/// \param input_filename   name of the data file to read
/// \param context_message  caller-supplied text identifying the context
/// \param cva              output: real-valued array (presumably continuous
///                         variable values per record -- confirm)
/// \param diva             output: integer-valued array (presumably discrete
///                         integer variable values per record -- confirm)
/// \param dsva             output: string-valued array (presumably discrete
///                         string variable values per record -- confirm)
/// \param drva             output: real-valued array (presumably discrete
///                         real variable values per record -- confirm)
/// \param tabular_format   format specifier of the input file
/// \param active_only      no default here, unlike the other readers
///                         (presumably restricts the read to active variables)
/// \param vars             Variables object taken by value
/// \return a size_t count (presumably the number of records read -- confirm
///         against the implementation)
size_t read_data_tabular(const std::string& input_filename,
			 const std::string& context_message,
			 RealVectorArray& cva, IntVectorArray& diva,
			 StringMulti2DArray& dsva, RealVectorArray& drva,
			 unsigned short tabular_format,
			 bool active_only, Variables vars);
248 
249 } // namespace TabularIO
250 
251 } // namespace Dakota
252 
253 #endif // DAKOTA_TABULAR_IO_H
254