1 /* _______________________________________________________________________ 2 3 DAKOTA: Design Analysis Kit for Optimization and Terascale Applications 4 Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS). 5 This software is distributed under the GNU Lesser General Public License. 6 For more information, see the README file in the top Dakota directory. 7 _______________________________________________________________________ */ 8 9 #ifndef DAKOTA_TABULAR_IO_H 10 #define DAKOTA_TABULAR_IO_H 11 12 #include "dakota_data_types.hpp" 13 #include "dakota_global_defs.hpp" 14 15 /** \file dakota_tabular_io.hpp 16 \brief Utility functions for reading and writing tabular data files 17 Emerging utilities for tabular file I/O. For now, just extraction 18 of capability from separate contexts to faciliate rework. These 19 augment (and leverage) those in data_util.h 20 21 Design/capability goals: 22 * Ability to read / write data with row/col headers or in free-form 23 * Detect premature end of file, report if extra data 24 * More consistent and reliable checks for file open errors 25 * Require right number of cols in header mode; only total data 26 checking in free-form (likely) 27 * Allow comment character for header rows or even in data? 28 * variables vs. variables/responses for both read and write 29 * Should we support CSV? delimiter = ','; other? 30 * Verify treatment of trailing newline without reading a zero 31 * Allow reading into the transpose of the data structure 32 */ 33 34 /* 35 Could consider class with attributes 36 * filename 37 * read/write 38 * context message 39 * reference to istream 40 41 For design and review consider: 42 * write Array[Variables] (allVariables in DakotaAnalyzer) 43 * write RealMatrix transposed (allSamples in DakotaAnalyzer) 44 * allow specifying lsq data in input file! 45 * truly separate functions that take a file name from those using a stream 46 * Verify: pre-run for active vs. all case; pre-run precision and formatting 47 * should readers or clients size data? If sized could reduce args. 48 49 GOALS: 50 * avoid creating dangling variables with no refs 51 * avoid temp vectors that aren't needed (copy overhead) 52 */ 53 54 55 namespace Dakota { 56 57 class SharedVariablesData; 58 59 namespace TabularIO { 60 61 // 62 //- Utilities for status messages 63 // 64 65 /// Translate tabular_format into a user-friendly name 66 String format_name(unsigned short tabular_format); 67 68 /// Describe the expected data file format based on passed parameters 69 void print_expected_format(std::ostream& s, unsigned short tabular_format, 70 size_t num_rows, size_t num_cols); 71 72 /// Print a warning if there's extra data in the file 73 void print_unexpected_data(std::ostream& s, const String& filename, 74 const String& context_message, 75 unsigned short tabular_format); 76 77 // 78 //- Utilities for opening and closing tabular files 79 // 80 81 /// open the file specified by name for reading, using passed input 82 /// stream, presenting context-specific error on failure 83 void open_file(std::ifstream& data_file, const std::string& input_filename, 84 const std::string& context_message); 85 86 /// open the file specified by name for writing, using passed output 87 /// stream, presenting context-specific error on failure 88 void open_file(std::ofstream& data_file, const std::string& output_filename, 89 const std::string& context_message); 90 91 /// close the file specified by name after reading, using passed input 92 /// stream, presenting context-specific error on failure 93 void close_file(std::ifstream& data_file, const std::string& input_filename, 94 const std::string& context_message); 95 96 /// close the file specified by name after writing, using passed output 97 /// stream, presenting context-specific error on failure 98 void close_file(std::ofstream& data_file, const std::string& output_filename, 99 const std::string& context_message); 100 101 // 102 //- Utilities for tabular write 103 // 104 105 /// Output the header row (labels) for a tabular data file for 106 /// variables and responses, with variables in input spec order. 107 /// Conditionally include interface ID. Primary uses: environment 108 /// tabular data, pre-run output, surrogate approx evals 109 void write_header_tabular(std::ostream& tabular_ostream, const Variables& vars, 110 const Response& response, 111 const std::string& counter_label, 112 unsigned short tabular_format); 113 /// Output the header row (labels) for a tabular data file for 114 /// variables and additional labels not tied to a response. Variables 115 /// are in input spec order. Conditionally include interface ID. 116 /// Primary uses: MCMC chain export, including calibration sigmas. 117 void write_header_tabular(std::ostream& tabular_ostream, const Variables& vars, 118 const StringArray& addtnl_labels, 119 const std::string& counter_label, 120 unsigned short tabular_format); 121 122 /// Write the leading column with eval ID and conditionally, the interface ID 123 void write_leading_columns(std::ostream& tabular_ostream, size_t eval_id, 124 const String& iface_id, 125 unsigned short tabular_format); 126 127 /// Output a row of tabular data from a variables object. All active/inactive 128 /// variables written in input spec order. Conditionally include interface ID. 129 /// Primary uses: output of sampling sets. 130 void write_data_tabular(std::ostream& tabular_ostream, 131 const Variables& vars, const String& iface, 132 size_t counter, unsigned short tabular_format); 133 134 /// Output a row of tabular data from variables and response objects. 135 /// All active/inactive variables written in input spec order. 136 /// Conditionally include interface ID. Primary uses: environment 137 /// tabular data, pre-run output, surrogate approx evals. 138 void write_data_tabular(std::ostream& tabular_ostream, 139 const Variables& vars, const String& iface, 140 const Response& response, size_t counter, 141 unsigned short tabular_format); 142 143 /// PCE export: write freeform format file with whitespace-separated 144 /// data where each row has num_fns reals from coeffs, followed 145 /// by num_vars unsigned shorts from indices 146 void write_data_tabular(const std::string& output_filename, 147 const std::string& context_message, 148 const RealVectorArray& output_coeffs, 149 const UShort2DArray& output_indices); 150 151 152 // 153 //- Utilities for tabular read 154 // 155 156 /// Check if an input stream contains unexpected additional data 157 bool exists_extra_data(std::istream& tabular_file); 158 159 /// read and discard header line from the stream 160 StringArray read_header_tabular(std::istream& input_stream, 161 unsigned short tabular_format); 162 163 /// read leading columns [ int eval_id [ String iface_id ] ] 164 void read_leading_columns(std::istream& input_stream, 165 unsigned short tabular_format); 166 /// read leading columns [ int eval_id [ String iface_id ] ] 167 void read_leading_columns(std::istream& input_stream, 168 unsigned short tabular_format, 169 int& eval_id, String& iface_id); 170 171 // TODO: The following need review, rework, and consolidation 172 173 // 174 // Uses: LeastSq data import (to be transitioned to Bayesian case 175 // where each row will denote an experiment) 176 // 177 /// read possibly header-annotated whitespace-separated data into a 178 /// vector of length num_entries; if annotated then it's a column 179 /// vector for now 180 void read_data_tabular(const std::string& input_filename, 181 const std::string& context_message, 182 RealVector& input_data, size_t num_entries, 183 unsigned short tabular_format); 184 185 /// Tabular read for ApproximationInterface challenge data: read 186 /// possibly header-annotated whitespace-separated data of possible mixed Variables, 187 /// followed by num_fns, each into RealMatrix with minimal error checking 188 void read_data_tabular(const std::string& input_filename, 189 const std::string& context_message, 190 Variables vars, size_t num_fns, 191 RealMatrix& vars_matrix, RealMatrix& resp_matrix, 192 unsigned short tabular_format, 193 bool verbose=false, bool use_var_labels=false, 194 bool active_only=false); 195 196 /// Tabular read for PCE import: read possibly header-annotated 197 /// whitespace-separated data of unknown length where each row has 198 /// num_fns reals followed by num_vars unsigned shorts; append data to 199 /// arrays passed by reference 200 void read_data_tabular(const std::string& input_filename, 201 const std::string& context_message, 202 RealVectorArray& input_coeffs, 203 UShort2DArray& input_indices, 204 unsigned short tabular_format, 205 size_t num_vars, size_t num_fns); 206 207 /// Tabular read for DataFitSurrModel (build points): read 208 /// whitespace-separated data with optional row and column headers 209 /// into lists of Variables and Responses until out of data 210 void read_data_tabular(const std::string& input_filename, 211 const std::string& context_message, 212 Variables vars, Response resp, PRPList& input_prp, 213 unsigned short tabular_format, 214 bool verbose=false, bool use_var_labels=false, 215 bool active_only=false); 216 217 /// Tabular read for import_approx_points_file: read 218 /// whitespace-separated data with optional row and column headers 219 /// into a single matrix, with length of record as specified and 220 /// number of records to be determined by file content. The matrix is 221 /// stored as record_len rows by num_records columns. 222 void read_data_tabular(const std::string& input_filename, 223 const std::string& context_message, 224 RealMatrix& input_matrix, size_t record_len, 225 unsigned short tabular_format, bool verbose=false); 226 227 // BMA: Probably retire in favor of new data readers, rather than 228 // propagating to other Bayesian calibration and deterministic least 229 // squares 230 /// Tabular read for GPMSA data: read whitespace-separated data with 231 /// optional row and column headers into a single matrix, with size as 232 /// specified (one experiment per row) 233 void read_data_tabular(const std::string& input_filename, 234 const std::string& context_message, 235 RealMatrix& input_matrix, 236 size_t num_rows, size_t num_cols, 237 unsigned short tabular_format, bool verbose=false); 238 239 // special reader for list parameter studies: probably move back to ParamStudy 240 /// Tabular read for ParamStudy: read specified input data file into 241 /// arrays with sizes specified by the passed vc_totals array 242 size_t read_data_tabular(const std::string& input_filename, 243 const std::string& context_message, 244 RealVectorArray& cva, IntVectorArray& diva, 245 StringMulti2DArray& dsva, RealVectorArray& drva, 246 unsigned short tabular_format, 247 bool active_only, Variables vars); 248 249 } // namespace TabularIO 250 251 } // namespace Dakota 252 253 #endif // DAKOTA_TABULAR_IO_H 254