1 /* _______________________________________________________________________
2
3 DAKOTA: Design Analysis Kit for Optimization and Terascale Applications
4 Copyright 2014-2020 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
5 This software is distributed under the GNU Lesser General Public License.
6 For more information, see the README file in the top Dakota directory.
7 _______________________________________________________________________ */
8
9 #include "dakota_data_io.hpp"
10 #include "dakota_tabular_io.hpp"
11 #include "DakotaVariables.hpp"
12 #include "DakotaResponse.hpp"
13 #include "ParamResponsePair.hpp"
14
15 namespace Dakota {
16
17 // BMA TODO: Consider removing this namespace if not helpful in
18 // protecting certain components
19 namespace TabularIO {
20
format_name(unsigned short tabular_format)21 String format_name(unsigned short tabular_format)
22 {
23 String file_format("annotated");
24 if (tabular_format == TABULAR_NONE)
25 file_format = "freeform";
26 else if (tabular_format < TABULAR_ANNOTATED)
27 file_format = "custom_annotated";
28
29 return file_format;
30 }
31
print_expected_format(std::ostream & s,unsigned short tabular_format,size_t num_rows,size_t num_cols)32 void print_expected_format(std::ostream& s, unsigned short tabular_format,
33 size_t num_rows, size_t num_cols)
34 {
35 s << "\nExpected " << format_name(tabular_format) << " tabular file:";
36 if (tabular_format > TABULAR_NONE) {
37 if (tabular_format & TABULAR_HEADER)
38 s << "\n * header row with labels";
39 if (tabular_format & TABULAR_EVAL_ID)
40 s << "\n * leading column with counter";
41 if (tabular_format & TABULAR_IFACE_ID)
42 s << "\n * leading column with interface ID";
43 s << "\nsurrounding ";
44 }
45 else
46 s << '\n';
47 s << "whitespace-separated data";
48 if (num_rows)
49 s << "; " << num_rows << " rows";
50 if (num_cols)
51 s << "; " << num_cols << " columns";
52 s << std::endl;
53 }
54
print_unexpected_data(std::ostream & s,const String & filename,const String & context_message,unsigned short tabular_format)55 void print_unexpected_data(std::ostream& s, const String& filename,
56 const String& context_message,
57 unsigned short tabular_format)
58 {
59 s << "\nWarning (" << context_message << "): found unexpected extra data in "
60 << format_name(tabular_format) << "\nfile " << filename << "." << std::endl;
61 }
62
63
64 //
65 //- Utilities for opening tabular files
66 //
67
open_file(std::ifstream & data_stream,const std::string & input_filename,const std::string & context_message)68 void open_file(std::ifstream& data_stream, const std::string& input_filename,
69 const std::string& context_message)
70 {
71 // TODO: try/catch
72 data_stream.open(input_filename.c_str());
73 if (!data_stream.good()) {
74 Cerr << "\nError (" << context_message << "): Could not open file "
75 << input_filename << " for reading tabular data." << std::endl;
76 abort_handler(-1);
77 }
78 // TODO (fix): can't except on failbit when trying to read to EOF
79 // data_stream.exceptions(std::fstream::failbit | std::fstream::badbit);
80 data_stream.exceptions(std::fstream::badbit);
81 }
82
83
open_file(std::ofstream & data_stream,const std::string & output_filename,const std::string & context_message)84 void open_file(std::ofstream& data_stream, const std::string& output_filename,
85 const std::string& context_message)
86 {
87 // TODO: try/catch
88 data_stream.open(output_filename.c_str());
89 if (!data_stream.good()) {
90 Cerr << "\nError (" << context_message << "): Could not open file "
91 << output_filename << " for writing tabular data." << std::endl;
92 abort_handler(-1);
93 }
94 data_stream.exceptions(std::fstream::failbit | std::fstream::badbit);
95 }
96
97
98 //
99 //- Utilities for closing tabular files
100 //
101 // Note: an fstream destructor can manage the different states and close the
102 // stream properly. However, for the case of a class-member stream, we should
103 // close it such that any subsequent re-opening works properly.
104
close_file(std::ifstream & data_stream,const std::string & input_filename,const std::string & context_message)105 void close_file(std::ifstream& data_stream, const std::string& input_filename,
106 const std::string& context_message)
107 {
108 // TODO: try/catch
109
110 // ifstream's have 4 states: good, eof, fail and bad. Testing this state
111 // prior to close() is likely overkill in the current context...
112 if (data_stream.good() || data_stream.eof())
113 data_stream.close();
114 else {
115 Cerr << "\nError (" << context_message << "): Could not close file "
116 << input_filename << " used for reading tabular data." << std::endl;
117 abort_handler(-1);
118 }
119 }
120
121
close_file(std::ofstream & data_stream,const std::string & output_filename,const std::string & context_message)122 void close_file(std::ofstream& data_stream, const std::string& output_filename,
123 const std::string& context_message)
124 {
125 // TODO: try/catch
126
127 // ofstream's have 4 states: good, eof, fail and bad. Testing this state
128 // prior to close() is likely overkill in the current context...
129 if (data_stream.good() || data_stream.eof())
130 data_stream.close();
131 else {
132 Cerr << "\nError (" << context_message << "): Could not close file "
133 << output_filename << " used for writing tabular data." << std::endl;
134 abort_handler(-1);
135 }
136 }
137
138
139 //
140 //- Utilities for tabular write
141 //
142
write_header_tabular(std::ostream & tabular_ostream,const Variables & vars,const Response & response,const std::string & counter_label,unsigned short tabular_format)143 void write_header_tabular(std::ostream& tabular_ostream,
144 const Variables& vars, const Response& response,
145 const std::string& counter_label,
146 unsigned short tabular_format)
147 {
148 if ( !(tabular_format & TABULAR_HEADER) )
149 return;
150
151 // headers use Matlab comment syntax
152 tabular_ostream << "%";
153
154 if (tabular_format & TABULAR_EVAL_ID)
155 tabular_ostream << counter_label << ' ';
156 if (tabular_format & TABULAR_IFACE_ID)
157 tabular_ostream << "interface ";
158 vars.write_tabular_labels(tabular_ostream);
159 response.write_tabular_labels(tabular_ostream);
160 }
161
162
write_header_tabular(std::ostream & tabular_ostream,const Variables & vars,const StringArray & addtnl_labels,const std::string & counter_label,unsigned short tabular_format)163 void write_header_tabular(std::ostream& tabular_ostream, const Variables& vars,
164 const StringArray& addtnl_labels,
165 const std::string& counter_label,
166 unsigned short tabular_format)
167 {
168 if ( !(tabular_format & TABULAR_HEADER) )
169 return;
170
171 // headers use Matlab comment syntax
172 tabular_ostream << "%";
173
174 if (tabular_format & TABULAR_EVAL_ID)
175 tabular_ostream << counter_label << ' ';
176 if (tabular_format & TABULAR_IFACE_ID)
177 tabular_ostream << "interface ";
178 vars.write_tabular_labels(tabular_ostream);
179 Dakota::write_data_tabular(tabular_ostream, addtnl_labels);
180 tabular_ostream << std::endl; // table row completed
181 }
182
183
write_leading_columns(std::ostream & tabular_ostream,size_t eval_id,const String & iface_id,unsigned short tabular_format)184 void write_leading_columns(std::ostream& tabular_ostream, size_t eval_id,
185 const String& iface_id,
186 unsigned short tabular_format)
187 {
188 // conditionally write evaluation ID and/or interface ID
189 if (tabular_format & TABULAR_EVAL_ID) {
190 // align left to make eval_id consistent w/ whitespace-delimited header row
191 std::ios_base::fmtflags before_left_align = tabular_ostream.flags();
192 tabular_ostream << std::setw(8) << std::left << eval_id << ' ';
193 tabular_ostream.flags(before_left_align);
194 }
195 if (tabular_format & TABULAR_IFACE_ID) {
196 // write the interface ID string, NO_ID for empty
197 // (Dakota 6.1 used EMPTY for missing ID)
198 if (iface_id.empty())
199 tabular_ostream << std::setw(9) << "NO_ID" << ' ';
200 else
201 tabular_ostream << std::setw(9) << iface_id << ' ';
202 }
203 }
204
205
write_data_tabular(std::ostream & tabular_ostream,const Variables & vars,const String & iface_id,const Response & response,size_t counter,unsigned short tabular_format)206 void write_data_tabular(std::ostream& tabular_ostream,
207 const Variables& vars, const String& iface_id,
208 const Response& response, size_t counter,
209 unsigned short tabular_format)
210 {
211 write_leading_columns(tabular_ostream, counter, iface_id, tabular_format);
212 vars.write_tabular(tabular_ostream);
213 response.write_tabular(tabular_ostream); // includes EOL
214 }
215
216
write_data_tabular(std::ostream & tabular_ostream,const Variables & vars,const String & iface_id,size_t counter,unsigned short tabular_format)217 void write_data_tabular(std::ostream& tabular_ostream,
218 const Variables& vars, const String& iface_id,
219 size_t counter, unsigned short tabular_format)
220 {
221 write_leading_columns(tabular_ostream, counter, iface_id, tabular_format);
222 vars.write_tabular(tabular_ostream); // no EOL
223 tabular_ostream << '\n';
224 }
225
226
227 // PCE export
write_data_tabular(const std::string & output_filename,const std::string & context_message,const RealVectorArray & output_coeffs,const UShort2DArray & output_indices)228 void write_data_tabular(const std::string& output_filename,
229 const std::string& context_message,
230 const RealVectorArray& output_coeffs,
231 const UShort2DArray& output_indices)
232 {
233 std::ofstream output_stream;
234 open_file(output_stream, output_filename, context_message);
235
236 size_t num_fns = output_coeffs.size();
237 size_t num_ind_rows = output_indices.size();
238
239 bool error_flag = false;
240 if (num_fns == 0) {
241 Cerr << "\nError (write_data_tabular): empty coefficient array."
242 << std::endl;
243 error_flag = true;
244 }
245 if (num_ind_rows == 0) {
246 Cerr << "\nError (write_data_tabular): empty indices array." << std::endl;
247 error_flag = true;
248 }
249 if (error_flag)
250 abort_handler(-1);
251
252 size_t num_coeff_rows = output_coeffs[0].length();
253 size_t num_vars = output_indices[0].size();
254 if (num_coeff_rows != num_ind_rows) {
255 Cerr << "\nError (write_data_tabular): mismatch in PCE coefficient and "
256 << "index lengths." << std::endl;
257 error_flag = true;
258 }
259 if (num_vars == 0) {
260 Cerr << "\nError (write_data_tabular): empty indices row." << std::endl;
261 error_flag = true;
262 }
263 if (error_flag)
264 abort_handler(-1);
265
266 // TODO: consider removing TabularIO namespace
267 for (size_t row = 0; row < num_coeff_rows; ++row) {
268 for (size_t fn_ind = 0; fn_ind < num_fns; ++fn_ind)
269 Dakota::
270 write_data_tabular(output_stream, &output_coeffs[fn_ind][row], 1);
271 Dakota::
272 write_data_tabular(output_stream, &output_indices[row][0], num_vars);
273 output_stream << std::endl;
274 }
275
276 close_file(output_stream, output_filename, context_message);
277 }
278
279
280 //
281 //- Utilities for tabular read
282 //
283
284 /** Discard header row from tabular file; alternate could read into a
285 string array. Requires header to be delimited by a newline. */
read_header_tabular(std::istream & input_stream,unsigned short tabular_format)286 StringArray read_header_tabular(std::istream& input_stream,
287 unsigned short tabular_format)
288 {
289 StringArray header_fields;
290 if (tabular_format & TABULAR_HEADER) {
291 input_stream >> std::ws;
292 String header;
293 getline(input_stream, header);
294 return strsplit(header);
295 }
296 return StringArray();
297 }
298
299
300 /** reads eval and interface ids; if no eval ID to read due to format,
301 increment the passed eval ID */
read_leading_columns(std::istream & input_stream,unsigned short tabular_format,int & eval_id,String & iface_id)302 void read_leading_columns(std::istream& input_stream,
303 unsigned short tabular_format,
304 int& eval_id, String& iface_id)
305 {
306 if (tabular_format & TABULAR_EVAL_ID)
307 input_stream >> eval_id;
308 else
309 ++eval_id;
310
311 if (tabular_format & TABULAR_IFACE_ID) {
312 input_stream >> iface_id;
313 // (Dakota 6.1 used EMPTY for missing ID)
314 if (iface_id == "EMPTY")
315 iface_id = "NO_ID";
316 }
317 else
318 iface_id = "NO_ID";
319 }
320
321
322 /** Discards the (eval ID and) interface data, which should be used
323 for validation */
read_leading_columns(std::istream & input_stream,unsigned short tabular_format)324 void read_leading_columns(std::istream& input_stream,
325 unsigned short tabular_format)
326 {
327 int eval_id; // discarded
328 String iface_id; // discarded
329 read_leading_columns(input_stream, tabular_format, eval_id, iface_id);
330 }
331
332
/// Return true if any non-whitespace token remains on input_stream.
/// Leading whitespace is consumed first.  A stream failure exception raised
/// while extracting the token is treated as "no extra data".
bool exists_extra_data(std::istream& input_stream)
{
  input_stream >> std::ws;

  // good() is true only when no state bit (eofbit included) is set
  while (input_stream.good()) {
    std::string token;
    try {
      input_stream >> token;
    }
    catch (const std::ios_base::failure&) {
      // TODO: report error in this branch
      return false;
    }
    if (!token.empty())
      return true;
  }
  return false;
}
350
351
352 /// return indices (relative to first variable position) into the read
353 /// var labels that yield input spec ordered vars
354 std::vector<size_t>
find_vars_map(const StringArray::const_iterator & read_vars_begin,const StringArray & expected_vars)355 find_vars_map(const StringArray::const_iterator& read_vars_begin,
356 const StringArray& expected_vars)
357 {
358 // Pre-condition: read var labels are a permutation of expected
359 size_t num_vars = expected_vars.size();
360 std::vector<size_t> var_inds(num_vars);
361 for(size_t i=0; i<num_vars; ++i) {
362 auto lab_it = std::find(read_vars_begin, read_vars_begin + num_vars,
363 expected_vars[i]);
364 var_inds[i] = std::distance(read_vars_begin, lab_it);
365 }
366 return var_inds;
367 }
368
369
370 /// Given a row of a tabular file, reorder the variables, leaving
371 /// leading cols and responses as-is. var_inds are zero-based indices
372 /// into the variables only in the read row
reorder_row(const std::string & read_str,std::vector<size_t> var_inds,size_t num_lead)373 std::string reorder_row(const std::string& read_str,
374 std::vector<size_t> var_inds, size_t num_lead)
375 {
376 if (var_inds.empty()) return read_str; // no reordering needed
377
378 StringArray row_vals = strsplit(read_str);
379
380 // create a new string with reordered vars
381 std::ostringstream ordered_str;
382 std::ostream_iterator<String> os_it(ordered_str, " ");
383 auto num_vars = var_inds.size();
384 std::copy(row_vals.begin(), row_vals.begin() + num_lead, os_it);
385 for (const auto v_index : var_inds)
386 ordered_str << row_vals[num_lead + v_index] + " ";
387 std::copy(row_vals.begin() + num_lead + num_vars, row_vals.end(), os_it);
388
389 return ordered_str.str();
390 }
391
print_expected_labels(bool active_only,const StringArray & expected_vars,const StringArray::const_iterator & read_vars_begin,const StringArray::const_iterator & header_fields_end)392 void print_expected_labels(bool active_only,
393 const StringArray& expected_vars,
394 const StringArray::const_iterator& read_vars_begin,
395 const StringArray::const_iterator& header_fields_end)
396 {
397 std::ostream_iterator<String> out_it (Cout, " ");
398 Cout << "\nExpected labels (for "
399 << ((active_only) ? "active" : "all")
400 << " variables):\n ";
401 std::copy(expected_vars.begin(), expected_vars.end(), out_it);
402 Cout << std::endl << "Instead found these in header (including "
403 << "variable and response labels):\n ";
404 std::copy(read_vars_begin, header_fields_end, out_it);
405 Cout << '\n' << std::endl;
406 }
407
408
409 // NOTE: Passing all these args around begs for a class to
410 // encapsulate, BMA TODO: refactor procedural code
411 std::vector<size_t>
validate_header(std::ifstream & data_stream,const std::string & input_filename,const std::string & context_message,const Variables & vars,unsigned short tabular_format,bool verbose,bool use_var_labels,bool active_only)412 validate_header(std::ifstream& data_stream,
413 const std::string& input_filename,
414 const std::string& context_message,
415 const Variables& vars,
416 unsigned short tabular_format, bool verbose,
417 bool use_var_labels, bool active_only)
418 {
419 // TODO: Validate response labels
420 // TODO: Side-by-side diff of labels
421 // TODO: Can we guide the user further when data appear active vs. all?
422
423 size_t num_lead = 0;
424 if (tabular_format & TABULAR_EVAL_ID) ++num_lead;
425 if (tabular_format & TABULAR_IFACE_ID) ++num_lead;
426 size_t num_vars = active_only ? vars.total_active() : vars.tv();
427
428 StringArray expected_vars =
429 vars.ordered_labels(active_only ? ACTIVE_VARS : ALL_VARS);
430 StringArray header_fields = read_header_tabular(data_stream, tabular_format);
431 size_t read_fields = header_fields.size();
432
433 std::vector<size_t> var_inds; // only populated if reordering
434
435 // iterator to start of read vars, skipping any leading columns;
436 // take care to not advance beyond end()
437 auto read_vars_begin = (num_lead < read_fields) ?
438 header_fields.begin() + num_lead : header_fields.end();
439
440 bool vars_equal = (num_lead + num_vars > read_fields) ? false :
441 std::equal(expected_vars.begin(), expected_vars.end(), read_vars_begin);
442
443 bool vars_permuted = (num_lead + num_vars > read_fields) ? false :
444 std::is_permutation(expected_vars.begin(), expected_vars.end(),
445 read_vars_begin);
446
447 if (use_var_labels) {
448 // Input spec restricts to TABULAR_HEADER case; require equal or permutation
449 if (vars_equal) {
450 ; // no map needed (no-op to simplify logic)
451 }
452 else if (vars_permuted ) {
453 Cout << "\nInfo (" << context_message << "):\n"
454 << "Reordering variables imported from tabular file '"
455 << input_filename << "'\nbased on labels in header.\n" << std::endl;
456 var_inds = find_vars_map(read_vars_begin, expected_vars);
457 }
458 else {
459 Cerr << "\nError (" << context_message << "):\n"
460 << "Cannot reorder variables imported from tabular file '"
461 << input_filename << "'\nas requested by use_variable_labels. First "
462 << num_vars << " variable labels in tabular\nfile header are not a "
463 << "permutation of expected variable labels." << std::endl;
464 if (verbose)
465 print_expected_labels(active_only, expected_vars, read_vars_begin,
466 header_fields.end());
467 abort_handler(IO_ERROR);
468 }
469
470 }
471 else if (tabular_format & TABULAR_HEADER) {
472 if (!vars_equal) {
473 if (vars_permuted) {
474 Cout << "\nWarning (" << context_message << "):\n"
475 << "Variable labels in header of tabular file '" << input_filename
476 << "' are a\npermutation of expected variable labels;"
477 << " consider use_variable_labels keyword." << std::endl;
478 if (verbose)
479 print_expected_labels(active_only, expected_vars, read_vars_begin,
480 header_fields.end());
481 }
482 else {
483 Cout << "\nWarning (" << context_message << "):\n"
484 << "Variable labels in header of tabular file '" << input_filename
485 << "'\ndo not match " << num_vars << " variables being"
486 <<" imported to." << std::endl;
487 if (verbose)
488 print_expected_labels(active_only, expected_vars, read_vars_begin,
489 header_fields.end());
490 }
491 }
492 // Can't guide further as unable to reconcile vars vs. responses,
493 // but we decided not a hard error for now
494 }
495
496 return var_inds;
497 }
498
499
read_data_tabular(const std::string & input_filename,const std::string & context_message,RealVector & input_vector,size_t num_entries,unsigned short tabular_format)500 void read_data_tabular(const std::string& input_filename,
501 const std::string& context_message,
502 RealVector& input_vector, size_t num_entries,
503 unsigned short tabular_format)
504 {
505 // TODO: handle both row and col vectors in the text?
506 std::ifstream input_stream;
507 open_file(input_stream, input_filename, context_message);
508
509 read_header_tabular(input_stream, tabular_format);
510
511 input_vector.resize(num_entries);
512 try {
513 if (tabular_format & TABULAR_EVAL_ID || tabular_format & TABULAR_IFACE_ID) {
514 for (size_t row_ind = 0; row_ind < num_entries; ++row_ind) {
515 input_stream >> std::ws;
516 // discard the leading cols (typically eval or data ID, iface ID)
517 read_leading_columns(input_stream, tabular_format);
518 input_stream >> input_vector[row_ind];
519 }
520 } else {
521 // read raw whitespace separated data into (sized) vector
522 input_stream >> std::ws;
523 read_data(input_stream, input_vector);
524 }
525 }
526 catch (const std::ios_base::failure& failorbad_except) {
527 Cerr << "\nError (" << context_message << "): could not read file.";
528 print_expected_format(Cerr, tabular_format, num_entries, 1);
529 abort_handler(-1);
530 }
531
532 if (exists_extra_data(input_stream))
533 print_unexpected_data(Cout, input_filename, context_message,tabular_format);
534
535 close_file(input_stream, input_filename, context_message);
536 }
537
538
539 // New prototype to support mixed variable reads
read_data_tabular(const std::string & input_filename,const std::string & context_message,Variables vars,size_t num_fns,RealMatrix & vars_matrix,RealMatrix & resp_matrix,unsigned short tabular_format,bool verbose,bool use_var_labels,bool active_only)540 void read_data_tabular(const std::string& input_filename,
541 const std::string& context_message,
542 Variables vars, size_t num_fns,
543 RealMatrix& vars_matrix, RealMatrix& resp_matrix,
544 unsigned short tabular_format, bool verbose,
545 bool use_var_labels, bool active_only)
546 {
547 // Disallow string variables for now - RWH
548 if( (active_only && vars.dsv()) || (!active_only && vars.adsv()) ) {
549 Cerr << "\nError (" << context_message
550 << "): String variables are not currently supported.\n";
551 abort_handler(-1);
552 }
553
554 std::ifstream input_stream;
555 open_file(input_stream, input_filename, context_message);
556
557 size_t num_vars = active_only ?
558 vars.cv() + vars.div() + vars.dsv() + vars.drv() : vars.tv();
559
560 RealVectorArray work_vars_va;
561 RealVectorArray work_resp_va;
562 RealVector work_vars_vec(num_vars);
563 RealVector work_resp_vec(num_fns);
564
565 // Need to delay sizing of input_matrix
566 try {
567
568 // only populated if reordering
569 std::vector<size_t> var_inds =
570 validate_header(input_stream, input_filename, context_message, vars,
571 tabular_format, verbose, use_var_labels, active_only);
572
573 size_t line = (tabular_format & TABULAR_HEADER) ? 1 : 0;
574
575 size_t num_lead = 0;
576 if (tabular_format & TABULAR_EVAL_ID) ++num_lead;
577 if (tabular_format & TABULAR_IFACE_ID) ++num_lead;
578 size_t num_vars = active_only ? vars.total_active() : vars.tv();
579 size_t num_cols = num_lead + num_vars + num_fns;;
580
581 input_stream >> std::ws;
582 while (input_stream.good() && !input_stream.eof()) {
583
584 // Read a line, then use existing vars/resp read functions
585 input_stream >> std::ws;
586 String row_str;
587 getline(input_stream, row_str);
588 ++line;
589
590 size_t num_read = strsplit(row_str).size(); // TODO: count without storing
591 if (num_read != num_cols) {
592 // TODO: more detailed message about column contents
593 Cerr << "\nError (" << context_message
594 << "): wrong number of columns on line " << line << "\nof file '"
595 << input_filename << "'; expected " << num_cols << ", found "
596 << num_read << ".\n";
597 print_expected_format(Cerr, tabular_format, 0, num_cols);
598 abort_handler(IO_ERROR);
599 }
600
601 std::istringstream row_iss(var_inds.empty() ? row_str :
602 reorder_row(row_str, var_inds, num_lead));
603
604 // discard any leading columns
605 read_leading_columns(row_iss, tabular_format);
606
607 // use a variables object because it knows how to read active vs. all
608 vars.read_tabular(row_iss, (active_only ? ACTIVE_VARS : ALL_VARS) );
609
610 // Extract the variables
611 const RealVector& c_vars = active_only ? vars.continuous_variables()
612 : vars.all_continuous_variables();
613 const IntVector& di_vars = active_only ? vars.discrete_int_variables()
614 : vars.all_discrete_int_variables();
615 const RealVector& dr_vars = active_only ? vars.discrete_real_variables()
616 : vars.all_discrete_real_variables();
617 copy_data_partial(c_vars, work_vars_vec, 0);
618 merge_data_partial(di_vars, work_vars_vec, c_vars.length());
619 copy_data_partial(dr_vars, work_vars_vec,
620 c_vars.length()+di_vars.length());
621 //varsMatrix(row,:) = [vars.continuous_variables(),
622 // vars.discrete_int_variables(), vars.discrete_real_variables() ]
623 work_vars_va.push_back(work_vars_vec);
624 //Cout << "Working Variables vector contents: \n" << work_vars_vec
625 // << std::endl;
626
627 // read the raw function data
628 for (size_t fi = 0; fi < num_fns; ++fi) {
629 double read_value = std::numeric_limits<double>::quiet_NaN();
630 if (row_iss >> read_value)
631 work_resp_vec(fi) = read_value;
632 }
633 work_resp_va.push_back(work_resp_vec);
634 //Cout << "Working Response vector contents: \n" << work_resp_vec
635 // << std::endl;
636
637 input_stream >> std::ws;
638 }
639 }
640 catch (const std::ios_base::failure& failorbad_except) {
641 Cerr << "\nError (" << context_message << "): could not read file "
642 << input_filename << ".";
643 print_expected_format(Cerr, tabular_format, 0, num_vars);
644 abort_handler(-1);
645 }
646 catch (const TabularDataTruncated& tdtrunc) {
647 // this will be thrown if Variables was truncated
648 Cerr << "\nError (" << context_message
649 << "): could not read variables from file " << input_filename
650 << ";\n " << tdtrunc.what() << std::endl;
651 abort_handler(-1);
652 }
653 catch(...) {
654 Cerr << "\nError (" << context_message << "): could not read file "
655 << input_filename << " (unknown error).";
656 abort_handler(-1);
657 }
658
659 copy_data(work_vars_va, vars_matrix);
660 copy_data(work_resp_va, resp_matrix);
661
662 close_file(input_stream, input_filename, context_message);
663 }
664
665 /** Read possibly annotated data with unknown num_rows data into input_coeffs
666 (num_fns x num_rows) and input_indices (num_rows x num_vars) */
read_data_tabular(const std::string & input_filename,const std::string & context_message,RealVectorArray & input_coeffs,UShort2DArray & input_indices,unsigned short tabular_format,size_t num_vars,size_t num_fns)667 void read_data_tabular(const std::string& input_filename,
668 const std::string& context_message,
669 RealVectorArray& input_coeffs,
670 UShort2DArray& input_indices,
671 unsigned short tabular_format,
672 size_t num_vars, size_t num_fns)
673 {
674 std::ifstream input_stream;
675 open_file(input_stream, input_filename, context_message);
676
677 // clear so we can append
678 input_indices.clear();
679 // need a temporary due to layout of returned data; this will be
680 // num_rows x num_fns
681 Real2DArray coeffs_tmp;
682
683 try {
684
685 read_header_tabular(input_stream, tabular_format);
686
687 input_stream >> std::ws;
688 while (input_stream.good() && !input_stream.eof()) {
689
690 // discard any leading columns; annotated is unlikely in this case
691 read_leading_columns(input_stream, tabular_format);
692
693 // read the (required) coefficients of length num_fns
694 RealArray read_coeffs(num_fns, std::numeric_limits<double>::quiet_NaN());
695 if (input_stream >> read_coeffs) {
696 Cout << "read: " << read_coeffs << std::endl;
697 coeffs_tmp.push_back(read_coeffs);
698 }
699 else {
700 Cerr << "\nError (" << context_message << "): unexpected coeff read "
701 << "error in file " << input_filename << " for read: "
702 << read_coeffs << std::endl;
703 abort_handler(-1);
704 }
705
706 // read the (required) indices of length num_vars
707 UShortArray index_set(num_vars, 0);
708 // don't break as these are required data
709 // use templated stream extraction from data_io
710 if (input_stream >> index_set) {
711 Cout << "index set: " << index_set << std::endl;
712 input_indices.push_back(index_set);
713 }
714 else {
715 Cerr << "\nError (" << context_message << "): unexpected indices read "
716 << "error in file " << input_filename << " for read: "
717 << index_set << std::endl;
718 abort_handler(-1);
719 }
720 input_stream >> std::ws;
721 }
722 }
723 catch (const std::ios_base::failure& failorbad_except) {
724 Cerr << "\nError (" << context_message << "): could not read file "
725 << input_filename << ".";
726 print_expected_format(Cerr, tabular_format, 0, num_vars);
727 abort_handler(-1);
728 }
729 catch(...) {
730 Cerr << "\nError (" << context_message << "): could not read file "
731 << input_filename << " (unknown error).";
732 abort_handler(-1);
733 }
734
735 size_t num_rows = coeffs_tmp.size();
736
737 // transpose copy from coeffs_tmp to input_coeffs
738 if (input_coeffs.size() != num_fns)
739 input_coeffs.resize(num_fns);
740 for (size_t fn_ind = 0; fn_ind < num_fns; ++fn_ind) {
741 if (input_coeffs[fn_ind].length() != num_rows)
742 input_coeffs[fn_ind].sizeUninitialized(num_rows);
743 for (size_t row_ind = 0; row_ind < num_rows; ++row_ind)
744 input_coeffs[fn_ind][row_ind] = coeffs_tmp[row_ind][fn_ind];
745 }
746
747 close_file(input_stream, input_filename, context_message);
748 }
749
750
read_data_tabular(const std::string & input_filename,const std::string & context_message,Variables vars,Response resp,PRPList & input_prp,unsigned short tabular_format,bool verbose,bool use_var_labels,bool active_only)751 void read_data_tabular(const std::string& input_filename,
752 const std::string& context_message,
753 Variables vars, Response resp, PRPList& input_prp,
754 unsigned short tabular_format, bool verbose,
755 bool use_var_labels, bool active_only)
756 {
757 std::ifstream data_stream;
758 open_file(data_stream, input_filename, context_message);
759
760 // only populated if reordering
761 std::vector<size_t> var_inds =
762 validate_header(data_stream, input_filename, context_message, vars,
763 tabular_format, verbose, use_var_labels, active_only);
764
765 int eval_id = 0; // number the evals starting from 1 if not contained in file
766 String iface_id;
767 size_t line = (tabular_format & TABULAR_HEADER) ? 1 : 0;
768
769 size_t num_lead = 0;
770 if (tabular_format & TABULAR_EVAL_ID) ++num_lead;
771 if (tabular_format & TABULAR_IFACE_ID) ++num_lead;
772 size_t num_vars = active_only ? vars.total_active() : vars.tv();
773 size_t num_cols = num_lead + num_vars + resp.num_functions();;
774 // shouldn't need both good and eof checks
775 data_stream >> std::ws;
776 while (data_stream.good() && !data_stream.eof()) {
777 try {
778
779 // Read a line, then use existing vars/resp read functions
780 data_stream >> std::ws;
781 String row_str;
782 getline(data_stream, row_str);
783 ++line;
784
785 size_t num_read = strsplit(row_str).size(); // TODO: count without storing
786 if (num_read != num_cols) {
787 // TODO: more detailed message about column contents
788 Cerr << "\nError (" << context_message
789 << "): wrong number of columns on line " << line << "\nof file '"
790 << input_filename << "'; expected " << num_cols << ", found "
791 << num_read << ".\n";
792 print_expected_format(Cerr, tabular_format, 0, num_cols);
793 abort_handler(IO_ERROR);
794 }
795
796 std::istringstream row_iss(var_inds.empty() ? row_str :
797 reorder_row(row_str, var_inds, num_lead));
798
799 read_leading_columns(row_iss, tabular_format, eval_id, iface_id);
800 vars.read_tabular(row_iss, (active_only ? ACTIVE_VARS : ALL_VARS) );
801 resp.read_tabular(row_iss);
802 }
803 catch (const TabularDataTruncated& tdtrunc) {
804 // this will be thrown if either Variables or Response was truncated
805 Cerr << "\nError (" << context_message
806 << "): could not read variables or responses from file "
807 << input_filename << ";\n " << tdtrunc.what() << std::endl;
808 abort_handler(IO_ERROR);
809 }
810 catch(...) {
811 Cerr << "\nError (" << context_message << "): could not read file "
812 << input_filename << " (unknown error).";
813 abort_handler(IO_ERROR);
814 }
815 if (verbose) {
816 Cout << "Variables read:\n" << vars;
817 if (!iface_id.empty())
818 Cout << "\nInterface identifier = " << iface_id << '\n';
819 Cout << "\nResponse read:\n" << resp;
820 }
821
822 // append deep copy of vars,resp as PRP
823 input_prp.push_back(ParamResponsePair(vars, iface_id, resp, eval_id));
824
825 // advance so EOF can detect properly
826 data_stream >> std::ws;
827 }
828
829 close_file(data_stream, input_filename, context_message);
830 }
831
832
read_data_tabular(const std::string & input_filename,const std::string & context_message,RealMatrix & input_matrix,size_t num_rows,size_t num_cols,unsigned short tabular_format,bool verbose)833 void read_data_tabular(const std::string& input_filename,
834 const std::string& context_message,
835 RealMatrix& input_matrix,
836 size_t num_rows, size_t num_cols,
837 unsigned short tabular_format, bool verbose)
838 {
839 std::ifstream input_stream;
840 open_file(input_stream, input_filename, context_message);
841
842 if (verbose) {
843 Cout << "\nAttempting to read " << num_rows << " x " << num_cols << " = "
844 << num_rows*num_cols << " numeric data from "
845 << (tabular_format > TABULAR_NONE ? "header-annotated" : "free-form")
846 << " file " << input_filename << "..." << std::endl;
847 }
848
849 read_header_tabular(input_stream, tabular_format);
850
851 input_matrix.shapeUninitialized(num_rows, num_cols);
852 for (size_t row_ind = 0; row_ind < num_rows; ++row_ind) {
853 try {
854 // experiment data would never have an interface ID
855 if (tabular_format & TABULAR_EVAL_ID) {
856 // discard the row label (typically eval or data ID)
857 size_t discard_row_label;
858 input_stream >> discard_row_label;
859 }
860 for (size_t col_ind = 0; col_ind < num_cols; ++col_ind)
861 input_stream >> input_matrix(row_ind, col_ind);
862 }
863 catch (const std::ios_base::failure& failorbad_except) {
864 Cerr << "\nError (" << context_message << "): could not read file.";
865 print_expected_format(Cerr, tabular_format, num_rows, num_cols);
866 abort_handler(-1);
867 }
868 }
869
870 if (exists_extra_data(input_stream))
871 print_unexpected_data(Cout, input_filename, context_message, tabular_format);
872
873 close_file(input_stream, input_filename, context_message);
874 }
875
876
read_data_tabular(const std::string & input_filename,const std::string & context_message,RealMatrix & input_matrix,size_t record_len,unsigned short tabular_format,bool verbose)877 void read_data_tabular(const std::string& input_filename,
878 const std::string& context_message,
879 RealMatrix& input_matrix, size_t record_len,
880 unsigned short tabular_format, bool verbose)
881 {
882 std::ifstream input_stream;
883 open_file(input_stream, input_filename, context_message);
884
885 RealVectorArray rva;
886 RealVector read_rv(record_len);
887 try {
888
889 read_header_tabular(input_stream, tabular_format);
890
891 input_stream >> std::ws;
892 while (input_stream.good() && !input_stream.eof()) {
893
894 // discard any leading columns; annotated is unlikely in this case
895 read_leading_columns(input_stream, tabular_format);
896
897 // read the (required) coefficients of length num_fns
898 read_rv = std::numeric_limits<Real>::quiet_NaN();
899 if (input_stream >> read_rv) {
900 if (verbose) Cout << "read:\n" << read_rv;
901 rva.push_back(read_rv);
902 }
903 else {
904 Cerr << "\nError (" << context_message << "): unexpected row read "
905 << "error in file " << input_filename << ".\nread:\n" << read_rv;
906 abort_handler(-1);
907 }
908 input_stream >> std::ws; // advance to next input for EOF detection
909 }
910 }
911 catch (const std::ios_base::failure& failorbad_except) {
912 Cerr << "\nError (" << context_message << "): could not read file "
913 << input_filename << ".";
914 print_expected_format(Cerr, tabular_format, 0, record_len);
915 abort_handler(-1);
916 }
917 catch(...) {
918 Cerr << "\nError (" << context_message << "): could not read file "
919 << input_filename << " (unknown error).";
920 abort_handler(-1);
921 }
922
923 // this transposes the rva tabular layout (num_records X record_len) into the
924 // rm layout (record_len X num_records), since the natural place to store the
925 // ith vector rva[i] is as rm[i], a Teuchos column vector.
926 copy_data_transpose(rva, input_matrix);
927
928 close_file(input_stream, input_filename, context_message);
929 }
930
931
read_data_tabular(const std::string & input_filename,const std::string & context_message,RealVectorArray & cva,IntVectorArray & diva,StringMulti2DArray & dsva,RealVectorArray & drva,unsigned short tabular_format,bool active_only,Variables vars)932 size_t read_data_tabular(const std::string& input_filename,
933 const std::string& context_message,
934 RealVectorArray& cva, IntVectorArray& diva,
935 StringMulti2DArray& dsva, RealVectorArray& drva,
936 unsigned short tabular_format,
937 bool active_only, Variables vars)
938 {
939 size_t num_evals = 0, num_vars = vars.tv();
940 // temporary dynamic container to read string variables
941 std::vector<StringMultiArray> list_dsv_points;
942
943 std::ifstream input_stream;
944 open_file(input_stream, input_filename, context_message);
945
946 try {
947
948 read_header_tabular(input_stream, tabular_format);
949
950 input_stream >> std::ws; // advance to next readable input
951 while (input_stream.good() && !input_stream.eof()) {
952 // discard the row labels (typically eval and iface ID)
953 read_leading_columns(input_stream, tabular_format);
954
955 // read all or active, but set only the active variables into the lists
956 vars.read_tabular(input_stream, (active_only ? ACTIVE_VARS : ALL_VARS) );
957 ++num_evals;
958
959 // the Variables object vars passed in is a deep copy, but these
960 // accessors return views; force a deep copy of each vector for
961 // storage in array
962 RealVector c_vars(Teuchos::Copy, vars.continuous_variables().values(),
963 vars.continuous_variables().length());
964 cva.push_back(c_vars);
965 IntVector di_vars(Teuchos::Copy, vars.discrete_int_variables().values(),
966 vars.discrete_int_variables().length());
967 diva.push_back(di_vars);
968 list_dsv_points.push_back(vars.discrete_string_variables());
969 RealVector dr_vars(Teuchos::Copy, vars.discrete_real_variables().values(),
970 vars.discrete_real_variables().length());
971 drva.push_back(dr_vars);
972
973 input_stream >> std::ws; // advance to next readable input
974 }
975 }
976 catch (const std::ios_base::failure& failorbad_except) {
977 Cerr << "\nError (" << context_message << "): could not read file "
978 << input_filename << ".";
979 print_expected_format(Cerr, tabular_format, 0, num_vars);
980 abort_handler(-1);
981 }
982 catch (const TabularDataTruncated& tdtrunc) {
983 // this will be thrown if Variables was truncated
984 Cerr << "\nError (" << context_message << "): could not read variables from "
985 << "file " << input_filename << ";\n " << tdtrunc.what() << std::endl;
986 abort_handler(-1);
987 }
988 catch(...) {
989 Cerr << "\nError (" << context_message << "): could not read file "
990 << input_filename << " (unknown error)." << std::endl;
991 abort_handler(-1);
992 }
993
994
995 // copy into the string multiarray
996 size_t num_dsv = vars.dsv();
997 dsva.resize(boost::extents[num_evals][num_dsv]);
998 for (size_t i=0; i<num_evals; ++i)
999 dsva[i] = list_dsv_points[i];
1000 list_dsv_points.clear();
1001
1002 close_file(input_stream, input_filename, context_message);
1003
1004 return num_evals;
1005 }
1006
1007
1008 } // namespace TabularIO
1009
1010 } // namespace Dakota
1011