1 /* 2 * data_file.cc -- ePiX::data_file class 3 * 4 * This file is part of ePiX, a C++ library for creating high-quality 5 * figures in LaTeX 6 * 7 * Version 1.2.0-2 8 * Last Change: September 26, 2007 9 */ 10 11 /* 12 * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 13 * Andrew D. Hwang <rot 13 nujnat at zngupf dot ubylpebff dot rqh> 14 * Department of Mathematics and Computer Science 15 * College of the Holy Cross 16 * Worcester, MA, 01610-2395, USA 17 */ 18 19 /* 20 * ePiX is free software; you can redistribute it and/or modify it 21 * under the terms of the GNU General Public License as published by 22 * the Free Software Foundation; either version 2 of the License, or 23 * (at your option) any later version. 24 * 25 * ePiX is distributed in the hope that it will be useful, but WITHOUT 26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 28 * License for more details. 29 * 30 * You should have received a copy of the GNU General Public License 31 * along with ePiX; if not, write to the Free Software Foundation, Inc., 32 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 33 */ 34 #include <cmath> 35 #include <fstream> 36 #include <sstream> 37 #include <list> 38 #include <vector> 39 #include <stdexcept> 40 41 #include "errors.h" 42 43 #include "functions.h" 44 #include "triples.h" 45 #include "path.h" 46 #include "spline.h" 47 48 #include "label_data.h" 49 #include "curves.h" 50 51 #include "interval.h" 52 #include "data_bins.h" 53 #include "data_mask.h" 54 #include "data_file.h" 55 56 namespace ePiX { 57 58 const std::string default_delim("\t"); 59 const std::string default_commt("%"); 60 61 // magic number -- precision for data_file.write() 62 const unsigned int PRECISION(6); 63 truncate(double arg,const unsigned int n)64 double truncate(double arg, const unsigned int n) 65 { 66 if (fabs(arg) < pow(0.1, n)) 67 return 0; 68 69 return arg; 70 } 71 72 // warning message for data_file::tokenize non_parsable(const std::string & msg)73 void non_parsable(const std::string& msg) 74 { 75 std::stringstream obuf; 76 obuf << "data_file::tokenise():" << std::endl 77 << " Non-parsable input \"" << msg << "\", setting value to 0"; 78 epix_warning(obuf.str()); 79 } 80 81 //// private data_file functions //// 82 // returns # of columns, i.e. # of entries in first parsable row entries(const char * filename)83 unsigned int data_file::entries(const char* filename) 84 { 85 std::ifstream input(filename); 86 if (!input) 87 { 88 epix_warning("Cannot open file \""+std::string(filename)+"\""); 89 return(0); 90 } 91 92 // else 93 std::string linebuf; 94 std::vector<double> tmp; 95 96 while (getline(input, linebuf) && tmp.size() == 0) 97 { 98 // strip comments 99 linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt)); 100 if (linebuf.length() > 0) 101 tmp = tokenise(linebuf); 102 } 103 104 input.close(); 105 106 if (tmp.size() == 0) 107 { 108 std::stringstream obuf; // solely for code readability :) 109 obuf << "File \"" << std::string(filename) 110 << "\" contains no parsable data"; 111 112 epix_warning(obuf.str()); 113 } 114 115 return tmp.size(); 116 } // end of entries(const char*) 117 118 119 // private function: 120 // Tokenise line using our delimiter, return a vector of doubles tokenise(std::string line)121 std::vector<double> data_file::tokenise(std::string line) 122 { 123 size_t pos(line.find(m_delim, 0)); // first delimiter 124 std::string tmpStr; // current chunk of input 125 double tmpDbl; // current parsed chunk 126 std::vector<double> tmpVec; // output found so far 127 128 // read current line into a vector of doubles 129 while(pos != std::string::npos) 130 { 131 tmpStr = line.substr(0, pos); 132 line.erase(0, pos+1); 133 std::istringstream convStr(tmpStr); 134 135 if (!(convStr >> tmpDbl)) 136 { 137 tmpDbl = 0; 138 non_parsable(convStr.str()); 139 } 140 141 tmpVec.push_back(tmpDbl); 142 pos = line.find(m_delim, 0); 143 } 144 145 if (line.size()) // There's input remaining 146 { 147 std::istringstream convStr(line); 148 149 if (!(convStr >> tmpDbl)) 150 { 151 tmpDbl = 0; 152 non_parsable(convStr.str()); 153 } 154 155 else 156 tmpVec.push_back(tmpDbl); 157 } 158 159 return tmpVec; 160 } 161 162 //// public data_file functions //// data_file(unsigned int n)163 data_file::data_file(unsigned int n) 164 : m_precision(PRECISION), m_data(n), 165 m_delim(default_delim), 166 m_commt(default_commt) { } 167 168 // provide two version to avoid exposing default delim, commt data_file(const char * filename,const std::string & delim,const std::string & commt)169 data_file::data_file(const char* filename, 170 const std::string& delim, const std::string& commt ) 171 : m_precision(PRECISION), m_delim(delim), m_commt(commt) 172 { 173 // get number of columns by parsing first line of file 174 const unsigned int N(entries(filename)); 175 m_data.resize(N); 176 177 if (N > 0) // file contains data 178 read(filename); 179 180 } // end of data_file() 181 182 // filename-only version data_file(const char * filename)183 data_file::data_file(const char* filename) 184 : m_precision(PRECISION), m_delim(default_delim), m_commt(default_commt) 185 { 186 // get number of columns by parsing first line of file 187 const unsigned int N(entries(filename)); 188 m_data.resize(N); 189 190 if (N > 0) // file contains data 191 read(filename); 192 193 } // end of data_file(const char*) 194 195 196 // file made from components data_file(double f (double),double t_min,double t_max,unsigned int num_pts)197 data_file::data_file(double f(double), 198 double t_min, double t_max, unsigned int num_pts) 199 : m_precision(PRECISION), m_data(1), 200 m_delim(default_delim), m_commt(default_commt) 201 { 202 const double dt((t_max - t_min)/num_pts); 203 for (unsigned int i=0; i<= num_pts; ++i) 204 m_data.at(0).push_back(f(t_min+i*dt)); 205 } 206 data_file(double f1 (double),double f2 (double),double t_min,double t_max,unsigned int num_pts)207 data_file::data_file(double f1(double), double f2(double), 208 double t_min, double t_max, unsigned int num_pts) 209 : m_precision(PRECISION), m_data(2), 210 m_delim(default_delim), m_commt(default_commt) 211 { 212 const double dt((t_max - t_min)/num_pts); 213 for (unsigned int i=0; i<= num_pts; ++i) 214 { 215 m_data.at(0).push_back(f1(t_min+i*dt)); 216 m_data.at(1).push_back(f2(t_min+i*dt)); 217 } 218 } 219 220 data_file(double f1 (double),double f2 (double),double f3 (double),double t_min,double t_max,unsigned int num_pts)221 data_file::data_file(double f1(double), double f2(double), double f3(double), 222 double t_min, double t_max, unsigned int num_pts) 223 : m_precision(PRECISION), m_data(3), 224 m_delim(default_delim), m_commt(default_commt) 225 { 226 const double dt((t_max - t_min)/num_pts); 227 for (unsigned int i=0; i<= num_pts; ++i) 228 { 229 m_data.at(0).push_back(f1(t_min+i*dt)); 230 m_data.at(1).push_back(f2(t_min+i*dt)); 231 m_data.at(2).push_back(f3(t_min+i*dt)); 232 } 233 } 234 235 read(const char * filename)236 data_file& data_file::read(const char* filename) 237 { 238 unsigned int columns(entries(filename)); 239 if (columns != m_data.size()) 240 { 241 std::stringstream msg; 242 msg << "Column count mismatch in file " << filename; 243 epix_warning(msg.str()); 244 } 245 246 else if (0 < columns) 247 { 248 std::ifstream input(filename); 249 std::string linebuf; 250 std::vector<double> line; 251 252 bool warned(false); 253 254 while (getline(input, linebuf)) 255 { 256 // strip comments 257 linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt)); 258 if (linebuf.length() > 0) 259 { 260 line = tokenise(linebuf); 261 262 if (line.size() > m_data.size()) 263 { 264 if (!warned) 265 { 266 epix_warning("File has more columns than allocated"); 267 warned = true; 268 } 269 } 270 else if (line.size() < m_data.size()) 271 { 272 if (!warned) 273 { 274 epix_warning("File has fewer columns than allocated"); 275 warned = true; 276 } 277 } 278 else 279 { 280 for (unsigned int i = 0; i < m_data.size(); i++) 281 m_data.at(i).push_back(line.at(i)); 282 } 283 } // linebuf non-empty 284 } // end of file 285 input.close(); 286 } 287 return *this; 288 } // end of data_file::read(const char*, const std::string&) 289 290 291 // transform column(s) transform(double f (double),unsigned int col)292 data_file& data_file::transform(double f(double), unsigned int col) 293 { 294 unsigned int rows(m_data.at(0).size()); 295 296 if (0 < col) // apply to selected column 297 for (unsigned int i=0; i<rows; ++i) 298 m_data.at(col-1).at(i) = f(m_data.at(col-1).at(i)); 299 300 else // apply to all columns, default 301 { 302 for (unsigned int j=0; j<m_data.size(); ++j) 303 for (unsigned int i=0; i<rows; ++i) 304 m_data.at(j).at(i) = f(m_data.at(j).at(i)); 305 } 306 307 return *this; 308 } 309 310 // apply f to selected columns, components of image go back to columns transform(P f (double,double),unsigned int col1,unsigned int col2)311 data_file& data_file::transform(P f(double, double), 312 unsigned int col1, unsigned int col2) 313 { 314 unsigned int rows(m_data.at(0).size()); 315 316 for (unsigned int i=0; i<rows; ++i) 317 { 318 P tmp(f(m_data.at(col1 - 1).at(i), m_data.at(col2 - 1).at(i))); 319 320 m_data.at(col1 - 1).at(i) = tmp.x1(); 321 m_data.at(col2 - 1).at(i) = tmp.x2(); 322 } 323 324 return *this; 325 } 326 transform(P f (double,double,double),unsigned int col1,unsigned int col2)327 data_file& data_file::transform(P f(double, double, double), 328 unsigned int col1, unsigned int col2) 329 { 330 unsigned int rows(m_data.at(0).size()); 331 332 for (unsigned int i=0; i<rows; ++i) 333 { 334 P tmp(f(m_data.at(col1 - 1).at(i), m_data.at(col2 - 1).at(i), 0)); 335 336 m_data.at(col1 - 1).at(i) = tmp.x1(); 337 m_data.at(col2 - 1).at(i) = tmp.x2(); 338 } 339 340 return *this; 341 } 342 transform(P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3)343 data_file& data_file::transform(P f(double, double, double), 344 unsigned int col1, 345 unsigned int col2, 346 unsigned int col3) 347 { 348 unsigned int rows(m_data.at(0).size()); 349 350 for (unsigned int i=0; i<rows; ++i) 351 { 352 P tmp(f(m_data.at(col1 - 1).at(i), 353 m_data.at(col2 - 1).at(i), 354 m_data.at(col3 - 1).at(i))); 355 356 m_data.at(col1 - 1).at(i) = tmp.x1(); 357 m_data.at(col2 - 1).at(i) = tmp.x2(); 358 m_data.at(col3 - 1).at(i) = tmp.x3(); 359 } 360 361 return *this; 362 } 363 364 // Erase rows where the data is outside of the specified range prune(const data_mask & dm,const unsigned int col)365 data_file& data_file::prune(const data_mask& dm, const unsigned int col) 366 { 367 std::vector<std::vector<double>::iterator> iter(m_data.size()); 368 for (unsigned int i = 0; i < m_data.size(); i++) 369 iter.at(i) = m_data.at(i).begin(); 370 371 while (iter.at(0) != m_data.at(0).end()) 372 { 373 if ( dm.masks(*iter.at(col-1)) ) 374 for (unsigned int j = 0; j < m_data.size(); j++) 375 m_data.at(j).erase(iter.at(j)); 376 377 else 378 for (unsigned int j = 0; j < m_data.size(); j++) 379 iter.at(j)++; 380 } 381 382 return *this; 383 } 384 prune(const interval & range,const unsigned int col)385 data_file& data_file::prune(const interval& range, const unsigned int col) 386 { 387 data_mask dm(range); 388 return prune(dm, col); 389 } 390 prune(double rmin,double rmax,const unsigned int col)391 data_file& data_file::prune(double rmin, double rmax, 392 const unsigned int col) 393 { 394 data_mask dm(rmin, rmax); 395 return prune(dm, col); 396 } 397 398 399 // (col1|col2) dot(unsigned int col1,unsigned int col2) const400 double data_file::dot(unsigned int col1, unsigned int col2) const 401 { 402 double sum(0); 403 404 if ((col1 > m_data.size()) || (col2 > m_data.size()) ) 405 epix_warning("Invalid column index in dot product"); 406 407 else 408 for (unsigned int i=0; i < m_data.at(0).size(); ++i) 409 sum += m_data.at(col1-1).at(i)*m_data.at(col2-1).at(i); 410 411 return sum; 412 } // end of dot 413 414 // avg (mean) of col1 avg(unsigned int col1) const415 double data_file::avg(unsigned int col1) const 416 { 417 double sum(0); 418 419 if (col1 > m_data.size()) 420 epix_warning("Invalid column index in mean"); 421 422 else 423 for (unsigned int i=0; i < m_data.at(0).size(); ++i) 424 sum += m_data.at(col1-1).at(i); 425 426 return sum/m_data.at(0).size(); 427 } // end of avg 428 429 // variance (x|x) - n*\bar{x}^2 var(unsigned int col1) const430 double data_file::var(unsigned int col1) const 431 { 432 double mean(avg(col1)); 433 434 return dot(col1, col1) - mean*mean*m_data.at(0).size(); 435 } // end of var 436 437 // covariance (x|y) - n*\bar{x}*\bar{y} covar(unsigned int col1,unsigned int col2) const438 double data_file::covar(unsigned int col1, unsigned int col2) const 439 { 440 return dot(col1, col2) - (m_data.at(0).size())*avg(col1)*avg(col2); 441 } // end of covar 442 regression(unsigned int col1,unsigned int col2) const443 void data_file::regression(unsigned int col1, unsigned int col2) const 444 { 445 Line(P(avg(col1), avg(col2)), covar(col1, col2)/var(col1)); 446 } 447 448 449 //// Output functions //// 450 // return selected column column(unsigned int n) const451 std::vector<double> data_file::column(unsigned int n) const 452 { 453 if (1 <= n && n <= m_data.size()) 454 return m_data.at(n-1); 455 else 456 { 457 if (0 < m_data.size()) 458 { 459 epix_warning("Out of range argument to data_file::column()"); 460 return m_data.at(0); 461 } 462 else 463 { 464 epix_warning("data_file::column() requested for empty file"); 465 std::vector<double> err_val(1, 0); 466 return err_val; 467 } 468 } 469 } 470 471 // apply f to selected column column(double f (double),unsigned int n) const472 std::vector<double> data_file::column(double f(double), unsigned int n) const 473 { 474 // must copy data 475 std::vector<double> value(m_data.at(0).size()); 476 unsigned int col(n); 477 478 if (col < 1 || m_data.size() < col) 479 { 480 if (0 < m_data.size()) 481 { 482 epix_warning("Out of range argument to data_file::column()"); 483 col=1; 484 } 485 else 486 { 487 epix_warning("data_file::column() requested for empty file"); 488 std::vector<double> err_val(1, 0); 489 return err_val; 490 } 491 } 492 493 for (unsigned int i=0; i<m_data.at(0).size(); ++i) 494 value.at(i) = f(m_data.at(col-1).at(i)); 495 496 return value; 497 } 498 499 500 // set precision for write() precision(const unsigned int n) const501 void data_file::precision(const unsigned int n) const 502 { 503 m_precision=n; 504 } 505 506 // write file or selected columns write(const char * filename) const507 void data_file::write(const char* filename) const 508 { 509 std::ofstream output(filename); 510 if (!output) 511 { 512 epix_warning("data_file::write() cannot open file"); 513 return; 514 } 515 516 // else 517 output.precision(m_precision); 518 output.setf(std::ios_base::showpoint); // pad with 0s 519 output.setf(std::ios_base::right); // right-justify 520 521 unsigned int cols(m_data.size()); 522 523 for (unsigned int j=0; j < m_data.at(0).size(); ++j) 524 { 525 output << m_data.at(0).at(j); 526 for (unsigned int i=1; i < cols; ++i) 527 output << m_delim << m_data.at(i).at(j); 528 529 output << std::endl; 530 } 531 output.close(); 532 } 533 534 write(const char * fname,std::string pt_out (double,double),unsigned int col1,unsigned int col2) const535 void data_file::write(const char* fname, std::string pt_out(double, double), 536 unsigned int col1, unsigned int col2) const 537 { 538 std::ofstream output(fname); 539 if (!output) 540 { 541 epix_warning("data_file::write() cannot open file"); 542 return; 543 } 544 545 // else 546 output.precision(m_precision); 547 548 for (unsigned int i=0; i < m_data.at(0).size(); ++i) 549 try 550 { 551 output << pt_out(m_data.at(col1-1).at(i), m_data.at(col2-1).at(i)); 552 } 553 554 catch (std::out_of_range) 555 { 556 epix_warning("data_file::write(): Invalid column index"); 557 return; 558 } 559 560 output.close(); 561 } // end of data_file::write(const char*, string function) 562 563 // experimental: LaTeX tabular environment tabular(const char * filename,const std::string & alignment,const std::string & legend) const564 void data_file::tabular(const char* filename, 565 const std::string& alignment, 566 const std::string& legend) const 567 { 568 std::ofstream output(filename); 569 if (!output) 570 { 571 epix_warning("data_file::table() cannot open file"); 572 return; 573 } 574 575 // else 576 using std::endl; 577 578 output.precision(m_precision); 579 unsigned int cols(m_data.size()); 580 581 output << "\\begin{tabular}{" << alignment << "}" << endl 582 << "\\hline" << endl; 583 if (legend != "") 584 output << legend << endl << "\\hline" << endl; 585 586 for (unsigned int j=0; j < m_data.at(0).size(); ++j) 587 { 588 output << "$" << truncate(m_data.at(0).at(j), m_precision) << "$"; 589 for (unsigned int i=1; i < cols; ++i) 590 output << " & $" << truncate(m_data.at(i).at(j), m_precision) << "$"; 591 592 output << " \\\\" << endl; 593 } 594 595 output << "\\hline" << endl 596 << "\\end{tabular}" << endl; 597 598 output.close(); 599 } // end of data_file::tabular() 600 601 plot(epix_mark_type TYPE,unsigned int col1,unsigned int col2,unsigned int col3,P f (double,double,double)) const602 void data_file::plot(epix_mark_type TYPE, 603 unsigned int col1, unsigned int col2, unsigned int col3, 604 P f(double, double, double)) const 605 { 606 unsigned int num_entries(m_data.at(0).size()); 607 608 std::vector<P> data(num_entries); 609 610 // create path 611 for (unsigned int i=0; i < num_entries; ++i) 612 try 613 { 614 if (col3 == 0) 615 data.at(i) = f(m_data.at(col1-1).at(i), 616 m_data.at(col2-1).at(i), 617 0); 618 619 else 620 data.at(i) = f(m_data.at(col1-1).at(i), 621 m_data.at(col2-1).at(i), 622 m_data.at(col3-1).at(i)); 623 } 624 625 catch (std::out_of_range) 626 { 627 epix_warning("data_file::plot(): Invalid column index"); 628 return; 629 } 630 631 // and write it 632 if (PATH == TYPE) // N.B. Unaffected by "select" 633 { 634 path temp(data, false, false); // not closed or filled 635 temp.draw(); 636 } 637 638 else 639 for (unsigned int i=0; i < num_entries; ++i) 640 { 641 // if (m_select(data.at(i))) 642 label_data temp(data.at(i), TYPE); 643 temp.draw(); 644 } 645 } 646 plot(epix_mark_type TYPE,P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3) const647 void data_file::plot(epix_mark_type TYPE, P f(double, double, double), 648 unsigned int col1, unsigned int col2, 649 unsigned int col3) const 650 { 651 plot(TYPE, col1, col2, col3, f); 652 } 653 654 //// global functions //// plot(const char * filename,epix_mark_type TYPE,unsigned int col1,unsigned int col2,unsigned int col3,P f (double,double,double))655 void plot(const char* filename, epix_mark_type TYPE, 656 unsigned int col1, unsigned int col2, unsigned int col3, 657 P f(double, double, double)) 658 { 659 data_file temp(filename); 660 temp.plot(TYPE, col1, col2, col3, f); 661 } 662 plot(const char * filename,epix_mark_type TYPE,P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3)663 void plot(const char* filename, epix_mark_type TYPE, 664 P f(double, double, double), 665 unsigned int col1, unsigned int col2, unsigned int col3) 666 { 667 data_file temp(filename); 668 temp.plot(TYPE, col1, col2, col3, f); 669 } 670 671 // pass 3rd arg by value histogram(const char * filename,unsigned int col,data_bins db,double scale)672 void histogram(const char* filename, unsigned int col, data_bins db, 673 double scale) 674 { 675 data_file temp(filename); 676 db.read(temp.column(col)); 677 db.histogram(scale); 678 } 679 bar_chart(const char * filename,unsigned int col,data_bins db,double scale)680 void bar_chart(const char* filename, unsigned int col, data_bins db, 681 double scale) 682 { 683 data_file temp(filename); 684 db.read(temp.column(col)); 685 db.bar_chart(scale); 686 } 687 } // end of namespace 688