1 /*
2  * data_file.cc -- ePiX::data_file class
3  *
4  * This file is part of ePiX, a C++ library for creating high-quality
5  * figures in LaTeX
6  *
7  * Version 1.2.0-2
8  * Last Change: September 26, 2007
9  */
10 
11 /*
12  * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
13  * Andrew D. Hwang <rot 13 nujnat at zngupf dot ubylpebff dot rqh>
14  * Department of Mathematics and Computer Science
15  * College of the Holy Cross
16  * Worcester, MA, 01610-2395, USA
17  */
18 
19 /*
20  * ePiX is free software; you can redistribute it and/or modify it
21  * under the terms of the GNU General Public License as published by
22  * the Free Software Foundation; either version 2 of the License, or
23  * (at your option) any later version.
24  *
25  * ePiX is distributed in the hope that it will be useful, but WITHOUT
26  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
28  * License for more details.
29  *
30  * You should have received a copy of the GNU General Public License
31  * along with ePiX; if not, write to the Free Software Foundation, Inc.,
32  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33  */
34 #include <cmath>
35 #include <fstream>
36 #include <sstream>
37 #include <list>
38 #include <vector>
39 #include <stdexcept>
40 
41 #include "errors.h"
42 
43 #include "functions.h"
44 #include "triples.h"
45 #include "path.h"
46 #include "spline.h"
47 
48 #include "label_data.h"
49 #include "curves.h"
50 
51 #include "interval.h"
52 #include "data_bins.h"
53 #include "data_mask.h"
54 #include "data_file.h"
55 
56 namespace ePiX {
57 
58   const std::string default_delim("\t");
59   const std::string default_commt("%");
60 
61   // magic number -- precision for data_file.write()
62   const unsigned int PRECISION(6);
63 
truncate(double arg,const unsigned int n)64   double truncate(double arg, const unsigned int n)
65   {
66     if (fabs(arg) < pow(0.1, n))
67       return 0;
68 
69     return arg;
70   }
71 
72   // warning message for data_file::tokenize
non_parsable(const std::string & msg)73   void non_parsable(const std::string& msg)
74   {
75     std::stringstream obuf;
76     obuf << "data_file::tokenise():" << std::endl
77 	 << "  Non-parsable input \"" << msg << "\", setting value to 0";
78     epix_warning(obuf.str());
79   }
80 
81   //// private data_file functions ////
82   // returns # of columns, i.e. # of entries in first parsable row
entries(const char * filename)83   unsigned int data_file::entries(const char* filename)
84   {
85     std::ifstream input(filename);
86     if (!input)
87       {
88 	epix_warning("Cannot open file \""+std::string(filename)+"\"");
89 	return(0);
90       }
91 
92     // else
93     std::string linebuf;
94     std::vector<double> tmp;
95 
96     while (getline(input, linebuf) && tmp.size() == 0)
97       {
98 	// strip comments
99 	linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt));
100 	if (linebuf.length() > 0)
101 	  tmp = tokenise(linebuf);
102       }
103 
104     input.close();
105 
106     if (tmp.size() == 0)
107       {
108 	std::stringstream obuf; // solely for code readability :)
109 	obuf << "File \"" << std::string(filename)
110 	     << "\" contains no parsable data";
111 
112 	epix_warning(obuf.str());
113       }
114 
115     return tmp.size();
116   } // end of entries(const char*)
117 
118 
119   // private function:
120   // Tokenise line using our delimiter, return a vector of doubles
tokenise(std::string line)121   std::vector<double> data_file::tokenise(std::string line)
122   {
123     size_t pos(line.find(m_delim, 0)); // first delimiter
124     std::string tmpStr;                // current chunk of input
125     double tmpDbl;                     // current parsed chunk
126     std::vector<double> tmpVec;        // output found so far
127 
128     // read current line into a vector of doubles
129     while(pos != std::string::npos)
130       {
131 	tmpStr = line.substr(0, pos);
132 	line.erase(0, pos+1);
133 	std::istringstream convStr(tmpStr);
134 
135 	if (!(convStr >> tmpDbl))
136 	  {
137 	    tmpDbl = 0;
138 	    non_parsable(convStr.str());
139 	  }
140 
141 	tmpVec.push_back(tmpDbl);
142 	pos = line.find(m_delim, 0);
143       }
144 
145     if (line.size()) // There's input remaining
146       {
147 	std::istringstream convStr(line);
148 
149 	if (!(convStr >> tmpDbl))
150 	  {
151 	    tmpDbl = 0;
152 	    non_parsable(convStr.str());
153 	  }
154 
155 	else
156 	  tmpVec.push_back(tmpDbl);
157       }
158 
159     return tmpVec;
160   }
161 
162   //// public data_file functions ////
data_file(unsigned int n)163   data_file::data_file(unsigned int n)
164     : m_precision(PRECISION), m_data(n),
165       m_delim(default_delim),
166       m_commt(default_commt) { }
167 
168   // provide two version to avoid exposing default delim, commt
data_file(const char * filename,const std::string & delim,const std::string & commt)169   data_file::data_file(const char* filename,
170 		       const std::string& delim, const std::string& commt )
171     : m_precision(PRECISION), m_delim(delim), m_commt(commt)
172   {
173     // get number of columns by parsing first line of file
174     const unsigned int N(entries(filename));
175     m_data.resize(N);
176 
177     if (N > 0) // file contains data
178       read(filename);
179 
180   } // end of data_file()
181 
182   // filename-only version
data_file(const char * filename)183   data_file::data_file(const char* filename)
184     : m_precision(PRECISION), m_delim(default_delim), m_commt(default_commt)
185   {
186     // get number of columns by parsing first line of file
187     const unsigned int N(entries(filename));
188     m_data.resize(N);
189 
190     if (N > 0) // file contains data
191       read(filename);
192 
193   } // end of data_file(const char*)
194 
195 
196   // file made from components
data_file(double f (double),double t_min,double t_max,unsigned int num_pts)197   data_file::data_file(double f(double),
198 		       double t_min, double t_max, unsigned int num_pts)
199     : m_precision(PRECISION), m_data(1),
200       m_delim(default_delim), m_commt(default_commt)
201   {
202     const double dt((t_max - t_min)/num_pts);
203     for (unsigned int i=0; i<= num_pts; ++i)
204       m_data.at(0).push_back(f(t_min+i*dt));
205   }
206 
data_file(double f1 (double),double f2 (double),double t_min,double t_max,unsigned int num_pts)207   data_file::data_file(double f1(double), double f2(double),
208 		       double t_min, double t_max, unsigned int num_pts)
209     : m_precision(PRECISION), m_data(2),
210       m_delim(default_delim), m_commt(default_commt)
211   {
212     const double dt((t_max - t_min)/num_pts);
213     for (unsigned int i=0; i<= num_pts; ++i)
214       {
215 	m_data.at(0).push_back(f1(t_min+i*dt));
216 	m_data.at(1).push_back(f2(t_min+i*dt));
217       }
218   }
219 
220 
data_file(double f1 (double),double f2 (double),double f3 (double),double t_min,double t_max,unsigned int num_pts)221   data_file::data_file(double f1(double), double f2(double), double f3(double),
222 		       double t_min, double t_max, unsigned int num_pts)
223     : m_precision(PRECISION), m_data(3),
224       m_delim(default_delim), m_commt(default_commt)
225   {
226     const double dt((t_max - t_min)/num_pts);
227     for (unsigned int i=0; i<= num_pts; ++i)
228       {
229 	m_data.at(0).push_back(f1(t_min+i*dt));
230 	m_data.at(1).push_back(f2(t_min+i*dt));
231 	m_data.at(2).push_back(f3(t_min+i*dt));
232       }
233   }
234 
235 
read(const char * filename)236   data_file& data_file::read(const char* filename)
237   {
238     unsigned int columns(entries(filename));
239     if (columns != m_data.size())
240       {
241 	std::stringstream msg;
242 	msg << "Column count mismatch in file " << filename;
243 	epix_warning(msg.str());
244       }
245 
246     else if (0 < columns)
247       {
248 	std::ifstream input(filename);
249 	std::string linebuf;
250 	std::vector<double> line;
251 
252 	bool warned(false);
253 
254 	while (getline(input, linebuf))
255 	  {
256 	    // strip comments
257 	    linebuf = linebuf.substr(0, linebuf.find_first_of(m_commt));
258 	    if (linebuf.length() > 0)
259 	      {
260 		line = tokenise(linebuf);
261 
262 		if (line.size() > m_data.size())
263 		  {
264 		    if (!warned)
265 		      {
266 			epix_warning("File has more columns than allocated");
267 			warned = true;
268 		      }
269 		  }
270 		else if (line.size() < m_data.size())
271 		  {
272 		    if (!warned)
273 		      {
274 			epix_warning("File has fewer columns than allocated");
275 			warned = true;
276 		      }
277 		  }
278 		else
279 		  {
280 		    for (unsigned int i = 0; i < m_data.size(); i++)
281 		      m_data.at(i).push_back(line.at(i));
282 		  }
283 	      } // linebuf non-empty
284 	  } // end of file
285 	input.close();
286       }
287     return *this;
288   } // end of data_file::read(const char*, const std::string&)
289 
290 
291   // transform column(s)
transform(double f (double),unsigned int col)292   data_file& data_file::transform(double f(double), unsigned int col)
293   {
294     unsigned int rows(m_data.at(0).size());
295 
296     if (0 < col) // apply to selected column
297       for (unsigned int i=0; i<rows; ++i)
298 	m_data.at(col-1).at(i) = f(m_data.at(col-1).at(i));
299 
300     else // apply to all columns, default
301       {
302 	for (unsigned int j=0; j<m_data.size(); ++j)
303 	  for (unsigned int i=0; i<rows; ++i)
304 	    m_data.at(j).at(i) = f(m_data.at(j).at(i));
305       }
306 
307     return *this;
308   }
309 
310     // apply f to selected columns, components of image go back to columns
transform(P f (double,double),unsigned int col1,unsigned int col2)311   data_file& data_file::transform(P f(double, double),
312 				  unsigned int col1, unsigned int col2)
313   {
314     unsigned int rows(m_data.at(0).size());
315 
316     for (unsigned int i=0; i<rows; ++i)
317       {
318 	P tmp(f(m_data.at(col1 - 1).at(i), m_data.at(col2 - 1).at(i)));
319 
320 	m_data.at(col1 - 1).at(i) = tmp.x1();
321 	m_data.at(col2 - 1).at(i) = tmp.x2();
322       }
323 
324     return *this;
325   }
326 
transform(P f (double,double,double),unsigned int col1,unsigned int col2)327   data_file& data_file::transform(P f(double, double, double),
328 				  unsigned int col1, unsigned int col2)
329   {
330     unsigned int rows(m_data.at(0).size());
331 
332     for (unsigned int i=0; i<rows; ++i)
333       {
334 	P tmp(f(m_data.at(col1 - 1).at(i), m_data.at(col2 - 1).at(i), 0));
335 
336 	m_data.at(col1 - 1).at(i) = tmp.x1();
337 	m_data.at(col2 - 1).at(i) = tmp.x2();
338       }
339 
340     return *this;
341   }
342 
transform(P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3)343   data_file& data_file::transform(P f(double, double, double),
344 				  unsigned int col1,
345 				  unsigned int col2,
346 				  unsigned int col3)
347   {
348     unsigned int rows(m_data.at(0).size());
349 
350     for (unsigned int i=0; i<rows; ++i)
351       {
352 	P tmp(f(m_data.at(col1 - 1).at(i),
353 		m_data.at(col2 - 1).at(i),
354 		m_data.at(col3 - 1).at(i)));
355 
356 	m_data.at(col1 - 1).at(i) = tmp.x1();
357 	m_data.at(col2 - 1).at(i) = tmp.x2();
358 	m_data.at(col3 - 1).at(i) = tmp.x3();
359       }
360 
361     return *this;
362   }
363 
364   // Erase rows where the data is outside of the specified range
prune(const data_mask & dm,const unsigned int col)365   data_file& data_file::prune(const data_mask& dm, const unsigned int col)
366   {
367     std::vector<std::vector<double>::iterator> iter(m_data.size());
368     for (unsigned int i = 0; i < m_data.size(); i++)
369       iter.at(i) = m_data.at(i).begin();
370 
371     while (iter.at(0) != m_data.at(0).end())
372       {
373 	if ( dm.masks(*iter.at(col-1)) )
374 	  for (unsigned int j = 0; j < m_data.size(); j++)
375 	    m_data.at(j).erase(iter.at(j));
376 
377 	else
378 	  for (unsigned int j = 0; j < m_data.size(); j++)
379 	    iter.at(j)++;
380       }
381 
382     return *this;
383   }
384 
prune(const interval & range,const unsigned int col)385   data_file& data_file::prune(const interval& range, const unsigned int col)
386   {
387     data_mask dm(range);
388     return prune(dm, col);
389   }
390 
prune(double rmin,double rmax,const unsigned int col)391   data_file& data_file::prune(double rmin, double rmax,
392 			      const unsigned int col)
393   {
394     data_mask dm(rmin, rmax);
395     return prune(dm, col);
396   }
397 
398 
399   // (col1|col2)
dot(unsigned int col1,unsigned int col2) const400   double data_file::dot(unsigned int col1, unsigned int col2) const
401   {
402     double sum(0);
403 
404     if ((col1 > m_data.size()) || (col2 > m_data.size()) )
405       epix_warning("Invalid column index in dot product");
406 
407     else
408       for (unsigned int i=0; i < m_data.at(0).size(); ++i)
409 	sum += m_data.at(col1-1).at(i)*m_data.at(col2-1).at(i);
410 
411     return sum;
412   } // end of dot
413 
414   // avg (mean) of col1
avg(unsigned int col1) const415   double data_file::avg(unsigned int col1) const
416   {
417     double sum(0);
418 
419     if (col1 > m_data.size())
420       epix_warning("Invalid column index in mean");
421 
422     else
423       for (unsigned int i=0; i < m_data.at(0).size(); ++i)
424 	sum += m_data.at(col1-1).at(i);
425 
426     return sum/m_data.at(0).size();
427   } // end of avg
428 
429   // variance (x|x) - n*\bar{x}^2
var(unsigned int col1) const430   double data_file::var(unsigned int col1) const
431   {
432     double mean(avg(col1));
433 
434     return dot(col1, col1) - mean*mean*m_data.at(0).size();
435   } // end of var
436 
437   // covariance (x|y) - n*\bar{x}*\bar{y}
covar(unsigned int col1,unsigned int col2) const438   double data_file::covar(unsigned int col1, unsigned int col2) const
439   {
440     return dot(col1, col2) - (m_data.at(0).size())*avg(col1)*avg(col2);
441   } // end of covar
442 
regression(unsigned int col1,unsigned int col2) const443   void data_file::regression(unsigned int col1, unsigned int col2) const
444   {
445     Line(P(avg(col1), avg(col2)), covar(col1, col2)/var(col1));
446   }
447 
448 
449   //// Output functions ////
450   // return selected column
column(unsigned int n) const451   std::vector<double> data_file::column(unsigned int n) const
452   {
453     if (1 <= n && n <= m_data.size())
454       return m_data.at(n-1);
455     else
456       {
457 	if (0 < m_data.size())
458 	  {
459 	    epix_warning("Out of range argument to data_file::column()");
460 	    return m_data.at(0);
461 	  }
462 	else
463 	  {
464 	    epix_warning("data_file::column() requested for empty file");
465 	    std::vector<double> err_val(1, 0);
466 	    return err_val;
467 	  }
468       }
469   }
470 
471   // apply f to selected column
column(double f (double),unsigned int n) const472   std::vector<double> data_file::column(double f(double), unsigned int n) const
473   {
474     // must copy data
475     std::vector<double> value(m_data.at(0).size());
476     unsigned int col(n);
477 
478     if (col < 1 || m_data.size() < col)
479       {
480 	if (0 < m_data.size())
481 	  {
482 	    epix_warning("Out of range argument to data_file::column()");
483 	    col=1;
484 	  }
485 	else
486 	  {
487 	    epix_warning("data_file::column() requested for empty file");
488 	    std::vector<double> err_val(1, 0);
489 	    return err_val;
490 	  }
491       }
492 
493     for (unsigned int i=0; i<m_data.at(0).size(); ++i)
494       value.at(i) = f(m_data.at(col-1).at(i));
495 
496     return value;
497   }
498 
499 
500   // set precision for write()
precision(const unsigned int n) const501   void data_file::precision(const unsigned int n) const
502   {
503     m_precision=n;
504   }
505 
506   // write file or selected columns
write(const char * filename) const507   void data_file::write(const char* filename) const
508   {
509     std::ofstream output(filename);
510     if (!output)
511       {
512 	epix_warning("data_file::write() cannot open file");
513 	return;
514       }
515 
516     // else
517     output.precision(m_precision);
518     output.setf(std::ios_base::showpoint); // pad with 0s
519     output.setf(std::ios_base::right);     // right-justify
520 
521     unsigned int cols(m_data.size());
522 
523     for (unsigned int j=0; j < m_data.at(0).size(); ++j)
524       {
525 	output << m_data.at(0).at(j);
526 	for (unsigned int i=1; i < cols; ++i)
527 	  output << m_delim << m_data.at(i).at(j);
528 
529 	output << std::endl;
530       }
531     output.close();
532   }
533 
534 
write(const char * fname,std::string pt_out (double,double),unsigned int col1,unsigned int col2) const535   void data_file::write(const char* fname, std::string pt_out(double, double),
536 			unsigned int col1, unsigned int col2) const
537   {
538     std::ofstream output(fname);
539     if (!output)
540       {
541 	epix_warning("data_file::write() cannot open file");
542 	return;
543       }
544 
545     // else
546     output.precision(m_precision);
547 
548     for (unsigned int i=0; i < m_data.at(0).size(); ++i)
549       try
550 	{
551 	  output << pt_out(m_data.at(col1-1).at(i), m_data.at(col2-1).at(i));
552 	}
553 
554       catch (std::out_of_range)
555 	{
556 	  epix_warning("data_file::write(): Invalid column index");
557 	  return;
558 	}
559 
560     output.close();
561   } // end of data_file::write(const char*, string function)
562 
563   // experimental: LaTeX tabular environment
tabular(const char * filename,const std::string & alignment,const std::string & legend) const564   void data_file::tabular(const char* filename,
565 			  const std::string& alignment,
566 			  const std::string& legend) const
567   {
568     std::ofstream output(filename);
569     if (!output)
570       {
571 	epix_warning("data_file::table() cannot open file");
572 	return;
573       }
574 
575     // else
576     using std::endl;
577 
578     output.precision(m_precision);
579     unsigned int cols(m_data.size());
580 
581     output << "\\begin{tabular}{" << alignment << "}" << endl
582 	   << "\\hline" << endl;
583     if (legend != "")
584       output << legend << endl << "\\hline" << endl;
585 
586     for (unsigned int j=0; j < m_data.at(0).size(); ++j)
587       {
588 	output << "$" << truncate(m_data.at(0).at(j), m_precision) << "$";
589 	for (unsigned int i=1; i < cols; ++i)
590 	  output << " & $" << truncate(m_data.at(i).at(j), m_precision) << "$";
591 
592 	output << " \\\\" << endl;
593       }
594 
595     output << "\\hline" << endl
596 	   << "\\end{tabular}" << endl;
597 
598     output.close();
599   } // end of data_file::tabular()
600 
601 
plot(epix_mark_type TYPE,unsigned int col1,unsigned int col2,unsigned int col3,P f (double,double,double)) const602   void data_file::plot(epix_mark_type TYPE,
603 		       unsigned int col1, unsigned int col2, unsigned int col3,
604 		       P f(double, double, double)) const
605   {
606     unsigned int num_entries(m_data.at(0).size());
607 
608     std::vector<P> data(num_entries);
609 
610     // create path
611     for (unsigned int i=0; i < num_entries; ++i)
612       try
613 	{
614 	  if (col3 == 0)
615 	    data.at(i) = f(m_data.at(col1-1).at(i),
616 			   m_data.at(col2-1).at(i),
617 			   0);
618 
619 	  else
620 	    data.at(i) = f(m_data.at(col1-1).at(i),
621 			   m_data.at(col2-1).at(i),
622 			   m_data.at(col3-1).at(i));
623 	}
624 
625       catch (std::out_of_range)
626 	{
627 	  epix_warning("data_file::plot(): Invalid column index");
628 	  return;
629 	}
630 
631     // and write it
632     if (PATH == TYPE) // N.B. Unaffected by "select"
633       {
634 	path temp(data, false, false); // not closed or filled
635 	temp.draw();
636       }
637 
638     else
639       for (unsigned int i=0; i < num_entries; ++i)
640 	{
641 	  //	if (m_select(data.at(i)))
642 	  label_data temp(data.at(i), TYPE);
643 	  temp.draw();
644 	}
645   }
646 
plot(epix_mark_type TYPE,P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3) const647   void data_file::plot(epix_mark_type TYPE, P f(double, double, double),
648 		       unsigned int col1, unsigned int col2,
649 		       unsigned int col3) const
650   {
651     plot(TYPE, col1, col2, col3, f);
652   }
653 
654   //// global functions ////
plot(const char * filename,epix_mark_type TYPE,unsigned int col1,unsigned int col2,unsigned int col3,P f (double,double,double))655   void plot(const char* filename, epix_mark_type TYPE,
656 	    unsigned int col1, unsigned int col2, unsigned int col3,
657 	    P f(double, double, double))
658   {
659     data_file temp(filename);
660     temp.plot(TYPE, col1, col2, col3, f);
661   }
662 
plot(const char * filename,epix_mark_type TYPE,P f (double,double,double),unsigned int col1,unsigned int col2,unsigned int col3)663   void plot(const char* filename, epix_mark_type TYPE,
664 	    P f(double, double, double),
665 	    unsigned int col1, unsigned int col2, unsigned int col3)
666   {
667     data_file temp(filename);
668     temp.plot(TYPE, col1, col2, col3, f);
669   }
670 
671   // pass 3rd arg by value
histogram(const char * filename,unsigned int col,data_bins db,double scale)672   void histogram(const char* filename, unsigned int col, data_bins db,
673 		 double scale)
674   {
675     data_file temp(filename);
676     db.read(temp.column(col));
677     db.histogram(scale);
678   }
679 
bar_chart(const char * filename,unsigned int col,data_bins db,double scale)680   void bar_chart(const char* filename, unsigned int col, data_bins db,
681 		 double scale)
682   {
683     data_file temp(filename);
684     db.read(temp.column(col));
685     db.bar_chart(scale);
686   }
687 } // end of namespace
688