1 //==============================================================================
2 //
3 // This file is part of GPSTk, the GPS Toolkit.
4 //
5 // The GPSTk is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published
7 // by the Free Software Foundation; either version 3.0 of the License, or
8 // any later version.
9 //
10 // The GPSTk is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with GPSTk; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 // This software was developed by Applied Research Laboratories at the
20 // University of Texas at Austin.
21 // Copyright 2004-2020, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24
25 //==============================================================================
26 //
27 // This software was developed by Applied Research Laboratories at the
28 // University of Texas at Austin, under contract to an agency or agencies
29 // within the U.S. Department of Defense. The U.S. Government retains all
30 // rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 // Pursuant to DoD Directive 523024
33 //
34 // DISTRIBUTION STATEMENT A: This software has been approved for public
35 // release, distribution is unlimited.
36 //
37 //==============================================================================
38
39 /// @file rstats.cpp
40 /// Read the data in one [or two] column(s) of a file, and output robust statistics,
41 /// two-sample statistics, a stem-and-leaf plot, a quantile-quantile plot,
42 /// and a robust polynomial fit. Options perform a variety of other analysis tasks.
43
44 //------------------------------------------------------------------------------------
45 // system includes
46 #include <ctime>
47 #include <iostream>
48 #include <iomanip>
49 #include <fstream>
50 #include <string>
51 #include <vector>
52 // GPSTk
53 #include "Exception.hpp"
54 #include "StringUtils.hpp"
55 #include "Epoch.hpp"
56 #include "Stats.hpp"
57 #include "FirstDiffFilter.hpp"
58 #include "WindowFilter.hpp"
59 #include "FDiffFilter.hpp"
60 #include "singleton.hpp"
61 #include "WNJfilter.hpp"
62 // geomatics
63 #include "CommandLine.hpp"
64 #include "RobustStats.hpp"
65 #include "MostCommonValue.hpp"
66 #include "expandtilde.hpp"
67 #include "logstream.hpp"
68
69 //------------------------------------------------------------------------------------
70 using namespace std;
71 using namespace gpstk;
72 using namespace StringUtils;
73
74 //------------------------------------------------------------------------------------
75 // NB Version below class GlobalData
76
77 //------------------------------------------------------------------------------------
78 // prototypes
79 /**
80 * @throw Exception
81 */
82 int GetCommandLine(int argc, char **argv);
83 /**
84 * @throw Exception
85 */
86 int Prepare(void);
87 /**
88 * @throw Exception
89 */
90 int ReadAndCompute(void);
91
92 /**
93 * @throw Exception
94 */
95 int StemAndLeaf(void);
96 /**
97 * @throw Exception
98 */
99 int QuantilePlot(void);
100 /**
101 * @throw Exception
102 */
103 int FindBins(void);
104 /**
105 * @throw Exception
106 */
107 int ComputeSum(void);
108 /**
109 * @throw Exception
110 */
111 int FitPoly(void);
112 /**
113 * @throw Exception
114 */
115 int Sequential(void);
116 /**
117 * @throw Exception
118 */
119 int Discontinuity(void);
120 /**
121 * @throw Exception
122 */
123 int FDFilter(void);
124 /**
125 * @throw Exception
126 */
127 int WindFilter(void);
128 /**
129 * @throw Exception
130 */
131 int FixFilter(void);
132 /**
133 * @throw Exception
134 */
135 int WhiteNoiseJerkFilter(void);
136 /**
137 * @throw Exception
138 */
139 int ComputeFFT(void);
140 /**
141 * @throw Exception
142 */
143 int OutputStats(void);
144 /**
145 * @throw Exception
146 */
147 int Outliers(void);
148
149 /** utility for develop and debug
150 * @throw Exception
151 */
152 int DumpData(string msg=string("DUMP"));
153
154 //------------------------------------------------------------------------------------
155 //------------------------------------------------------------------------------------
156 // Class GlobalData encapsulates global static data.
157 class GlobalData : public Singleton<GlobalData> {
158 public:
159 // Default and only constructor, sets defaults.
GlobalData()160 GlobalData() throw() { SetDefaults(); }
161
162 // prgm housekeeping
163 static const std::string Version; // see below
164 std::string PrgmName; // this program (must match Jamfile)
165 std::string Title; // name, version and runtime
166 std::ofstream oflog; // output log file stream
167
168 // command line input ----------------------------------------------------------
169 std::string cmdlineErrors,cmdlineDump,cmdlineUsage; // strings filled by parser
170 std::vector<std::string> cmdlineUnrecognized;
171
172 //std::string logfile; // name of log file
173 std::string inpath; // files and paths
174 std::string inputfile; // input file
175
176 // input
177 int col,xcol,wcol;
178 double xbeg,xend,dmin,dmax;
179 bool doxbeg,doxend,dodmin,dodmax;
180 string begstr,endstr,minstr,maxstr; // use strings so no default shows
181 double debias; // specify bias to remove
182 string debstr;
183 bool dodebias, debias0; // set dodebias using CommandLine::count()
184 // plots
185 bool doStemLeaf; // stem and leaf plot
186 bool doQplot; // quantile plot
187
188 bool doBin;
189 string binstr; // string for input of bin option(s)
190 int whichbin; // which input option: 1 n, 2 w, or 3 n,w,f
191 int nbin; // binning for histogram - number of bins
192 double widbin,firstbin; // width of bins, center of first bin
193
194 // analysis
195 bool doSum,doSumPlus,doFit; // sum, find range, step, gaps
196 string fitstr; // polynomial fit
197 unsigned int nfit; // degree of fit
198 vector<double> xevalfit; // evaluate fit at each x
199 bool doSeq; // sequential stats
200 bool doDisc; // discontinuities
201 string discstr; //
202 double ytol,xtol; // tolerances to discontinuities
203
204 // filters
205 bool doFDF,doFDF2,doWF,doXWF,doFixF;// first difference and window filters
206 double fdfstep,fdfsig,fdfrat; // step, sigma and ratio limit for FDF(2)
207 string fdfstr,fdf2str; // fdf and fdf2 filters
208 string windstr,xwindstr; // window filters
209 int windwidth; // width of window filter
210 double windstep, windratio; // slip tol. and ratio limit for WindowFilter
211 string fixfstr; // input for fixer filter
212 int fixN; // width of fixer filter
213 double fixlim,fixsig; // step and sigma limits of fixer filter
214
215 // other
216 bool doWNJ,doFFT; // white noise jerk, FFT
217 double wnjpom; // WNJ process/measurement noise ratio
218 double dtfft; // dt for FFT
219
220 // output
221 bool quiet; // suppress title, timing and other output
222 bool dostdout; // write rstats.out to stdout
223 bool brief; // 1-line output stats (conv,rob,2-samp,wted)
224 bool bc,br,bw,b2,brw; // 1-line output of only 1 stats
225 string label; // label added to brief/fit/disc/seq
226 bool nostats; // suppress output of stats
227 bool doKS; // do Anderson-Darling
228 bool doOuts; // outlier lists
229 string outstr; // use string so no default shows
230 double outscale; // outlier scaling for --outs
231 int width; // width of floating output
232 int prec; // precision of floating output
233
234 bool verbose,help; // help, etc
235 int debug;
236 std::string timefmt; // output format
237 // end command line input ------------------------------------------------------
238
239 // data, x-data and weights
240 std::vector<double> data,xdata,wdata,robwts;
241 // stats
242 Stats<double> cstats;
243 WtdStats<double> wstats,robwtstats;
244 TwoSampleStats<double> tsstats;
245 // robust
246 double median,mad,mest,Q1,Q3,KS;
247
248 // results
249 std::string msg; // msg for output
250 std::string longfmt; // times in error messages, etc.
251
252 private:
SetDefaults(void)253 void SetDefaults(void) throw()
254 {
255 PrgmName = std::string("rstats");
256
257 // command line input -------------------------------------------
258 // files and paths
259 //logfile = std::string("");
260 inputfile = std::string("");
261 // input
262 col = 1;
263 xcol = wcol = -1;
264 doxbeg = doxend = dodmin = dodmax = false;
265 begstr = endstr = minstr = maxstr = string("");
266 debstr = string("");
267 dodebias = debias0 = false;
268 // plots
269 doStemLeaf = doQplot = false;
270 doBin = false;
271 binstr = string("");
272 whichbin = 0;
273 // analysis
274 doSum = doSumPlus = doFit = false;
275 fitstr = string("");
276 nfit = 0;
277 doSeq = false;
278 discstr = string("");
279 // filters
280 {
281 vector<double> xdata,data;
282 vector<int> flags;
283 FirstDiffFilter<double> fdf(xdata,data,flags);
284 fdfstep = fdf.getLimit();
285 fdfsig = 0.02; fdfrat = 2.0; // TD need a default
286 fdfstr = asString(fdfstep,0);
287 fdf2str = asString(fdfstep,0) + string(",")
288 + asString(fdfsig,2) + string(",")
289 + asString(fdfrat,0);
290
291 WindowFilter<double> wf(xdata,data,flags);
292 windwidth = wf.getWidth();
293 windstep = wf.getMinStep();
294 windratio = wf.getMinRatio();
295 xwindstr = windstr = asString(windwidth) + string(",")
296 + asString(windstep,1) + string(",")
297 + asString(windratio,0);
298
299 fixfstr = "4,0.8,0.2";
300 fixN = 4;
301 fixlim = 0.8;
302 fixsig = 0.2;
303 }
304 // output
305 outstr = string("");
306 dostdout = quiet = nostats = doKS = doOuts = false;
307 brief = bc = br = bw = b2 = brw = false;
308 label = string("");
309 prec = 3;
310 width = 6;
311 timefmt = std::string("%4F %10.3g");
312 //timefmt = std::string("%.6Q"); // 1.5/86400 = 1.7e-5
313
314 // end command line input ---------------------------------------
315
316 longfmt = std::string("%04F %10.3g %04Y/%02m/%02d %02H:%02M:%06.3f %P");
317 }
318
319 }; // end class GlobalData
320
321 //------------------------------------------------------------------------------------
322 const string GlobalData::Version(string("3.1 6/19/19"));
323
324 //------------------------------------------------------------------------------------
325 //------------------------------------------------------------------------------------
main(int argc,char ** argv)326 int main(int argc, char **argv)
327 {
328 string PrgmName; // for catch
329 try {
330 // begin counting time
331 clock_t totaltime = clock();
332 Epoch wallbeg,wallend;
333 wallbeg.setLocalTime();
334
335 // get (create) the global data object (a singleton);
336 // since this is the first instance, this will also set default values
337 GlobalData& GD=GlobalData::Instance();
338 PrgmName = GD.PrgmName;
339
340 // Build title
341 GD.Title = GD.PrgmName + ", Ver. "
342 + GD.Version + wallbeg.printf(", Run %04Y/%02m/%02d at %02H:%02M:%02S");
343
344 // display title on screen -- see below after cmdline input
345
346 // process : loop once -----------------------------------------------------
347 int iret;
348 for(bool go=true; go; go=false) {
349
350 // process the command line ------------------------------------
351 iret = GetCommandLine(argc,argv);
352 if(iret) break;
353
354 // Title, debug output cmdline, define GD.msg, include_path(input file)
355 iret = Prepare();
356 if(iret) break;
357
358 // read input and compute stats
359 iret = ReadAndCompute();
360 if(iret) break;
361
362 //DumpData("INI");
363
364 // plots
365 if(GD.doStemLeaf) { iret = StemAndLeaf(); if(iret) break; }
366 if(GD.doQplot) { iret = QuantilePlot(); if(iret) break; }
367 // uses GD.cstats, sets nostats
368 if(GD.doBin) { iret = FindBins(); if(iret) break; }
369
370 // analysis
371 if(GD.doSum || GD.doSumPlus) { iret = ComputeSum(); if(iret) break; }
372 if(GD.doFit) { iret = FitPoly(); if(iret) break; }
373 if(GD.doSeq) { iret = Sequential(); if(iret) break; }
374 if(GD.doDisc) { iret = Discontinuity(); if(iret) break; }
375
376 // filters - all set nostats
377 if(GD.doFDF || GD.doFDF2) { iret = FDFilter(); if(iret) break; }
378 if(GD.doWF || GD.doXWF) { iret = WindFilter(); if(iret) break; }
379 if(GD.doFixF) { iret = FixFilter(); if(iret) break; }
380
381 // other - all set nostats
382 if(GD.doWNJ) { iret = WhiteNoiseJerkFilter(); if(iret) break; }
383 // uses GD.tsstats
384 if(GD.doFFT) { iret = ComputeFFT(); if(iret) break; }
385
386 // output stats
387 if(!GD.nostats) { iret = OutputStats(); if(iret) break; }
388 if(GD.doOuts) { iret = Outliers(); if(iret) break; }
389
390 } // end loop once
391
392 // error condition ---------------------------------------------------------
393 // return codes: 0 ok
394 // 1 help
395 // 2 cmd line errors
396 // 3 requested validation
397 // 4 invalid input
398 // 5 open fail
399 // 6 requested dump
400 // 7 other...
401 // -3 cmd line definition invalid (CommandLine)
402 //cout << "Return code is " << iret << endl;
403 if(iret != 0) {
404 if(iret != 1) {
405 string tmsg;
406 tmsg = GD.PrgmName + string(" is terminating with code ")
407 + StringUtils::asString(iret);
408 cout << tmsg << endl;
409 }
410
411 if(iret < 5) cout << "# " << GD.Title << endl;
412
413 if(iret == 1) { cout << GD.cmdlineUsage; }
414 else if(iret == 2) { cout << GD.cmdlineErrors; }
415 else if(iret == 3) { cout << "The user requested input validation."; }
416 else if(iret == 4) { cout << "The input is invalid."; }
417 else if(iret == 5) { cout << "There is not enough data."; }
418 else if(iret == 7) { cout << "The input file could not be opened."; }
419 else if(iret == -3) { // cmd line definition invalid
420 cout << "The command line definition is invalid.\n" << GD.cmdlineErrors;
421 }
422 else // fix this
423 cout << "temp - Some other return code..." << iret;
424 cout << endl;
425 }
426
427 // compute and print run time ----------------------------------------------
428 if(iret != 1 && !GD.quiet) {
429 wallend.setLocalTime();
430 totaltime = clock()-totaltime;
431 cout << "# " << PrgmName << " timing: " << fixed << setprecision(3)
432 << double(totaltime)/double(CLOCKS_PER_SEC) << " seconds. ("
433 << (wallend-wallbeg) << " sec)" << endl;
434 }
435
436 if(iret == 0) return 0; else return -1;
437 }
438 catch(Exception& e) {
439 cerr << PrgmName << " caught Exception:\n" << e.what() << endl;
440 }
441 catch (...) {
442 cerr << "Unknown error in " << PrgmName << ". Abort." << endl;
443 }
444 return -1;
445 } // end main()
446
447 //------------------------------------------------------------------------------------
448 //------------------------------------------------------------------------------------
GetCommandLine(int argc,char ** argv)449 int GetCommandLine(int argc, char **argv)
450 {
451 try {
452 int i;
453 GlobalData& GD=GlobalData::Instance();
454
455 // create list of command line options, and fill it
456 // put required options first - they will get listed first anyway
457 CommandLine opts;
458
459 // build the options list == syntax page
460 string PrgmDesc =
461 "Program " + GD.PrgmName + " reads one or more columns of numbers in an "
462 "ascii input file and\n computes standard and robust statistics on the data, "
463 "with options to perform\n a variety of other analysis tasks as well. Output "
464 "is to stdout or file rstats.out (unless --stdout).\n "
465 "Input is on the command line, and/or of the same format in a file\n "
466 "(see --file below); lines in the file that begin with '#' are ignored."
467 "\n Options are shown below, followed by a description, with default in ();\n "
468 "however {*} means option must be present to activate - defaults suggested.";
469
470 string pad(" ");
471 // opts.Add(char, opt, arg, repeat?, required?, &target, pre-descript, descript.);
472 bool req(false);
473 string dummy(""); // dummy for --file
474 opts.Add('f', "file", "name", true, req, &dummy, "\n# File I/O:",
475 "Name of file with options [#-EOL = comment] [-f]");
476 //opts.Add(0, "log", "name", false, req, &GD.logfile, "",
477 // "Name of output log file");
478 //opts.Add(0, "logpath", "path", false, req, &GD.logpath, "",
479 // "Path for output log file");
480 opts.Add('i', "input", "name", true, req, &GD.inputfile, "",
481 "Name of input file (-i|--input optional) [-i]");
482 opts.Add(0, "path", "dir", false, req, &GD.inpath, "",
483 "Path for input file");
484 // input
485 opts.Add('y', "col", "c", false, req, &GD.col, "\n# input:",
486 "read data in this column [-y|-c]");
487 opts.Add('x', "xcol", "c", false, req, &GD.xcol, "",
488 "also read 'x' data in this column [-x]");
489 opts.Add(0, "wt", "c", false, req, &GD.wcol, "",
490 "weight with fabs() of data in this column [-wt]");
491
492 // modify input
493 opts.Add(0, "beg", "xb", false, req, &GD.begstr, "\n# modify input:",
494 "include only input that satisfies x > xb");
495 opts.Add(0, "end", "xe", false, req, &GD.endstr, "",
496 "include only input that satisfies x < xe");
497 opts.Add(0, "min", "dmin", false, req, &GD.minstr, "",
498 "include only input that satisfies data > dmin");
499 opts.Add(0, "max", "dmax", false, req, &GD.maxstr, "",
500 "include only input that satisfies data < dmax");
501 opts.Add(0, "debias", "d", false, req, &GD.debstr, "",
502 "remove bias d from data to compute stats");
503 opts.Add(0, "debias0", "", false, req, &GD.debias0, "",
504 "remove bias = (1st data pt) from data to compute stats");
505 // plots
506 opts.Add(0, "plot", "", false, req, &GD.doStemLeaf, "\n# plots:",
507 "generate a stem-and-leaf plot from the data");
508 opts.Add(0, "qplot", "", false, req, &GD.doQplot, "",
509 "generate data for data vs. quantile plot [-> rstats.out]");
510 opts.Add(0, "bin", "", false, req, &GD.binstr, "",
511 "histogram: define bins and count data [ignore x, set nostats]\n"
512 +pad+" arg = <n> (int) compute approximately n bins\n"
513 +pad+" arg = <w> (float) compute bins of width w\n"
514 +pad+" arg = <n,w,cf> set bin number,width,center-of-first\n"
515 +pad+" (hint: pipe into plot -x 2 -y 3 --hist 0 -g 640x480)");
516 // analysis
517 opts.Add(0, "sum", "", false, req, &GD.doSum, "\n# analysis:",
518 "compute sum of data [ignore x]");
519 opts.Add(0, "sum+", "", false, req, &GD.doSumPlus, "",
520 "compute sum, range, gaps and estimated stepsize [ignore x]");
521 opts.Add(0, "fit", "f[,x]", false, req, &GD.fitstr, "",
522 "fit a robust polynomial of degree f to data(xdata) [-> rstats.out]\n"
523 +pad+" if 1 or more <,x> are present, also evaluate fit at x(s)");
524 opts.Add(0, "seq", "", false, req, &GD.doSeq, "",
525 "output data, in input order, with sequential stats");
526 opts.Add(0, "disc", "y[,x]", false, req, &GD.discstr, "",
527 "output data, first differences, and discontinuities with stats\n"
528 +pad+" per segment; use y,x for data,xdata limits");
529 // filters
530 opts.Add(0, "fdf", "x", false, req, &GD.fdfstr, "\n# stats filters {*}:",
531 "first-difference filter slip limit x");
532 opts.Add(0, "fdf2", "x,s,r", false, req, &GD.fdf2str, "",
533 "first-diff(2) filter, limits: slip x, sig s, ratio r");
534 opts.Add(0, "wind", "n[,s,r]", false, req, &GD.windstr, "",
535 "window filter width n, limits: slip s, ratio r");
536 opts.Add(0, "xwind", "n[,s,r]", false, req, &GD.xwindstr, "",
537 "window filter (--wind) with 2-sample stats");
538 opts.Add(0, "fix", "n[,x,s]", false, req, &GD.fixfstr, "",
539 "FDiff 'fix' filter (for SSD phase) width n, limits: slip x sigma s");
540 // others
541 opts.Add(0, "wnj", "pom", false, req, &GD.wnjpom, "\n# other {*}:",
542 "white noise jerk KF using pom = process/meas noises (small=>smooth)\n"
543 +pad+" e.g. rstats prsclk.dat -x 1 -y 11 --wnj 1.e-8 | grep KSU | plot\n"
544 +pad+" -x 3 -y 10,data -y 4,,'lines lt 3' -y2 11,res --points");
545 opts.Add(0, "fft", "dt", false, req, &GD.dtfft, "",
546 "compute FFT with dt [dt=0 => compute dt from xdata] NB beware -p\n"
547 +pad+" e.g. rstats testfft.data -x 1 -y 9 --fft 0.0034722 |\n"
548 +pad+" plotrfft -o2 'set xr [0:25]'");
549 opts.Add(0, "KS", "", false, req, &GD.doKS, "",
550 "also output the Anderson-Darling statistic, a KS-test,\n"
551 +pad+" where AD > 0.752 implies non-normal");
552 // output
553 opts.Add(0, "nostats", "", false, req, &GD.nostats, "\n# output:",
554 "supress total stats output (for analyses)");
555 opts.Add('q', "quiet", "", false, req, &GD.quiet, "",
556 "supress title, timing and other non-essential output [-q]");
557 opts.Add(0, "stdout", "", false, req, &GD.dostdout, "",
558 "send rstats.out output to stdout");
559 opts.Add(0, "outs", "s", false, req, &GD.outstr, "",
560 "explicitly list all data outside s*(outlier limits)");
561 opts.Add('b', "brief", "", false, req, &GD.brief, "",
562 "single-line quiet stats output (conv, rob, wtd, r-wtd, 2-samp) [-b]\n"
563 +pad+" [or -bc -br -bw -brw -b2 for single quiet brief output]");
564 opts.Add('l', "label", "L", false, req, &GD.label, "",
565 "add label L to the (brief/disc/seq/fdf/wind/fft) outputs [-l]");
566 opts.Add('p', "prec", "n", false, req, &GD.prec, "\n# format and help:",
567 "specify precision of all float outputs [-p]");
568 opts.Add('w', "width", "n", false, req, &GD.width, "",
569 "specify width of all float outputs");
570 // help,verbose,debug handled by CommandLine
571
572 // add options that are ignored (true if it has an arg)
573 //opts.Add_ignore("--PRSoutput",true);
574
575 // deprecated args
576 opts.Add_deprecated("-c","-y");
577
578 // --------------------------------------------------------------------------
579 // declare it and parse it; write all errors to string GD.cmdlineErrors
580 int iret = opts.ProcessCommandLine(argc,argv,PrgmDesc,
581 GD.cmdlineUsage,GD.cmdlineErrors,GD.cmdlineUnrecognized);
582 if(iret == -2) return iret; // bad alloc
583 if(iret == -3) return iret; // cmd line definition invalid
584
585 // --------------------------------------------------------------------------
586 // do extra parsing -- append errors to GD.cmdlineErrors
587 vector<string> fields;
588 ostringstream oss;
589
590 // unrecognized arguments - few exceptions or an error
591 if(GD.cmdlineUnrecognized.size() > 0) {
592 vector<string> unrecogs;
593 // backward compatibility...
594 for(i=0; i<GD.cmdlineUnrecognized.size(); i++) {
595 string str(GD.cmdlineUnrecognized[i]);
596 if(str == "-bc") GD.bc = true;
597 else if(str == "-br") GD.br = true;
598 else if(str == "-bw") GD.bw = true;
599 else if(str == "-brw") GD.brw = true;
600 else if(str == "-b2") GD.b2 = true;
601 else {
602 if(!GD.inputfile.empty()) unrecogs.push_back(str);
603 else GD.inputfile = str;
604 }
605 }
606 if(unrecogs.size()) {
607 oss << " Error - unrecognized arguments\n";
608 for(i=0; i<unrecogs.size(); i++)
609 oss << unrecogs[i] << "\n";
610 oss << " End of unrecognized arguments\n";
611 }
612 }
613
614 // check inconsistent cmdline input
615 if(GD.bw && GD.wcol == -1) {
616 oss << " Warning - brief(w) but no --wcol - ignore bw output";
617 GD.bw = false;
618 }
619 if(GD.b2 && GD.xcol == -1) {
620 oss << " Warning - brief(2) but no --xcol - ignore b2 output";
621 GD.b2 = false;
622 }
623
624 if(!GD.begstr.empty()) { GD.xbeg = asDouble(GD.begstr); GD.doxbeg = true; }
625 if(!GD.endstr.empty()) { GD.xend = asDouble(GD.endstr); GD.doxend = true; }
626 if(!GD.minstr.empty()) { GD.dmin = asDouble(GD.minstr); GD.dodmin = true; }
627 if(!GD.maxstr.empty()) { GD.dmax = asDouble(GD.maxstr); GD.dodmax = true; }
628 if(!GD.debstr.empty()) { GD.debias = asDouble(GD.debstr); GD.dodebias = true; }
629 if(!GD.outstr.empty()) { GD.outscale = asDouble(GD.outstr); GD.doOuts = true; }
630
631 // set switches using CommandLine::count()
632 GD.doFit = (opts.count("fit") > 0);
633 GD.doFDF = (opts.count("fdf") > 0);
634 GD.doFDF2 = (opts.count("fdf2") > 0);
635 GD.doWF = (opts.count("wind") > 0);
636 GD.doXWF = (opts.count("xwind") > 0);
637 GD.doFixF = (opts.count("fix") > 0);
638 GD.doWNJ = (opts.count("wnj") > 0);
639 GD.doFFT = (opts.count("fft") > 0);
640
641 // bin
642 if(!GD.binstr.empty()) {
643 GD.doBin = true;
644 fields = split(GD.binstr,',');
645 if(fields.size() == 1) {
646 if(isDigitString(fields[0])) {
647 GD.nbin = asInt(fields[0]);
648 GD.whichbin = 1;
649 }
650 else if(isDecimalString(fields[0])) {
651 GD.widbin = asDouble(fields[0]);
652 GD.whichbin = 2;
653 }
654 }
655 else if(fields.size() == 3) {
656 GD.nbin = asInt(fields[0]);
657 GD.widbin = asDouble(fields[1]);
658 GD.firstbin = asDouble(fields[2]);
659 GD.whichbin = 3;
660 }
661 else {
662 oss << " Error - invalid argument to --bin " << GD.binstr << "\n";
663 GD.doBin = false;
664 }
665 }
666
667 // fit
668 if(GD.doFit) {
669 fields = split(GD.fitstr,',');
670 GD.nfit = asInt(fields[0]);
671 if(fields.size() > 1) {
672 for(i=1; i<fields.size(); i++)
673 GD.xevalfit.push_back(asDouble(fields[i]));
674 }
675 }
676
677 // discontinuities
678 if(!GD.discstr.empty()) {
679 GD.doDisc = true;
680 fields = split(GD.discstr,',');
681 if(fields.size() == 1) {
682 GD.ytol = asDouble(fields[0]);
683 GD.xtol = -1.0;
684 }
685 else if(fields.size() == 2) {
686 GD.ytol = asDouble(fields[0]);
687 GD.xtol = asDouble(fields[1]);
688 }
689 else {
690 oss << " Error - invalid argument to --disc " << GD.discstr << "\n";
691 GD.doDisc = false;
692 }
693 }
694
695 // fdf filter
696 if(GD.doFDF) {
697 fields = split(GD.fdfstr,',');
698 if(fields.size() == 1) {
699 GD.fdfstep = asDouble(fields[0]);
700 }
701 else {
702 oss << " Error - invalid argument to --fdf " << GD.fdfstr << "\n";
703 GD.doFDF = false;
704 }
705 }
706
707 // fdf2 filter
708 if(GD.doFDF2) {
709 fields = split(GD.fdf2str,',');
710 if(fields.size() == 3) {
711 GD.fdfstep = asDouble(fields[0]);
712 GD.fdfsig = asDouble(fields[1]);
713 GD.fdfrat = asDouble(fields[2]);
714 }
715 else {
716 oss << " Error - invalid argument to --fdf2 " << GD.fdf2str << "\n";
717 GD.doFDF2 = false;
718 }
719 }
720
721 // fix filter
722 if(GD.doFixF) {
723 fields = split(GD.fixfstr,',');
724 if(fields.size() == 1) {
725 GD.fixN = asInt(fields[0]);
726 }
727 else if(fields.size() == 3) {
728 GD.fixN = asInt(fields[0]);
729 GD.fixlim = asDouble(fields[1]);
730 GD.fixsig = asDouble(fields[2]);
731 }
732 else {
733 oss << " Error - invalid argument to --fix " << GD.fixfstr << "\n";
734 GD.doFixF = false;
735 }
736 }
737
738 // window filters
739 if(GD.doWF || GD.doXWF) {
740 fields = split((GD.doWF ? GD.windstr : GD.xwindstr),',');
741 if(fields.size() == 1)
742 GD.windwidth = asInt(fields[0]);
743 else if(fields.size() == 3) {
744 GD.windstep = asDouble(fields[1]);
745 GD.windratio = asDouble(fields[2]);
746 }
747 else
748 oss << " Error - invalid argument to --" << (GD.doWF ? "":"x") << "wind "
749 << (GD.doWF ? GD.windstr : GD.xwindstr) << "\n";
750 }
751
752 // set nostats
753 if(GD.doBin || GD.doFDF || GD.doFDF2 || GD.doWF || GD.doXWF
754 || GD.doFixF || GD.doWNJ || GD.doFFT)
755 GD.nostats = true;
756
757 // set -b flags
758 if(GD.brief) {
759 GD.bc = GD.br = GD.brw = true;
760 if(GD.wcol > -1) GD.bw = true;
761 if(GD.xcol > -1) GD.b2 = true;
762 }
763
764 // set quiet when brief
765 GD.quiet = (GD.quiet || GD.brief || GD.bc || GD.br || GD.bw || GD.brw || GD.b2);
766
767 // --------------------------------------------------------------------------
768 // add to command line errors
769 GD.cmdlineErrors += oss.str();
770
771 // set verbose and debug (CommandLine sets LOGlevel)
772 GD.verbose = (LOGlevel >= VERBOSE);
773 GD.debug = (LOGlevel - DEBUG);
774
775 // --------------------------------------------------------------------------
776 // dump it
777 oss.str(""); // clear it
778 oss << "------ Summary of " << GD.PrgmName
779 << " command line configuration --------" << endl;
780 opts.DumpConfiguration(oss);
781 // perhaps dump the 'extra parsing' things
782 oss << "------ End configuration summary --------" << endl;
783 GD.cmdlineDump = oss.str();
784
785 if(opts.hasHelp()) return 1;
786 if(opts.hasErrors()) return 2;
787 if(!GD.cmdlineErrors.empty()) return 2;
788 return iret;
789 }
790 catch(Exception& e) { GPSTK_RETHROW(e); }
791 }
792
793 //------------------------------------------------------------------------------------
794 // Title, debug output of cmdline, define GD.msg, include_path for input file
Prepare(void)795 int Prepare(void)
796 {
797 try {
798 int i,j;
799 GlobalData& GD=GlobalData::Instance();
800
801 // display title
802 if(!GD.quiet) cout << "# " << GD.Title << endl;
803
804 if(GD.debug > -1) {
805 cout << "Found debug switch at level " << GD.debug << endl;
806 cout << "\n" << GD.cmdlineUsage << endl; // this will contain list of args
807 GD.verbose = true;
808 }
809 if(GD.verbose && !GD.quiet) cout << GD.cmdlineDump << endl;
810
811 // set msg
812 ostringstream oss;
813 oss << "Data of col " << GD.col;
814 if(GD.xcol > -1) oss << ", x of col " << GD.xcol;
815 oss << ", file " << GD.inputfile;
816 GD.msg = oss.str();
817
818 //include_path(GD.logpath,GD.logfile);
819 //expand_filename(GD.logfile);
820
821 if(GD.inputfile.empty()) {
822 GD.inputfile = string("stdin");
823 if(GD.verbose && !GD.quiet)
824 cout << "# Input from stdin\n";
825 }
826 else {
827 include_path(GD.inpath,GD.inputfile);
828 expand_filename(GD.inputfile);
829 if(GD.verbose && !GD.quiet)
830 cout << "# Found input file name " << GD.inputfile << endl;
831 }
832
833 return 0;
834 }
835 catch(Exception& e) { GPSTK_RETHROW(e); }
836 }
837
838 //------------------------------------------------------------------------------------
ReadAndCompute(void)839 int ReadAndCompute(void)
840 {
841 try {
842 GlobalData& GD=GlobalData::Instance();
843
844 // open input file -------------------------------------------
845 istream *pin;
846 if(GD.inputfile != string("stdin")) {
847 pin = new ifstream(GD.inputfile.c_str());
848 if(pin->fail()) {
849 cout << "Could not open file " << GD.inputfile << " .. abort.\n";
850 return -2;
851 }
852 }
853 else {
854 pin = &cin;
855 }
856
857 // read input file -------------------------------------------
858 unsigned int i,nd(0),nxd(0);
859 {
860 int j; // not unsigned!
861 double d,x(-1.0),w(-1.0),bias;
862 string line;
863 vector<string> words;
864 while(1) {
865 getline(*pin,line);
866 if(pin->eof() || !pin->good()) break;
867
868 StringUtils::stripLeading(line," ");
869 if(line[0] == '#') continue;
870
871 StringUtils::stripTrailing(line,"\n");
872 StringUtils::stripTrailing(line,"\r");
873 StringUtils::stripTrailing(line," ");
874 StringUtils::change(line,"\t"," ");
875
876 if(line.empty()) continue;
877
878 words = StringUtils::split(line,' ');
879 j = words.size();
880
881 //cout << "LINE (" << j << ") " << line << endl;
882
883 // check input NB col numbers start at 1, indexes start at 0
884 if(j == 0) continue;
885 if(GD.col > j) { nd++; continue; }
886 if(GD.xcol > j) { nxd++; continue; }
887 if(GD.wcol > j) continue;
888 if(!(isScientificString(words[GD.col-1]))) { nd++; continue; }
889 if(GD.xcol > -1 && !(isScientificString(words[GD.xcol-1])))
890 { nd++; continue; }
891 if(GD.wcol > -1 && !(isScientificString(words[GD.wcol-1]))) continue;
892
893 // user limits on data
894 d = asDouble(words[GD.col-1]);
895 if(GD.dodmin && d < GD.dmin) continue;
896 if(GD.dodmax && d > GD.dmax) continue;
897
898 if(GD.xcol > -1) {
899 x = asDouble(words[GD.xcol-1]);
900 // user limits on x
901 if(GD.doxbeg && x < GD.xbeg) continue;
902 if(GD.doxend && x > GD.xend) continue;
903 }
904 if(GD.wcol > -1)
905 w = asDouble(words[GD.wcol-1]);
906
907 // debias
908 if(GD.debias0 && GD.data.size() == 0) { GD.debias = d; GD.dodebias = true; }
909 if(GD.dodebias) d -= GD.debias;
910
911 //cout << "READ " << i << " " << d << " " << x << endl;
912 GD.data.push_back(d);
913 if(GD.xcol > -1) GD.xdata.push_back(x);
914 if(GD.wcol > -1) GD.wdata.push_back(w);
915
916 }
917
918 if(pin != &cin) {
919 ((ifstream *)pin)->close();
920 delete pin;
921 }
922 }
923
924 // check that input is good ----------------------------------
925 if(GD.data.size() < 2) {
926 cout << "Abort: not enough data: " << GD.data.size() << " data read";
927 if(nd > 0) cout << " [data(col) not found on " << nd << " lines]";
928 if(nxd > 0) cout << " [data(xcol) not found on " << nxd << " lines]";
929 cout << endl;
930 return 5;
931 }
932 if(GD.xcol != -1 && GD.xdata.size() == 0) {
933 cout << "Abort: No data found in 'x' column." << endl;
934 return 5;
935 }
936 if(nd > GD.data.size()/2)
937 cout << "Warning: data(col) not found on " << nd << " lines" << endl;
938 if(nxd > GD.xdata.size()/2)
939 cout << "Warning: data(xcol) not found on " << nxd << " lines" << endl;
940
941 if(GD.verbose) cout << "Found " << GD.data.size() << " data.\n";
942
943 // compute stats ---------------------------------------------
944 const unsigned N(GD.data.size());
945 for(i=0; i<N; i++) {
946 GD.cstats.Add(GD.data[i]);
947 if(GD.xcol > -1) GD.tsstats.Add(GD.xdata[i],GD.data[i]);
948 if(GD.wcol > -1) GD.wstats.Add(GD.data[i], GD.wdata[i]);
949 }
950
951 // compute robust stats --------------------------------------------------
952 {
953 // NB do not sort GD.data and GD.xdata
954 vector<double> data(GD.data),xdata(GD.xdata),robwts(N);
955
956 if(xdata.size() > 0) QSort(&data[0],&xdata[0],N);
957 else QSort(&data[0],N);
958
959 Robust::Quartiles(&data[0],data.size(),GD.Q1,GD.Q3);
960 // outlier limit (high) 2.5Q3-1.5Q1
961 // outlier limit (low ) 2.5Q1-1.5Q3
962 GD.mad = Robust::MedianAbsoluteDeviation(&data[0],N,GD.median);
963 GD.mest = Robust::MEstimate(&data[0], N, GD.median, GD.mad, &robwts[0]);
964
965 for(i=0; i<N; i++)
966 GD.robwtstats.Add(data[i],robwts[i]);
967 }
968
969 return 0;
970 }
971 catch(Exception& e) { GPSTK_RETHROW(e); }
972 }
973
974 //------------------------------------------------------------------------------------
OutputStats(void)975 int OutputStats(void)
976 {
977 try {
978 GlobalData& GD=GlobalData::Instance();
979
980 const int N(GD.data.size());
981 cout << fixed << setprecision(GD.prec);
982
983 // output stats ----------------------------------------------------------
984 string fitmsg(GD.nfit==0 ? "" : "(fit resid)");
985 string label(GD.label.empty() ? "" : " "+GD.label);
986 // TD handle bias output better
987
988 if(GD.bc || GD.b2) {
989 cout << "rstats(con):" << label
990 << " N " << setw(GD.width) << GD.cstats.N()
991 << " Ave " << setw(GD.width) << GD.cstats.Average()
992 << " Std " << setw(GD.width) << GD.cstats.StdDev()
993 << " Var " << setw(GD.width) << GD.cstats.Variance()
994 << " Min " << setw(GD.width) << GD.cstats.Minimum()
995 << " Max " << setw(GD.width) << GD.cstats.Maximum()
996 << " P2P " << setw(GD.width) << GD.cstats.Maximum()-GD.cstats.Minimum();
997 if(GD.dodebias) cout << " Bias " << GD.debias;
998 cout << endl;
999 }
1000 else if(!GD.quiet) {
1001 cout << "Conventional statistics: " << GD.msg << ":\n";
1002 cout << GD.cstats << " Median = " << GD.median << endl;
1003 if(GD.dodebias) cout << " Bias = " << GD.debias << endl;
1004 }
1005
1006 if(GD.xcol > -1) {
1007 if(GD.b2) {
1008 cout << "rstats(two):" << label
1009 << " N " << setw(GD.width) << GD.data.size()
1010 //<< " VarX " << setprecision(GD.prec) << GD.tsstats.VarianceX()
1011 //<< " VarY " << setprecision(GD.prec) << GD.tsstats.VarianceY()
1012 << " Int " << setprecision(GD.prec) << GD.tsstats.Intercept()
1013 << " Slp " << setprecision(GD.prec) << GD.tsstats.Slope()
1014 << " +- " << setprecision(GD.prec) << GD.tsstats.SigmaSlope()
1015 << " CSig " << setprecision(GD.prec) << GD.tsstats.SigmaYX()
1016 << " Corr " << setprecision(GD.prec) << GD.tsstats.Correlation();
1017 if(GD.dodebias) cout << " Bias " << GD.debias;
1018 cout << endl;
1019 }
1020 else if(!GD.quiet) {
1021 cout << "Two-sample statistics: " << GD.msg << ":\n"
1022 << setprecision(GD.prec) << GD.tsstats << endl;
1023 if(GD.dodebias) cout << " Bias " << GD.debias;
1024 }
1025 }
1026
1027 if(GD.bw && GD.wcol > -1) {
1028 cout << "rstats(wtd):" << label
1029 << " N " << setw(GD.width) << GD.wstats.N()
1030 << " Ave " << setw(GD.width) << GD.wstats.Average()
1031 << " Std " << setw(GD.width) << GD.wstats.StdDev()
1032 << " Var " << setw(GD.width) << GD.wstats.Variance()
1033 << " Min " << setw(GD.width) << GD.wstats.Minimum()
1034 << " Max " << setw(GD.width) << GD.wstats.Maximum()
1035 << " P2P " << setw(GD.width) << GD.wstats.Maximum()-GD.wstats.Minimum();
1036 if(GD.dodebias) cout << " Bias " << GD.debias;
1037 cout << endl;
1038 }
1039 else if(!GD.quiet && GD.wcol > -1) {
1040 cout << "Conventional weighted statistics: "
1041 << GD.msg << ", wt of col " << GD.wcol << ":\n"
1042 << fixed << setprecision(GD.prec) << GD.wstats << endl;
1043 if(GD.dodebias) cout << " Bias = " << GD.debias << endl;
1044 }
1045
1046 if(GD.brw) {
1047 cout << "rstats(rwt):" << label
1048 << " N " << setw(GD.width) << GD.robwtstats.N()
1049 << " Ave " << setw(GD.width) << GD.robwtstats.Average()
1050 << " Std " << setw(GD.width) << GD.robwtstats.StdDev()
1051 << " Var " << setw(GD.width) << GD.robwtstats.Variance()
1052 << " Min " << setw(GD.width) << GD.robwtstats.Minimum()
1053 << " Max " << setw(GD.width) << GD.robwtstats.Maximum()
1054 << " P2P " << setw(GD.width)
1055 << GD.robwtstats.Maximum()-GD.robwtstats.Minimum();
1056 if(GD.dodebias) cout << " Bias " << GD.debias;
1057 cout << endl;
1058 }
1059 else if(!GD.quiet) {
1060 cout << "Conventional statistics with robust weighting: " << GD.msg << ":\n"
1061 << fixed << setprecision(GD.prec) << GD.robwtstats << endl;
1062 if(GD.dodebias) cout << " Bias = " << GD.debias << endl;
1063 }
1064
1065 if(GD.br) {
1066 cout << "rstats(rob):" << label
1067 << " N " << setw(GD.width) << GD.data.size()
1068 << " Med " << setw(GD.width) << GD.median << " MAD " << GD.mad
1069 << " Min " << setw(GD.width) << GD.cstats.Minimum()
1070 << " Max " << setw(GD.width) << GD.cstats.Maximum()
1071 << " P2P " << setw(GD.width) << GD.cstats.Maximum()-GD.cstats.Minimum()
1072 << " Q1 " << setw(GD.width) << GD.Q1 << " Q3 " << setw(GD.width)<< GD.Q3
1073 << " QL " << setw(GD.width) << 2.5*GD.Q1-1.5*GD.Q3
1074 << " QH " << setw(GD.width) << 2.5*GD.Q3-1.5*GD.Q1;
1075 if(GD.dodebias) cout << " Bias " << GD.debias;
1076 cout << endl;
1077 }
1078 else if(!GD.quiet) {
1079 cout << "Robust statistics: " << GD.msg << ":\n";
1080 cout << " Number = " << GD.data.size() << endl;
1081 cout << " Quartiles = " << setw(11) << setprecision(GD.prec) << GD.Q1
1082 << "(1) " << setw(11) << GD.Q3
1083 << "(3) " << setw(11) << 2.5*GD.Q3-1.5*GD.Q1
1084 << "(H) " << setw(11) << 2.5*GD.Q1-1.5*GD.Q3
1085 << "(L)" << endl;
1086 cout << " Median = " << GD.median << " MEstimate = " << GD.mest
1087 << " MAD = " << GD.mad << endl;
1088 if(GD.dodebias) cout << " Bias = " << GD.debias << endl;
1089 }
1090
1091 if(GD.doKS) {
1092 GD.KS = ADtest(&GD.data[0],N,GD.cstats.Average(),GD.cstats.StdDev(),false);
1093 cout << "rstats KS test = " << setprecision(GD.prec) << GD.KS << endl;
1094 }
1095
1096 return 0;
1097 }
1098 catch(Exception& e) { GPSTK_RETHROW(e); }
1099 }
1100
1101 //------------------------------------------------------------------------------------
StemAndLeaf(void)1102 int StemAndLeaf(void)
1103 {
1104 try {
1105 GlobalData& GD=GlobalData::Instance();
1106
1107 try {
1108 vector<double> data(GD.data);
1109 QSort(&data[0],data.size());
1110
1111 // NB assumes array is sorted
1112 Robust::StemLeafPlot(cout, &data[0], data.size(), GD.msg);
1113 }
1114 catch(Exception& e) {
1115 if(e.getText(0) == string("Invalid input") ||
1116 e.getText(0) == string("Array has zero range")) {
1117 cout << "(No stem and leaf plot; data is trivial)\n";
1118 return 0;
1119 }
1120 GPSTK_RETHROW(e);
1121 }
1122
1123 return 0;
1124 }
1125 catch(Exception& e) { GPSTK_RETHROW(e); }
1126 }
1127
1128 //------------------------------------------------------------------------------------
QuantilePlot(void)1129 int QuantilePlot(void)
1130 {
1131 try {
1132 unsigned int i;
1133 GlobalData& GD=GlobalData::Instance();
1134
1135 // get quantiles
1136 vector<double> qdata(GD.data.size());
1137 Robust::Quantiles(&qdata[0],qdata.size());
1138
1139 // output to file rstats.out
1140 ostream *pout = new ofstream("rstats.out");
1141 if(GD.dostdout || pout->fail()) {
1142 if(!GD.dostdout) cout << "Unable to open file rstats.out - output to screen\n";
1143 delete pout;
1144 pout = &cout;
1145 }
1146 else if(!GD.quiet) cout << "Output quantiles, data to file rstats.out\n";
1147
1148 // get TS stats
1149 TwoSampleStats<double> TSS;
1150 for(i=0; i<GD.data.size(); i++) TSS.Add(qdata[i],GD.data[i]);
1151
1152 *pout << "# Quantile plot mean " << fixed << setprecision(GD.prec)
1153 << TSS.Intercept() << " std (slope) " << TSS.Slope()
1154 << " quantile data line follow:" << endl;
1155 for(i=0; i<GD.data.size(); i++)
1156 *pout << qdata[i] << " " << GD.data[i]
1157 << " " << TSS.Intercept() + TSS.Slope()*qdata[i] << endl;
1158
1159 if(pout != &cout)
1160 {
1161 ((ofstream *)pout)->close();
1162 delete pout;
1163 }
1164
1165 cout << "Data vs quantiles fit to line yields y-intercept (=mean) "
1166 << fixed << setprecision(GD.prec) << TSS.Intercept()
1167 << " and slope (=std.dev.) " << TSS.Slope() << endl;
1168
1169 return 0;
1170 }
1171 catch(Exception& e) { GPSTK_RETHROW(e); }
1172 }
1173
1174 //------------------------------------------------------------------------------------
1175 // bins - given min, max and number of bins, find "pretty" bin boundaries
1176 // (first-bin,step,<have nbins>) where first-bin is the *center* of the first bin,
1177 // input min,max,nbins, output the rest
1178 // return 0 if ok
Bins(const double & min,const double & max,int & nbins,double & firstbin,double & binstep,int & prec)1179 int Bins(const double& min, const double& max, int& nbins,
1180 double& firstbin, double& binstep, int& prec)
1181 {
1182 try {
1183 if(nbins <= 2) GPSTK_THROW(Exception("Too few bins"));
1184 firstbin = binstep = 0.0;
1185
1186 double amin(min),amax(max);
1187 if(amin > amax) { amin = max; amax = min; }
1188 else if(amin == amax) GPSTK_THROW(Exception("Equal limits"));
1189
1190 binstep = (amax-amin)/double(nbins);
1191 double tmp = log10(double(binstep)) - 1.0;
1192 prec = int(tmp + (tmp > 0 ? 0.5 : -0.5));
1193 double scal = pow(10.0,prec);
1194 while(binstep/scal < 1.0) { scal /= 10.0; prec--; }
1195 while(binstep/scal >= 10.0) { scal *= 10.0; prec++; }
1196 binstep = double(int(0.5+binstep/scal)*scal);
1197 if(::fabs(binstep) < 1.e-8) { cout << " Error - binstep < 1.e-8\n"; return -1; }
1198 double half(binstep/2.0);
1199 firstbin = binstep * int((amin + (amin > 0 ? 0.5 : -0.5))/binstep);
1200 while(firstbin-half > min) firstbin -= binstep;
1201 while(firstbin+half < min) firstbin += binstep;
1202 nbins = int((max-firstbin+half)/binstep);
1203 while(firstbin+(nbins-1.5)*binstep > max) nbins--;
1204 while(firstbin+(nbins-0.5)*binstep < max) nbins++;
1205 return 0;
1206 }
1207 catch(Exception& e) { GPSTK_RETHROW(e); }
1208 }
1209
1210 //------------------------------------------------------------------------------------
FindBins(void)1211 int FindBins(void)
1212 {
1213 try {
1214 int i,j,k;
1215 GlobalData& GD=GlobalData::Instance();
1216 int prec;
1217 const double min(GD.cstats.Minimum()), max(GD.cstats.Maximum());
1218 if(GD.whichbin == 1) { // only n user input
1219 i = Bins(min,max,GD.nbin,GD.firstbin,GD.widbin,prec); // bins
1220 if(i) return -1;
1221 if(prec >= 0) prec = 0; else prec = -prec;
1222 }
1223 else if(GD.whichbin == 2) { // compute firstbin from min, max and GD.widbin
1224 GD.nbin = 1+int(0.5+(max-min)/GD.widbin);
1225 GD.firstbin = int((min/GD.widbin)+(min>0.0 ? 0.5:-0.5))*GD.widbin;
1226 if(min < GD.firstbin-GD.widbin/2.0) { GD.nbin++; GD.firstbin -= GD.widbin; }
1227 if(max > GD.firstbin+(GD.nbin-0.5)*GD.widbin) { GD.nbin++; }
1228 }
1229 else if(GD.whichbin == 3) { // nbin,widbin and firstbin are specified
1230 ;
1231 }
1232
1233 // compute precision exponent
1234 if(GD.whichbin != 1) {
1235 double tmp = log10(double(GD.widbin)) - 1.0;
1236 prec = int(tmp + (tmp > 0 ? 0.5 : -0.5));
1237 double scal = pow(10.0,prec);
1238 while(GD.widbin/scal < 1.0) { scal /= 10.0; prec--; }
1239 while(GD.widbin/scal >= 10.0) { scal *= 10.0; prec++; }
1240 if(prec < 0) prec = -prec+1;
1241 }
1242
1243 if(GD.nbin > 60) {
1244 cout << "Error - too many bins: " << GD.nbin << endl;
1245 return 0;
1246 }
1247
1248 double half(GD.widbin/2.0);
1249 vector<int> bins(GD.nbin,0);
1250 for(k=0,i=0; i<GD.data.size(); i++) {
1251 j = int((GD.data[i]-GD.firstbin+half)/GD.widbin);
1252 if(j < 0 || j > GD.nbin-1) {
1253 cout << "# Warning - invalid bin " << j << " for data " << GD.data[i]
1254 << endl;
1255 continue;
1256 }
1257 bins[j]++;
1258 k++;
1259 }
1260
1261 // write out data (bin centers)
1262 cout << "# bins: N,width,first " << GD.nbin
1263 << "," << GD.widbin << "," << GD.firstbin << endl;
1264 cout << "# n center samples (low_edge to high_edge)" << endl;
1265 cout << "# total number of samples within bins " << k //<< " prec " << prec
1266 << endl;
1267 for(i=0; i<GD.nbin; i++)
1268 cout << setw(3) << i+1 << " " << fixed << setprecision(prec)
1269 << GD.firstbin+i*GD.widbin << " " << setw(GD.width) << bins[i]
1270 << setprecision(prec+1)
1271 << " (" << GD.firstbin+(i-0.5)*GD.widbin << " to "
1272 << GD.firstbin+(i+0.5)*GD.widbin << ")" << endl;
1273
1274 return 0;
1275 }
1276 catch(Exception& e) { GPSTK_RETHROW(e); }
1277 }
1278
1279 //------------------------------------------------------------------------------------
ComputeSum(void)1280 int ComputeSum(void)
1281 {
1282 try {
1283 unsigned int i,j,k;
1284 GlobalData& GD=GlobalData::Instance();
1285
1286 // compute sum, dt, gaps -------------------------------------------------
1287 bool first(true);
1288 const int ndtmax(9);
1289 int jj,kk,nleast,ndt[ndtmax];
1290 double sum(0.0),prev,dt,bestdt[ndtmax];
1291 vector<int> gapcounts;
1292 if(GD.doSumPlus) for(j=0; j<ndtmax; j++) ndt[j] = -1;
1293
1294 // compute dt and sum
1295 prev = sum = GD.data[0];
1296 for(i=1; i<GD.data.size(); i++) {
1297 sum += GD.data[i];
1298 if(GD.doSumPlus) {
1299 dt = GD.data[i] - prev;
1300 for(j=0; j<ndtmax; j++) {
1301 if(ndt[j] <= 0) { bestdt[j]= dt; ndt[j]=1; break; }
1302 if(dt != 0.0 && fabs((dt-bestdt[j])/dt) < 0.01) { ndt[j]++; break; }
1303 if(j == ndtmax-1) {
1304 kk = 0;
1305 nleast = ndt[kk];
1306 for(k=1; k<ndtmax; k++)
1307 if(ndt[k]<=nleast) { kk=k; nleast=ndt[k]; }
1308 ndt[kk]=1; bestdt[kk]=dt;
1309 }
1310 }
1311 prev = GD.data[i];
1312 }
1313 }
1314
1315 if(GD.doSumPlus) {
1316 for(j=0,i=1; i<ndtmax; i++) { if(ndt[i]>ndt[j]) j=i; }
1317 bestdt[0] = bestdt[j]; ndt[0] = ndt[j];
1318
1319 // find gaps
1320 for(i=0; i<GD.data.size(); i++) {
1321 dt = GD.data[i] - GD.data[0];
1322 jj = int(0.5+dt/bestdt[0])+1; // compute the gapcount
1323
1324 // update gap count for epochs
1325 if(gapcounts.size() == 0) { // create the list
1326 gapcounts.push_back(jj); // start time
1327 gapcounts.push_back(jj-1); // end time
1328 }
1329 // update existing list
1330 k = gapcounts.size() - 1; // index of the current end time
1331 if(jj == gapcounts[k] + 1) { // no gap
1332 gapcounts[k] = jj;
1333 }
1334 else { // found a gap
1335 gapcounts.push_back(jj); // start time
1336 gapcounts.push_back(jj); // end time
1337 }
1338 }
1339 }
1340
1341 // output
1342 cout << "Sum = " << fixed << setprecision(GD.prec) << sum << endl;
1343 if(GD.doSumPlus) {
1344 k = gapcounts.size()-1; // size() is at least 2
1345 cout << "Best step = " << bestdt[0]
1346 << " (" << 100.*ndt[0]/(GD.data.size()-1) << "%)"
1347 << " : total " << GD.data.size() << " data, counts "
1348 << gapcounts[0] << " thru " << gapcounts[k] << endl;
1349 cout << "Range " << GD.data[0] << " thru " << GD.data[GD.data.size()-1] << endl;
1350 if(k <= 2)
1351 cout << "No gaps." << endl;
1352 else
1353 for(i=1; i<=k-2; i+=2) {
1354 jj = gapcounts[i+1]-gapcounts[i]-1;
1355 cout << "Gap at cnt " << gapcounts[i]+1
1356 << " = data " << GD.data[0]+gapcounts[i]*bestdt[0]
1357 << " : size " << jj << " cnts = " << jj*bestdt[0] << " data" << endl;
1358 }
1359 cout << endl;
1360 }
1361
1362 return 0;
1363 }
1364 catch(Exception& e) { GPSTK_RETHROW(e); }
1365 }
1366
1367 //------------------------------------------------------------------------------------
FitPoly(void)1368 int FitPoly(void)
1369 {
1370 try {
1371 unsigned int i,j;
1372 GlobalData& GD=GlobalData::Instance();
1373
1374 vector<double> savedata(GD.data), robwts(GD.data.size());
1375 double *coef,eval,xx,x0;
1376
1377 coef = new double[GD.nfit];
1378 if(!coef) {
1379 cout << "Abort: allocate coefficients failed.\n";
1380 return -4;
1381 }
1382
1383 int iret = Robust::RobustPolyFit(&GD.data[0], &GD.xdata[0], GD.data.size(),
1384 GD.nfit, &coef[0], &robwts[0]);
1385
1386 cout << "RobustPolyFit returns " << iret << endl;
1387 if(iret == 0) {
1388 cout << " Coefficients:" << setprecision(GD.prec);
1389 for(i=0; i<GD.nfit; i++) {
1390 if(fabs(coef[i]) < 0.001)
1391 cout << " " << scientific;
1392 else
1393 cout << " " << fixed;
1394 cout << coef[i];
1395 }
1396 cout << endl << fixed << setprecision(GD.prec);
1397 cout << " Offsets: Y(col " << GD.col << ") " << savedata[0]
1398 << " X(col " << GD.xcol << ") " << GD.xdata[0] << endl;
1399
1400 // output to file rstats.out
1401 ostream *pout = new ofstream("rstats.out");
1402 if(GD.dostdout || pout->fail()) {
1403 if(!GD.dostdout)
1404 cout << "Unable to open file rstats.out - output to screen\n";
1405 delete pout;
1406 pout = &cout;
1407 }
1408 else if(!GD.quiet) {
1409 cout << " Output polynomial fit to file rstats.out (try plotrfit)\n";
1410 }
1411
1412 // write to rstats.out
1413 x0 = GD.xdata[0];
1414 *pout << "#Xdata, Data, fit, resid, weight (" << GD.data.size() << " pts):"
1415 << fixed << setprecision(GD.prec) << endl;
1416 for(i=0; i<GD.data.size(); i++) {
1417 eval = savedata[0] + coef[0];
1418 xx = GD.xdata[i]-x0;
1419 for(j=1; j<GD.nfit; j++) { eval += coef[j]*xx; xx *= (GD.xdata[i]-x0); }
1420 *pout << fixed << setprecision(GD.prec)
1421 << GD.xdata[i] << " " << savedata[i]
1422 << " " << eval << " " << GD.data[i]
1423 << scientific << " " << robwts[i] << endl;
1424 }
1425 for(i=0; i<GD.xevalfit.size(); i++) {
1426 eval = savedata[0] + coef[0];
1427 xx = GD.xevalfit[i]-x0;
1428 for(j=1; j<GD.nfit; j++) { eval += coef[j]*xx; xx *= (GD.xevalfit[i]-x0); }
1429 *pout << fixed << setprecision(GD.prec) << "#Evaluate_Fit(X): X = "
1430 << GD.xevalfit[i] << " F(X) = " << eval << endl;
1431 if(!GD.dostdout && !GD.quiet) cout << fixed << setprecision(GD.prec)
1432 << " Evaluate Fit(" << GD.xevalfit[i] << ") = " << eval << endl;
1433 }
1434
1435 if(pout != &cout)
1436 {
1437 ((ofstream *)pout)->close();
1438 delete pout;
1439 }
1440
1441 //QSort(&robwts[0],robwts.size());
1442 //Robust::StemLeafPlot(cout, &robwts[0], robwts.size(), "weights");
1443 }
1444 //cout << endl;
1445 delete[] coef;
1446
1447 ostringstream oss;
1448 oss << "Residuals of fit (deg " << GD.nfit << ") col " << GD.col
1449 << " vs x col " << GD.xcol << ", file " << GD.inputfile;
1450 GD.msg = oss.str();
1451
1452 return 0;
1453 }
1454 catch(Exception& e) { GPSTK_RETHROW(e); }
1455 }
1456
1457 //------------------------------------------------------------------------------------
Sequential(void)1458 int Sequential(void)
1459 {
1460 try {
1461 GlobalData& GD=GlobalData::Instance();
1462
1463 Stats<double> stats;
1464 cout << "Data and sequential stats ([lab] [xdata] data n ave std)\n";
1465 cout << fixed << setprecision(GD.prec);
1466 for(unsigned int i=0; i<GD.data.size(); i++) {
1467 stats.Add(GD.data[i]);
1468 if(!GD.label.empty()) cout << GD.label << " ";
1469 if(GD.xdata.size()>0) cout << GD.xdata[i] << " ";
1470 cout << GD.data[i] << " " << stats.N() << " "
1471 << stats.Average() << " "
1472 << (stats.N() > 1 ? stats.StdDev() : 0.0) << endl;
1473 }
1474
1475 return 0;
1476 }
1477 catch(Exception& e) { GPSTK_RETHROW(e); }
1478 }
1479
1480 //------------------------------------------------------------------------------------
Discontinuity(void)1481 int Discontinuity(void)
1482 {
1483 try {
1484 unsigned int i,j,k;
1485 GlobalData& GD=GlobalData::Instance();
1486
1487 j = GD.data.size();
1488 k = GD.xdata.size();
1489
1490 cout << fixed << setprecision(GD.prec);
1491 cout << "# Output " << (k > 0 ? "xdata,":"")
1492 << " data, 1st diff, and stats;\n# discontinuity tolerance y="
1493 << GD.ytol << ", x=" << GD.xtol << endl;
1494 cout << "#" << (GD.label.empty() ? "" : " [lab]") << (k > 0 ? " xdata" : "")
1495 << " data 1st-diff-y n ave std MSG(for disc.s)\n";
1496 cout << "# MSG=DISC" << (k > 0 ? " del-x" : "") << " del-data"
1497 << " (stats-for-prev-seg:) N ave std" << (k > 0 ? " xbeg xend" : "")
1498 << " gap/slip/EOD" << endl;
1499
1500 double xlast(k > 0 ? GD.xdata[0] : 0);
1501 Stats<double> segstats;
1502 ostringstream oss;
1503
1504 segstats.Add(GD.data[0]);
1505 if(k > 0) cout << GD.xdata[0] << " ";
1506 cout << GD.data[0] << " " << 0.0 << " "
1507 << 1 << " " << GD.data[0] << " " << 0.0 << endl;
1508
1509 for(i=1; i<j; i++) { // loop over data (starting at 1)
1510 bool slip(false), gap(false);
1511 double fd(GD.data[i]-GD.data[i-1]);
1512 oss.str("");
1513
1514 // is there a gap?
1515 if(k > 0 && GD.xtol > 0 && fabs(GD.xdata[i]-GD.xdata[i-1]) > GD.xtol)
1516 gap=true;
1517
1518 // is there a potential discontinuity?
1519 if(fabs(fd) > GD.ytol)
1520 slip=true;
1521
1522 if(gap || slip || i==j-1) {
1523 oss << " DISC" << fixed << setprecision(GD.prec);
1524 if(k > 0) {
1525 if(i < j-1) oss << " " << GD.xdata[i]-GD.xdata[i-1];
1526 else oss << " " << "00";
1527 }
1528 if(i < j-1) oss << " " << fd;
1529 else oss << " " << "00";
1530
1531 if(i == j-1) segstats.Add(GD.data[i]);
1532
1533 oss << " " << segstats.N()
1534 << " " << segstats.Average()
1535 << " " << (segstats.N()>1 ? segstats.StdDev() : 0.0);
1536 if(k > 0) oss << " " << xlast << " " << GD.xdata[i==j-1 ? i:i-1];
1537
1538 if(slip) oss << " slip";
1539 if(gap) oss << " gap";
1540 if(i==j-1) oss << " EOD";
1541
1542 segstats.Reset();
1543 }
1544
1545 if(gap) xlast = GD.xdata[i];
1546
1547 segstats.Add(GD.data[i]);
1548
1549 // output data
1550 if(!GD.label.empty()) cout << GD.label << " ";
1551 if(k > 0) cout << GD.xdata[i] << " ";
1552 cout << GD.data[i] << " " << fd << " "
1553 << segstats.N() << " "
1554 << segstats.Average() << " "
1555 << (segstats.N() > 1 ? segstats.StdDev() : 0.0)
1556 << oss.str()
1557 << endl;
1558 }
1559
1560 return 0;
1561 }
1562 catch(Exception& e) { GPSTK_RETHROW(e); }
1563 }
1564
1565 //------------------------------------------------------------------------------------
FDFilter(void)1566 int FDFilter(void)
1567 {
1568 try {
1569 unsigned int i,j,k;
1570 GlobalData& GD=GlobalData::Instance();
1571
1572 // for output (TD make option) fill flags first
1573 vector<int> flags(GD.data.size(),0);
1574
1575 // xdata and flags must exist but may be empty
1576 FirstDiffFilter<double> fdf(GD.xdata, GD.data, flags);
1577 fdf.setprecision(GD.prec);
1578 fdf.setLimit(GD.fdfstep);
1579 i = fdf.filter();
1580 cout << "# FD Filter returns " << i << endl;
1581 if(i < 0) cout << "# FD Filter failed (" << i << ")" << endl;
1582 else {
1583 if(GD.doFDF2) {
1584 string str;
1585 i = fdf.analyze2(GD.fdfrat,GD.fdfsig,str);
1586 cout << "#" << str << endl;
1587 return 0;
1588 }
1589 else i = fdf.analyze();
1590
1591 if(i < 0) cout << "# FD Filter analysis failed (" << i << ")" << endl;
1592
1593 for(i=0; i<fdf.results.size(); i++)
1594 fdf.getStats(fdf.results[i]);
1595 fdf.setDumpNoAnal(GD.verbose);
1596 fdf.dump(cout, GD.label);
1597
1598 // clean the data based on results of filter
1599 vector< FilterHit<double> > hit=fdf.getResults();
1600 for(j=0; j<hit.size(); j++) {
1601 if(hit[j].type == FilterHit<double>::BOD) continue;
1602 else if(hit[j].type == FilterHit<double>::outlier) {
1603 for(k=0; k<hit[j].npts; k++)
1604 flags[hit[j].index+k] = -1; // flag for outlier
1605 }
1606 else if(hit[j].type == FilterHit<double>::slip) {
1607 for(k=hit[j].index; k<GD.data.size(); k++)
1608 GD.data[k] -= hit[j].step;
1609 }
1610 }
1611
1612 // write cleaned data to rstats.out
1613 ostream *pout = new ofstream("rstats.out");
1614 if(GD.dostdout || pout->fail()) {
1615 if(!GD.dostdout)
1616 cout << "Unable to open file rstats.out - output to screen\n";
1617 delete pout;
1618 pout = &cout;
1619 }
1620
1621 for(i=0; i<GD.data.size(); i++)
1622 *pout << fixed << setprecision(GD.prec) << i
1623 << " " << (GD.xdata.size() ? GD.xdata[i] : (double)(i))
1624 << " " << GD.data[i] << " " << flags[i] << endl;
1625 if(pout != &cout)
1626 {
1627 ((ofstream *)pout)->close();
1628 delete pout;
1629 }
1630 }
1631
1632 return 0;
1633 }
1634 catch(Exception& e) { GPSTK_RETHROW(e); }
1635 }
1636
1637 //------------------------------------------------------------------------------------
WindFilter(void)1638 int WindFilter(void)
1639 {
1640 try {
1641 unsigned int i;
1642 GlobalData& GD=GlobalData::Instance();
1643
1644 // for output fill flags first
1645 vector<int> flags(GD.data.size(),0);
1646
1647 // xdata and flags must exist but may be empty
1648 WindowFilter<double> wf(GD.xdata, GD.data, flags);
1649 wf.setTwoSample(GD.doXWF);
1650 wf.setWidth(GD.windwidth);
1651 wf.setMinRatio(GD.windratio);
1652 wf.setMinStep(GD.windstep);
1653 wf.setprecision(GD.prec);
1654 i = wf.filter();
1655 if(i < 0) cout << "# window filter failed (" << i << ")" << endl;
1656 else {
1657 if(GD.debug > -1) wf.setDebug(true);
1658 wf.analyze(); // ignore return values
1659 if(GD.verbose) wf.setDumpAnalMsg(true);
1660 wf.dump(cout, GD.label);
1661 }
1662
1663 return 0;
1664 }
1665 catch(Exception& e) { GPSTK_RETHROW(e); }
1666 }
1667
1668 //------------------------------------------------------------------------------------
FixFilter(void)1669 int FixFilter(void)
1670 {
1671 try {
1672 int N;
1673 unsigned int i,j,k,Nsig;
1674 double Esiglim(0);
1675 GlobalData& GD=GlobalData::Instance();
1676
1677 if(!GD.quiet) cout << "# fix filter with width " << GD.fixN << " limit "
1678 << fixed << setprecision(GD.prec)
1679 << GD.fixlim << " and siglim " << GD.fixsig << endl;
1680
1681 // new call
1682 vector<int> flags;
1683 vector< FilterHit<double> > results;
1684
1685 // NB arrays xdata, data, flags do NOT get edited - const
1686 IterativeFDiffFilter<double> ifdf(GD.xdata, GD.data, flags);
1687 ifdf.setWidth(GD.fixN);
1688 ifdf.setLimit(GD.fixlim);
1689 ifdf.setSigma(GD.fixsig);
1690 ifdf.setw(GD.width);
1691 ifdf.setprecision(GD.prec);
1692 ifdf.doVerbose(GD.verbose);
1693 ifdf.doResetSigma(true);
1694 ifdf.doSmallSlips(false);
1695
1696 N = ifdf.analysis();
1697 results = ifdf.getResults();
1698 Nsig = ifdf.getNhighSigma();
1699 Esiglim = ifdf.getEstimatedSigmaLimit();
1700
1701 if(N < 0)
1702 cout << "Error - not enough data for fix filter.\n";
1703 else if(N==0)
1704 cout << "No outliers or slips found.\n";
1705 else {
1706 unsigned int nb=0,ns=0,npts(0);
1707 double xbeg,xend;
1708 for(i=0; i<results.size(); i++) {
1709 cout << "FIXRES " << setw(2) << (i+1)
1710 << " " << results[i].asStringRead(GD.prec)
1711 << fixed << setprecision(GD.prec)
1712 << " (x=" << GD.xdata[results[i].index] << ")"
1713 << endl;
1714 if(i == 0) xbeg = xend = GD.xdata[results[i].index];
1715 if(GD.xdata[results[i].index] < xbeg) xbeg = GD.xdata[results[i].index];
1716 if(GD.xdata[results[i].index] > xend) xend = GD.xdata[results[i].index];
1717 if(results[i].isOutlier()) { nb++; npts += results[i].npts; }
1718 if(results[i].isSlip()) ns++;
1719 }
1720 cout << "FIXRES found " << N << " results: "
1721 << nb << " outlier" << (nb==1 ? "":"s")
1722 << " rejecting " << npts << (npts==1 ? " pt":" pts")
1723 << " and " << ns << " slip" << (nb==1 ? "":"s")
1724 << " (small slips " << (ifdf.doSmallSlips() ? "reported":"ignored") << ")"
1725 << fixed << setprecision(GD.prec)
1726 << " in x-interval (" << xbeg << " " << xend << ") = " << xend-xbeg
1727 << "\nFIXRES with " << Nsig << " high sigma" << (Nsig==1 ? "":"s")
1728 << " remaining and estimated limit " << Esiglim << endl;
1729 }
1730
1731 return 0;
1732 }
1733 catch(Exception& e) { GPSTK_RETHROW(e); }
1734 }
1735
1736 //------------------------------------------------------------------------------------
WhiteNoiseJerkFilter(void)1737 int WhiteNoiseJerkFilter(void)
1738 {
1739 try {
1740 unsigned int i;
1741 GlobalData& GD=GlobalData::Instance();
1742
1743 // WNJ uses LOG
1744 pLOGstrm = &cout;
1745 ConfigureLOG::ReportLevels() = false;
1746 ConfigureLOG::ReportTimeTags() = false;
1747
1748 // compute white noise jerk filter
1749 // smaller process noise -> smoother; since mn and pn are const,
1750 // only ratio mn/pn determines smoothness - mn/pn ~ 1.e-8 typically
1751 vector<double> x,v,a;
1752 WNJfilter wnjkf;
1753 wnjkf.ptrx = &x;
1754 wnjkf.ptrv = &v;
1755 wnjkf.ptra = &a;
1756 wnjkf.Reset(3);
1757 if(GD.data.size() == 0) { LOG(ERROR) << "Error - no data"; return -1; }
1758 if(GD.xdata.size() < GD.data.size())
1759 { LOG(ERROR) << "Error - x data is missing"; return -1; }
1760 for(i=0; i<GD.data.size(); i++) {
1761 wnjkf.ttag.push_back(GD.xdata[i]); // leave x as-is -xdata[0]);
1762 wnjkf.data.push_back(GD.data[i]);
1763 wnjkf.msig.push_back(1.0);
1764 wnjkf.psig.push_back(GD.wnjpom); // pom = process/measurement noises
1765 }
1766 wnjkf.prec = GD.prec;
1767 wnjkf.width = GD.width;
1768 cout << "# White noise jerk filter, unweighted, times since "
1769 << fixed << setprecision(3) << GD.xdata[0] << endl;
1770 // get dt, the nominal timestep
1771 MostCommonValue mcv;
1772 mcv.setTol(0.1); // don't need fine tuning
1773 for(i=1; i<GD.xdata.size(); i++)
1774 mcv.add(GD.xdata[i]-GD.xdata[i-1]);
1775 double dt = mcv.bestDT();
1776 double tbeg(wnjkf.ttag[0]), tend(wnjkf.ttag[wnjkf.ttag.size()-1]);
1777
1778 wnjkf.apState(0) = GD.data[0];
1779 wnjkf.apNoise(0) = 10000000.; // no info
1780 for(i=1; i<3; i++) {
1781 wnjkf.apState(i) = 0.0;
1782 wnjkf.apNoise(i) = wnjkf.apNoise(i-1)/dt;
1783 }
1784
1785 // run the filter
1786 if(GD.debug > -1) LOGlevel = ConfigureLOG::Level("DEBUG");
1787 wnjkf.filterOutput = true;
1788 wnjkf.setSmoother(true);
1789 wnjkf.setSRISU(true);
1790 wnjkf.initializeFilter();
1791 wnjkf.ForwardFilter(tend,dt);
1792 wnjkf.BackwardFilter(0);
1793 if(GD.debug > -1) LOGlevel = ConfigureLOG::Level("INFO");
1794
1795 return 0;
1796 }
1797 catch(Exception& e) { GPSTK_RETHROW(e); }
1798 }
1799
1800 //------------------------------------------------------------------------------------
1801 // discrete Fourier transform
DFT(const vector<double> & data,vector<double> & ampcos,vector<double> & ampsin)1802 void DFT(const vector<double>& data, vector<double>& ampcos, vector<double>& ampsin)
1803 {
1804 const double TWO_PI(6.2831853071796);
1805 int i,j,N(data.size());
1806 double oon(1.0/double(N)),ton(2.0/double(N));
1807 double tpon(TWO_PI*oon);
1808 ampsin = vector<double>(1+N/2);
1809 ampcos = vector<double>(1+N/2);
1810
1811 for(i=0; i<N/2; i++) {
1812 ampsin[i] = ampcos[i] = 0.0;
1813 for(j=0; j<N; j++) {
1814 ampcos[i] += data[j] * ::cos(tpon*i*j);
1815 ampsin[i] += data[j] * ::sin(tpon*i*j);
1816 }
1817 ampcos[i] *= (i==0 || i==N/2) ? oon : ton;
1818 ampsin[i] *= ton;
1819 }
1820 }
1821
1822 // inverse discrete Fourier transform
invDFT(const vector<double> & ampcos,const vector<double> & ampsin,const int & N,vector<double> & reform)1823 void invDFT(const vector<double>& ampcos, const vector<double>& ampsin, const int& N,
1824 vector<double>& reform)
1825 {
1826 const double TWO_PI(6.2831853071796);
1827 int i,j;
1828 double tpon(TWO_PI/double(N));
1829 reform = vector<double>(N);
1830 for(i=0; i<N; i++) {
1831 reform[i] = ampcos[0];
1832 // for low pass, change j<=N/2 to, say, j<=N/8
1833 for(j=1; j<=N/2; j++) {
1834 reform[i] += ampcos[j] * ::cos(tpon*i*j) + ampsin[j] * ::sin(tpon*i*j);
1835 }
1836 }
1837 }
1838
1839 //------------------------------------------------------------------------------------
ComputeFFT(void)1840 int ComputeFFT(void)
1841 {
1842 try {
1843 unsigned int i;
1844 GlobalData& GD=GlobalData::Instance();
1845
1846 // NB providing --xcol means it will search for and fill gaps in x.
1847 // also this will compute dt
1848 if(GD.xcol == -1) {
1849 // replace with count -- assume no gaps in data
1850 for(i=0; i<GD.data.size(); i++)
1851 GD.xdata.push_back(double(i));
1852 }
1853
1854 // first find step in xdata
1855 MostCommonValue mcv;
1856 mcv.setTol(0.002); // make it larger than a millisecond
1857 for(i=1; i<GD.xdata.size(); i++) mcv.add(GD.xdata[i]-GD.xdata[i-1]);
1858 //mcv.dump(cout,prec);
1859 int count(mcv.bestN());
1860 const double dt(mcv.bestDT()), tol(mcv.getTol());
1861 cout << "#FFT Computed X-step is " << fixed << setprecision(GD.prec) << dt
1862 << " with " << count << " occurances." << endl;
1863 if(GD.dtfft == 0.0) GD.dtfft = dt;
1864 else cout << "#FFT X-step is forced by user to be "
1865 << scientific << setprecision(GD.prec) << GD.dtfft << endl;
1866
1867 // find average
1868 const double ave(GD.tsstats.AverageY());
1869 cout << "#FFT Remove data average " << scientific << setprecision(GD.prec)
1870 << ave << endl;
1871
1872 // copy over, removing average and filling gaps
1873 vector<double> vxdata,vdata;
1874 double vx(GD.xdata[0]);
1875 vxdata.push_back(vx);
1876 vdata.push_back(GD.data[0]-ave);
1877 for(i=1; i<GD.data.size(); i++) {
1878 while(GD.xdata[i]-vx-dt > tol) {
1879 vx += dt;
1880 vxdata.push_back(vx);
1881 vdata.push_back(0.0);
1882 }
1883 vxdata.push_back(GD.xdata[i]);
1884 vdata.push_back(GD.data[i]-ave);
1885 vx = GD.xdata[i];
1886 }
1887
1888 unsigned N(vdata.size());
1889
1890 // get the DFT (not fast) of real valued data
1891 vector<double> ampsin(1+N/2),ampcos(1+N/2);
1892 //vector<double> reform(N);
1893
1894 DFT(vdata, ampcos, ampsin); // data is unchanged
1895 //invDFT(ampcos, ampsin, N, reform); // amps unchanged
1896
1897 // output
1898 double amp, dtot(0.0), ftot(0.0), fact(2.0/N);
1899 cout << "#FFT N=" << N << fixed << setprecision(GD.prec) << " dx is " << GD.dtfft
1900 << " Nyquist = 1/2dx = " << 1.0/(2*GD.dtfft)
1901 << ", freq at i is (2i/N)*Nyquist = i * " << scientific << 1.0/(N*GD.dtfft)
1902 << ", WL at i is N*dx/i = " << N*GD.dtfft << " / i " << endl;
1903 cout << "#FFT i xd(i*dx) data freq |ampfft| xdata wl " << endl;
1904 for(i=0; i<N; i++) {
1905 amp = (i < 1+N/2 ? ::sqrt(ampsin[i]*ampsin[i]+ampcos[i]*ampcos[i]) : 0.0);
1906 cout << "FFT " << fixed << setprecision(GD.prec) << i
1907 << " " << double(i)*GD.dtfft << " " << vdata[i]
1908 << " " << i/(N*GD.dtfft) << " " << amp
1909 //<< " " << reform[i]
1910 << " " << GD.xdata[i]
1911 << " " << (i==0 ? 0 : (N*GD.dtfft)/i)
1912 << endl;
1913 dtot += fact*vdata[i]*vdata[i];
1914 if(i < 1+N/2) ftot += amp*amp;
1915 }
1916 cout << "#FFT Total power (2/N)*sum(data^2) = sum(fft^2) = " << scientific
1917 << setprecision(GD.prec) << dtot << " " << ftot << " " << GD.label << endl;
1918
1919 return 0;
1920 }
1921 catch(Exception& e) { GPSTK_RETHROW(e); }
1922 }
1923
1924 //------------------------------------------------------------------------------------
Outliers(void)1925 int Outliers(void)
1926 {
1927 try {
1928 unsigned int i,j;
1929 GlobalData& GD=GlobalData::Instance();
1930
1931 double ave(GD.cstats.Average()),sig(GD.cstats.StdDev());
1932 // normally GD.outscale=1: 2.5*Q3 - 1.5*Q1;
1933 double OH = GD.Q3 + GD.outscale*1.5*(GD.Q3-GD.Q1);
1934 // normally GD.outscale=1: 2.5*Q1 - 1.5*Q3;
1935 double OL = GD.Q1 - GD.outscale*1.5*(GD.Q3-GD.Q1);
1936 vector<int> outhi,outlo;
1937 for(i=0; i<GD.data.size(); i++) {
1938 if(GD.data[i] > OH)
1939 outhi.push_back(i);
1940 else if(GD.data[i] < OL)
1941 outlo.push_back(i);
1942 }
1943 cout << "There are " << outhi.size()+outlo.size() << " outliers; "
1944 << outlo.size() << " low (< " << setprecision(GD.prec) << OL << ") and "
1945 << outhi.size() << " high (> " << setprecision(GD.prec) << OH << ")."
1946 << endl << " n " << (GD.xdata.size() > 0 ? "x-value" : "")
1947 << " value val/outlim val-ave (val-ave)/sig" << endl;
1948 // NB data and xdata have been sorted together
1949 for(j=1,i=0; i<outlo.size(); i++,j++) {
1950 cout << " OTL " << j << " ";
1951 if(GD.xdata.size() > 0) cout << GD.xdata[outlo[i]] << " ";
1952 cout << GD.data[outlo[i]] << " " << GD.data[outlo[i]]/OL
1953 << " " << GD.data[outlo[i]]-ave
1954 << " " << (GD.data[outlo[i]]-ave)/sig << endl;
1955 }
1956 for(i=0; i<outhi.size(); i++,j++) {
1957 cout << " OTH " << j << " ";
1958 if(GD.xdata.size() > 0) cout << GD.xdata[outhi[i]] << " ";
1959 cout << GD.data[outhi[i]] << " " << GD.data[outhi[i]]/OH
1960 << " " << GD.data[outhi[i]]-ave
1961 << " " << (GD.data[outhi[i]]-ave)/sig << endl;
1962 }
1963
1964 return 0;
1965 }
1966 catch(Exception& e) { GPSTK_RETHROW(e); }
1967 }
1968
1969 //------------------------------------------------------------------------------------
DumpData(string msg)1970 int DumpData(string msg)
1971 {
1972 try {
1973 unsigned int i;
1974 GlobalData& GD=GlobalData::Instance();
1975
1976 for(i=0; i<GD.data.size(); i++) {
1977 cout << msg << " " << fixed << setprecision(GD.prec) << i;
1978 if(GD.xcol > -1) cout << " " << GD.xdata[i];
1979 cout << " " << GD.data[i];
1980 if(GD.wcol > -1) cout << " " << GD.wdata[i];
1981 cout << endl;
1982 }
1983
1984 return 0;
1985 }
1986 catch(Exception& e) { GPSTK_RETHROW(e); }
1987 }
1988
1989 //------------------------------------------------------------------------------------
1990 //------------------------------------------------------------------------------------
1991