1 //==============================================================================
2 //
3 //  This file is part of GPSTk, the GPS Toolkit.
4 //
5 //  The GPSTk is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU Lesser General Public License as published
7 //  by the Free Software Foundation; either version 3.0 of the License, or
8 //  any later version.
9 //
10 //  The GPSTk is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU Lesser General Public License for more details.
14 //
15 //  You should have received a copy of the GNU Lesser General Public
16 //  License along with GPSTk; if not, write to the Free Software Foundation,
17 //  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 //  This software was developed by Applied Research Laboratories at the
20 //  University of Texas at Austin.
21 //  Copyright 2004-2020, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24 
25 //==============================================================================
26 //
27 //  This software was developed by Applied Research Laboratories at the
28 //  University of Texas at Austin, under contract to an agency or agencies
29 //  within the U.S. Department of Defense. The U.S. Government retains all
30 //  rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 //  Pursuant to DoD Directive 523024
33 //
34 //  DISTRIBUTION STATEMENT A: This software has been approved for public
35 //                            release, distribution is unlimited.
36 //
37 //==============================================================================
38 
39 /// @file rstats.cpp
40 /// Read the data in one [or two] column(s) of a file, and output robust statistics,
41 /// two-sample statistics, a stem-and-leaf plot, a quantile-quantile plot,
42 /// and a robust polynomial fit. Options perform a variety of other analysis tasks.
43 
44 //------------------------------------------------------------------------------------
45 // system includes
46 #include <ctime>
47 #include <iostream>
48 #include <iomanip>
49 #include <fstream>
50 #include <string>
51 #include <vector>
52 // GPSTk
53 #include "Exception.hpp"
54 #include "StringUtils.hpp"
55 #include "Epoch.hpp"
56 #include "Stats.hpp"
57 #include "FirstDiffFilter.hpp"
58 #include "WindowFilter.hpp"
59 #include "FDiffFilter.hpp"
60 #include "singleton.hpp"
61 #include "WNJfilter.hpp"
62 // geomatics
63 #include "CommandLine.hpp"
64 #include "RobustStats.hpp"
65 #include "MostCommonValue.hpp"
66 #include "expandtilde.hpp"
67 #include "logstream.hpp"
68 
69 //------------------------------------------------------------------------------------
70 using namespace std;
71 using namespace gpstk;
72 using namespace StringUtils;
73 
74 //------------------------------------------------------------------------------------
75 // NB Version below class GlobalData
76 
77 //------------------------------------------------------------------------------------
78 // prototypes
79 /**
80  * @throw Exception
81  */
82 int GetCommandLine(int argc, char **argv);
83 /**
84  * @throw Exception
85  */
86 int Prepare(void);
87 /**
88  * @throw Exception
89  */
90 int ReadAndCompute(void);
91 
92 /**
93  * @throw Exception
94  */
95 int StemAndLeaf(void);
96 /**
97  * @throw Exception
98  */
99 int QuantilePlot(void);
100 /**
101  * @throw Exception
102  */
103 int FindBins(void);
104 /**
105  * @throw Exception
106  */
107 int ComputeSum(void);
108 /**
109  * @throw Exception
110  */
111 int FitPoly(void);
112 /**
113  * @throw Exception
114  */
115 int Sequential(void);
116 /**
117  * @throw Exception
118  */
119 int Discontinuity(void);
120 /**
121  * @throw Exception
122  */
123 int FDFilter(void);
124 /**
125  * @throw Exception
126  */
127 int WindFilter(void);
128 /**
129  * @throw Exception
130  */
131 int FixFilter(void);
132 /**
133  * @throw Exception
134  */
135 int WhiteNoiseJerkFilter(void);
136 /**
137  * @throw Exception
138  */
139 int ComputeFFT(void);
140 /**
141  * @throw Exception
142  */
143 int OutputStats(void);
144 /**
145  * @throw Exception
146  */
147 int Outliers(void);
148 
149 /** utility for develop and debug
150  * @throw Exception
151  */
152 int DumpData(string msg=string("DUMP"));
153 
154 //------------------------------------------------------------------------------------
155 //------------------------------------------------------------------------------------
156 // Class GlobalData encapsulates global static data.
157 class GlobalData : public Singleton<GlobalData> {
158 public:
159    // Default and only constructor, sets defaults.
GlobalData()160    GlobalData() throw() { SetDefaults(); }
161 
162    // prgm housekeeping
163    static const std::string Version;   // see below
164    std::string PrgmName;               // this program (must match Jamfile)
165    std::string Title;                  // name, version and runtime
166    std::ofstream oflog;                // output log file stream
167 
168    // command line input ----------------------------------------------------------
169    std::string cmdlineErrors,cmdlineDump,cmdlineUsage;  // strings filled by parser
170    std::vector<std::string> cmdlineUnrecognized;
171 
172    //std::string logfile;                // name of log file
173    std::string inpath;                 // files and paths
174    std::string inputfile;              // input file
175 
176    // input
177    int col,xcol,wcol;
178    double xbeg,xend,dmin,dmax;
179    bool doxbeg,doxend,dodmin,dodmax;
180    string begstr,endstr,minstr,maxstr; // use strings so no default shows
181    double debias;                      // specify bias to remove
182    string debstr;
183    bool dodebias, debias0;             // set dodebias using CommandLine::count()
184    // plots
185    bool doStemLeaf;                    // stem and leaf plot
186    bool doQplot;                       // quantile plot
187 
188    bool doBin;
189    string binstr;                      // string for input of bin option(s)
190    int whichbin;                       // which input option: 1 n, 2 w, or 3 n,w,f
191    int nbin;                           // binning for histogram - number of bins
192    double widbin,firstbin;             // width of bins, center of first bin
193 
194    // analysis
195    bool doSum,doSumPlus,doFit;         // sum, find range, step, gaps
196    string fitstr;                      // polynomial fit
197    unsigned int nfit;                  // degree of fit
198    vector<double> xevalfit;            // evaluate fit at each x
199    bool doSeq;                         // sequential stats
200    bool doDisc;                        // discontinuities
201    string discstr;                     //
202    double ytol,xtol;                   // tolerances to discontinuities
203 
204    // filters
205    bool doFDF,doFDF2,doWF,doXWF,doFixF;// first difference and window filters
206    double fdfstep,fdfsig,fdfrat;       // step, sigma and ratio limit for FDF(2)
207    string fdfstr,fdf2str;              // fdf and fdf2 filters
208    string windstr,xwindstr;            // window filters
209    int windwidth;                      // width of window filter
210    double windstep, windratio;         // slip tol. and ratio limit for WindowFilter
211    string fixfstr;                     // input for fixer filter
212    int fixN;                           // width of fixer filter
213    double fixlim,fixsig;               // step and sigma limits of fixer filter
214 
215    // other
216    bool doWNJ,doFFT;                   // white noise jerk, FFT
217    double wnjpom;                      // WNJ process/measurement noise ratio
218    double dtfft;                       // dt for FFT
219 
220    // output
221    bool quiet;                         // suppress title, timing and other output
222    bool dostdout;                      // write rstats.out to stdout
223    bool brief;                         // 1-line output stats (conv,rob,2-samp,wted)
224    bool bc,br,bw,b2,brw;               // 1-line output of only 1 stats
225    string label;                       // label added to brief/fit/disc/seq
226    bool nostats;                       // suppress output of stats
227    bool doKS;                          // do Anderson-Darling
228    bool doOuts;                        // outlier lists
229    string outstr;                      // use string so no default shows
230    double outscale;                    // outlier scaling for --outs
231    int width;                          // width of floating output
232    int prec;                           // precision of floating output
233 
234    bool verbose,help;                  // help, etc
235    int debug;
236    std::string timefmt;                // output format
237    // end command line input ------------------------------------------------------
238 
239    // data, x-data and weights
240    std::vector<double> data,xdata,wdata,robwts;
241    // stats
242    Stats<double> cstats;
243    WtdStats<double> wstats,robwtstats;
244    TwoSampleStats<double> tsstats;
245    // robust
246    double median,mad,mest,Q1,Q3,KS;
247 
248    // results
249    std::string msg;                    // msg for output
250    std::string longfmt;                // times in error messages, etc.
251 
252 private:
SetDefaults(void)253    void SetDefaults(void) throw()
254    {
255       PrgmName = std::string("rstats");
256 
257       // command line input -------------------------------------------
258       // files and paths
259       //logfile = std::string("");
260       inputfile = std::string("");
261       // input
262       col = 1;
263       xcol = wcol = -1;
264       doxbeg = doxend = dodmin = dodmax = false;
265       begstr = endstr = minstr = maxstr = string("");
266       debstr = string("");
267       dodebias = debias0 = false;
268       // plots
269       doStemLeaf = doQplot = false;
270       doBin = false;
271       binstr = string("");
272       whichbin = 0;
273       // analysis
274       doSum = doSumPlus = doFit = false;
275       fitstr = string("");
276       nfit = 0;
277       doSeq = false;
278       discstr = string("");
279       // filters
280       {
281          vector<double> xdata,data;
282          vector<int> flags;
283          FirstDiffFilter<double> fdf(xdata,data,flags);
284          fdfstep = fdf.getLimit();
285          fdfsig = 0.02; fdfrat = 2.0;             // TD need a default
286          fdfstr = asString(fdfstep,0);
287          fdf2str = asString(fdfstep,0) + string(",")
288                           + asString(fdfsig,2) + string(",")
289                           + asString(fdfrat,0);
290 
291          WindowFilter<double> wf(xdata,data,flags);
292          windwidth = wf.getWidth();
293          windstep = wf.getMinStep();
294          windratio = wf.getMinRatio();
295          xwindstr = windstr = asString(windwidth) + string(",")
296                             + asString(windstep,1) + string(",")
297                             + asString(windratio,0);
298 
299          fixfstr = "4,0.8,0.2";
300          fixN = 4;
301          fixlim = 0.8;
302          fixsig = 0.2;
303       }
304       // output
305       outstr = string("");
306       dostdout = quiet = nostats = doKS = doOuts = false;
307       brief = bc = br = bw = b2 = brw = false;
308       label = string("");
309       prec = 3;
310       width = 6;
311       timefmt = std::string("%4F %10.3g");
312       //timefmt = std::string("%.6Q");         // 1.5/86400 = 1.7e-5
313 
314       // end command line input ---------------------------------------
315 
316       longfmt = std::string("%04F %10.3g %04Y/%02m/%02d %02H:%02M:%06.3f %P");
317    }
318 
319 }; // end class GlobalData
320 
321 //------------------------------------------------------------------------------------
322 const string GlobalData::Version(string("3.1 6/19/19"));
323 
324 //------------------------------------------------------------------------------------
325 //------------------------------------------------------------------------------------
main(int argc,char ** argv)326 int main(int argc, char **argv)
327 {
328    string PrgmName;        // for catch
329 try {
330    // begin counting time
331    clock_t totaltime = clock();
332    Epoch wallbeg,wallend;
333    wallbeg.setLocalTime();
334 
335    // get (create) the global data object (a singleton);
336    // since this is the first instance, this will also set default values
337    GlobalData& GD=GlobalData::Instance();
338    PrgmName = GD.PrgmName;
339 
340    // Build title
341    GD.Title = GD.PrgmName + ", Ver. "
342       + GD.Version + wallbeg.printf(", Run %04Y/%02m/%02d at %02H:%02M:%02S");
343 
344    // display title on screen -- see below after cmdline input
345 
346    // process : loop once -----------------------------------------------------
347    int iret;
348    for(bool go=true; go; go=false)  {
349 
350       // process the command line ------------------------------------
351       iret = GetCommandLine(argc,argv);
352       if(iret) break;
353 
354       // Title, debug output cmdline, define GD.msg, include_path(input file)
355       iret = Prepare();
356       if(iret) break;
357 
358       // read input and compute stats
359       iret = ReadAndCompute();
360       if(iret) break;
361 
362       //DumpData("INI");
363 
364       // plots
365       if(GD.doStemLeaf) { iret = StemAndLeaf(); if(iret) break; }
366       if(GD.doQplot) { iret = QuantilePlot(); if(iret) break; }
367       // uses GD.cstats, sets nostats
368       if(GD.doBin) { iret = FindBins(); if(iret) break; }
369 
370       // analysis
371       if(GD.doSum || GD.doSumPlus) { iret = ComputeSum(); if(iret) break; }
372       if(GD.doFit) { iret = FitPoly(); if(iret) break; }
373       if(GD.doSeq) { iret = Sequential(); if(iret) break; }
374       if(GD.doDisc) { iret = Discontinuity(); if(iret) break; }
375 
376       // filters - all set nostats
377       if(GD.doFDF || GD.doFDF2) { iret = FDFilter(); if(iret) break; }
378       if(GD.doWF || GD.doXWF) { iret = WindFilter(); if(iret) break; }
379       if(GD.doFixF) { iret = FixFilter(); if(iret) break; }
380 
381       // other - all set nostats
382       if(GD.doWNJ) { iret = WhiteNoiseJerkFilter(); if(iret) break; }
383       // uses GD.tsstats
384       if(GD.doFFT) { iret = ComputeFFT(); if(iret) break; }
385 
386       // output stats
387       if(!GD.nostats) { iret = OutputStats(); if(iret) break; }
388       if(GD.doOuts) { iret = Outliers(); if(iret) break; }
389 
390    }  // end loop once
391 
392    // error condition ---------------------------------------------------------
393    // return codes: 0 ok
394    //               1 help
395    //               2 cmd line errors
396    //               3 requested validation
397    //               4 invalid input
398    //               5 open fail
399    //               6 requested dump
400    //               7 other...
401    //              -3 cmd line definition invalid (CommandLine)
402    //cout << "Return code is " << iret << endl;
403    if(iret != 0) {
404       if(iret != 1) {
405          string tmsg;
406          tmsg = GD.PrgmName + string(" is terminating with code ")
407                            + StringUtils::asString(iret);
408          cout << tmsg << endl;
409       }
410 
411       if(iret < 5) cout << "# " << GD.Title << endl;
412 
413       if(iret == 1) { cout << GD.cmdlineUsage; }
414       else if(iret == 2) { cout << GD.cmdlineErrors; }
415       else if(iret == 3) { cout << "The user requested input validation."; }
416       else if(iret == 4) { cout << "The input is invalid."; }
417       else if(iret == 5) { cout << "There is not enough data."; }
418       else if(iret == 7) { cout << "The input file could not be opened."; }
419       else if(iret == -3) { // cmd line definition invalid
420          cout << "The command line definition is invalid.\n" << GD.cmdlineErrors;
421       }
422       else                 // fix this
423          cout << "temp - Some other return code..." << iret;
424       cout << endl;
425    }
426 
427    // compute and print run time ----------------------------------------------
428    if(iret != 1 && !GD.quiet) {
429       wallend.setLocalTime();
430       totaltime = clock()-totaltime;
431       cout << "# " << PrgmName << " timing: " << fixed << setprecision(3)
432          << double(totaltime)/double(CLOCKS_PER_SEC) << " seconds. ("
433          << (wallend-wallbeg) << " sec)" << endl;
434    }
435 
436    if(iret == 0) return 0; else return -1;
437 }
438 catch(Exception& e) {
439    cerr << PrgmName << " caught Exception:\n" << e.what() << endl;
440 }
441 catch (...) {
442    cerr << "Unknown error in " << PrgmName << ".  Abort." << endl;
443 }
444    return -1;
445 }   // end main()
446 
447 //------------------------------------------------------------------------------------
448 //------------------------------------------------------------------------------------
GetCommandLine(int argc,char ** argv)449 int GetCommandLine(int argc, char **argv)
450 {
451 try {
452    int i;
453    GlobalData& GD=GlobalData::Instance();
454 
455    // create list of command line options, and fill it
456    // put required options first - they will get listed first anyway
457    CommandLine opts;
458 
459    // build the options list == syntax page
460    string PrgmDesc =
461       "Program " + GD.PrgmName + " reads one or more columns of numbers in an "
462       "ascii input file and\n computes standard and robust statistics on the data, "
463       "with options to perform\n a variety of other analysis tasks as well. Output "
464       "is to stdout or file rstats.out (unless --stdout).\n "
465       "Input is on the command line, and/or of the same format in a file\n "
466       "(see --file below); lines in the file that begin with '#' are ignored."
467       "\n Options are shown below, followed by a description, with default in ();\n "
468       "however {*} means option must be present to activate - defaults suggested.";
469 
470    string pad("                   ");
471    // opts.Add(char, opt, arg, repeat?, required?, &target, pre-descript, descript.);
472    bool req(false);
473    string dummy("");         // dummy for --file
474    opts.Add('f', "file", "name", true, req, &dummy, "\n# File I/O:",
475             "Name of file with options [#-EOL = comment] [-f]");
476    //opts.Add(0, "log", "name", false, req, &GD.logfile, "",
477    //         "Name of output log file");
478    //opts.Add(0, "logpath", "path", false, req, &GD.logpath, "",
479    //         "Path for output log file");
480    opts.Add('i', "input", "name", true, req, &GD.inputfile, "",
481             "Name of input file (-i|--input optional) [-i]");
482    opts.Add(0, "path", "dir", false, req, &GD.inpath, "",
483             "Path for input file");
484    // input
485    opts.Add('y', "col", "c", false, req, &GD.col, "\n# input:",
486             "read data in this column [-y|-c]");
487    opts.Add('x', "xcol", "c", false, req, &GD.xcol, "",
488             "also read 'x' data in this column [-x]");
489    opts.Add(0, "wt", "c", false, req, &GD.wcol, "",
490             "weight with fabs() of data in this column [-wt]");
491 
492    // modify input
493    opts.Add(0, "beg", "xb", false, req, &GD.begstr, "\n# modify input:",
494             "include only input that satisfies x > xb");
495    opts.Add(0, "end", "xe", false, req, &GD.endstr, "",
496             "include only input that satisfies x < xe");
497    opts.Add(0, "min", "dmin", false, req, &GD.minstr, "",
498             "include only input that satisfies data > dmin");
499    opts.Add(0, "max", "dmax", false, req, &GD.maxstr, "",
500             "include only input that satisfies data < dmax");
501    opts.Add(0, "debias", "d", false, req, &GD.debstr, "",
502             "remove bias d from data to compute stats");
503    opts.Add(0, "debias0", "", false, req, &GD.debias0, "",
504             "remove bias = (1st data pt) from data to compute stats");
505    // plots
506    opts.Add(0, "plot", "", false, req, &GD.doStemLeaf, "\n# plots:",
507             "generate a stem-and-leaf plot from the data");
508    opts.Add(0, "qplot", "", false, req, &GD.doQplot, "",
509             "generate data for data vs. quantile plot [-> rstats.out]");
510    opts.Add(0, "bin", "", false, req, &GD.binstr, "",
511             "histogram: define bins and count data [ignore x, set nostats]\n"
512             +pad+"  arg = <n> (int) compute approximately n bins\n"
513             +pad+"  arg = <w> (float) compute bins of width w\n"
514             +pad+"  arg = <n,w,cf> set bin number,width,center-of-first\n"
515             +pad+"  (hint: pipe into plot -x 2 -y 3 --hist 0 -g 640x480)");
516    // analysis
517    opts.Add(0, "sum", "", false, req, &GD.doSum, "\n# analysis:",
518             "compute sum of data [ignore x]");
519    opts.Add(0, "sum+", "", false, req, &GD.doSumPlus, "",
520             "compute sum, range, gaps and estimated stepsize [ignore x]");
521    opts.Add(0, "fit", "f[,x]", false, req, &GD.fitstr, "",
522             "fit a robust polynomial of degree f to data(xdata) [-> rstats.out]\n"
523             +pad+"  if 1 or more <,x> are present, also evaluate fit at x(s)");
524    opts.Add(0, "seq", "", false, req, &GD.doSeq, "",
525             "output data, in input order, with sequential stats");
526    opts.Add(0, "disc", "y[,x]", false, req, &GD.discstr, "",
527             "output data, first differences, and discontinuities with stats\n"
528             +pad+"  per segment; use y,x for data,xdata limits");
529    // filters
530    opts.Add(0, "fdf", "x", false, req, &GD.fdfstr, "\n# stats filters {*}:",
531             "first-difference filter slip limit x");
532    opts.Add(0, "fdf2", "x,s,r", false, req, &GD.fdf2str, "",
533             "first-diff(2) filter, limits: slip x, sig s, ratio r");
534    opts.Add(0, "wind", "n[,s,r]", false, req, &GD.windstr, "",
535             "window filter width n, limits: slip s, ratio r");
536    opts.Add(0, "xwind", "n[,s,r]", false, req, &GD.xwindstr, "",
537             "window filter (--wind) with 2-sample stats");
538    opts.Add(0, "fix", "n[,x,s]", false, req, &GD.fixfstr, "",
539             "FDiff 'fix' filter (for SSD phase) width n, limits: slip x sigma s");
540    // others
541    opts.Add(0, "wnj", "pom", false, req, &GD.wnjpom, "\n# other {*}:",
542             "white noise jerk KF using pom = process/meas noises (small=>smooth)\n"
543             +pad+"  e.g. rstats prsclk.dat -x 1 -y 11 --wnj 1.e-8 | grep KSU | plot\n"
544             +pad+"  -x 3 -y 10,data -y 4,,'lines lt 3' -y2 11,res --points");
545    opts.Add(0, "fft", "dt", false, req, &GD.dtfft, "",
546             "compute FFT with dt [dt=0 => compute dt from xdata] NB beware -p\n"
547             +pad+"  e.g. rstats testfft.data -x 1 -y 9 --fft 0.0034722 |\n"
548             +pad+"       plotrfft -o2 'set xr [0:25]'");
549    opts.Add(0, "KS", "", false, req, &GD.doKS, "",
550             "also output the Anderson-Darling statistic, a KS-test,\n"
551             +pad+"  where AD > 0.752 implies non-normal");
552    // output
553    opts.Add(0, "nostats", "", false, req, &GD.nostats, "\n# output:",
554             "supress total stats output (for analyses)");
555    opts.Add('q', "quiet", "", false, req, &GD.quiet, "",
556             "supress title, timing and other non-essential output [-q]");
557    opts.Add(0, "stdout", "", false, req, &GD.dostdout, "",
558             "send rstats.out output to stdout");
559    opts.Add(0, "outs", "s", false, req, &GD.outstr, "",
560             "explicitly list all data outside s*(outlier limits)");
561    opts.Add('b', "brief", "", false, req, &GD.brief, "",
562             "single-line quiet stats output (conv, rob, wtd, r-wtd, 2-samp) [-b]\n"
563             +pad+"  [or -bc -br -bw -brw -b2 for single quiet brief output]");
564    opts.Add('l', "label", "L", false, req, &GD.label, "",
565             "add label L to the (brief/disc/seq/fdf/wind/fft) outputs [-l]");
566    opts.Add('p', "prec", "n", false, req, &GD.prec, "\n# format and help:",
567             "specify precision of all float outputs [-p]");
568    opts.Add('w', "width", "n", false, req, &GD.width, "",
569             "specify width of all float outputs");
570    // help,verbose,debug handled by CommandLine
571 
572    // add options that are ignored (true if it has an arg)
573    //opts.Add_ignore("--PRSoutput",true);
574 
575    // deprecated args
576    opts.Add_deprecated("-c","-y");
577 
578    // --------------------------------------------------------------------------
579    // declare it and parse it; write all errors to string GD.cmdlineErrors
580    int iret = opts.ProcessCommandLine(argc,argv,PrgmDesc,
581                          GD.cmdlineUsage,GD.cmdlineErrors,GD.cmdlineUnrecognized);
582    if(iret == -2) return iret;      // bad alloc
583    if(iret == -3) return iret;      // cmd line definition invalid
584 
585    // --------------------------------------------------------------------------
586    // do extra parsing -- append errors to GD.cmdlineErrors
587    vector<string> fields;
588    ostringstream oss;
589 
590    // unrecognized arguments - few exceptions or an error
591    if(GD.cmdlineUnrecognized.size() > 0) {
592       vector<string> unrecogs;
593       // backward compatibility...
594       for(i=0; i<GD.cmdlineUnrecognized.size(); i++) {
595          string str(GD.cmdlineUnrecognized[i]);
596          if(str == "-bc")      GD.bc = true;
597          else if(str == "-br") GD.br = true;
598          else if(str == "-bw") GD.bw = true;
599          else if(str == "-brw") GD.brw = true;
600          else if(str == "-b2") GD.b2 = true;
601          else {
602             if(!GD.inputfile.empty()) unrecogs.push_back(str);
603             else GD.inputfile = str;
604          }
605       }
606       if(unrecogs.size()) {
607          oss << " Error - unrecognized arguments\n";
608          for(i=0; i<unrecogs.size(); i++)
609             oss << unrecogs[i] << "\n";
610          oss << " End of unrecognized arguments\n";
611       }
612    }
613 
614    // check inconsistent cmdline input
615    if(GD.bw && GD.wcol == -1) {
616       oss << " Warning - brief(w) but no --wcol - ignore bw output";
617       GD.bw = false;
618    }
619    if(GD.b2 && GD.xcol == -1) {
620       oss << " Warning - brief(2) but no --xcol - ignore b2 output";
621       GD.b2 = false;
622    }
623 
624    if(!GD.begstr.empty()) { GD.xbeg = asDouble(GD.begstr); GD.doxbeg = true; }
625    if(!GD.endstr.empty()) { GD.xend = asDouble(GD.endstr); GD.doxend = true; }
626    if(!GD.minstr.empty()) { GD.dmin = asDouble(GD.minstr); GD.dodmin = true; }
627    if(!GD.maxstr.empty()) { GD.dmax = asDouble(GD.maxstr); GD.dodmax = true; }
628    if(!GD.debstr.empty()) { GD.debias = asDouble(GD.debstr); GD.dodebias = true; }
629    if(!GD.outstr.empty()) { GD.outscale = asDouble(GD.outstr); GD.doOuts = true; }
630 
631    // set switches using CommandLine::count()
632    GD.doFit = (opts.count("fit") > 0);
633    GD.doFDF = (opts.count("fdf") > 0);
634    GD.doFDF2 = (opts.count("fdf2") > 0);
635    GD.doWF = (opts.count("wind") > 0);
636    GD.doXWF = (opts.count("xwind") > 0);
637    GD.doFixF = (opts.count("fix") > 0);
638    GD.doWNJ = (opts.count("wnj") > 0);
639    GD.doFFT = (opts.count("fft") > 0);
640 
641    // bin
642    if(!GD.binstr.empty()) {
643       GD.doBin = true;
644       fields = split(GD.binstr,',');
645       if(fields.size() == 1) {
646          if(isDigitString(fields[0])) {
647             GD.nbin = asInt(fields[0]);
648             GD.whichbin = 1;
649          }
650          else if(isDecimalString(fields[0])) {
651             GD.widbin = asDouble(fields[0]);
652             GD.whichbin = 2;
653          }
654       }
655       else if(fields.size() == 3) {
656          GD.nbin = asInt(fields[0]);
657          GD.widbin = asDouble(fields[1]);
658          GD.firstbin = asDouble(fields[2]);
659          GD.whichbin = 3;
660       }
661       else {
662          oss << " Error - invalid argument to --bin " << GD.binstr << "\n";
663          GD.doBin = false;
664       }
665    }
666 
667    // fit
668    if(GD.doFit) {
669       fields = split(GD.fitstr,',');
670       GD.nfit = asInt(fields[0]);
671       if(fields.size() > 1) {
672          for(i=1; i<fields.size(); i++)
673             GD.xevalfit.push_back(asDouble(fields[i]));
674       }
675    }
676 
677    // discontinuities
678    if(!GD.discstr.empty()) {
679       GD.doDisc = true;
680       fields = split(GD.discstr,',');
681       if(fields.size() == 1) {
682          GD.ytol = asDouble(fields[0]);
683          GD.xtol = -1.0;
684       }
685       else if(fields.size() == 2) {
686          GD.ytol = asDouble(fields[0]);
687          GD.xtol = asDouble(fields[1]);
688       }
689       else {
690          oss << " Error - invalid argument to --disc " << GD.discstr << "\n";
691          GD.doDisc = false;
692       }
693    }
694 
695    // fdf filter
696    if(GD.doFDF) {
697       fields = split(GD.fdfstr,',');
698       if(fields.size() == 1) {
699          GD.fdfstep = asDouble(fields[0]);
700       }
701       else {
702          oss << " Error - invalid argument to --fdf " << GD.fdfstr << "\n";
703          GD.doFDF = false;
704       }
705    }
706 
707    // fdf2 filter
708    if(GD.doFDF2) {
709       fields = split(GD.fdf2str,',');
710       if(fields.size() == 3) {
711          GD.fdfstep = asDouble(fields[0]);
712          GD.fdfsig = asDouble(fields[1]);
713          GD.fdfrat = asDouble(fields[2]);
714       }
715       else {
716          oss << " Error - invalid argument to --fdf2 " << GD.fdf2str << "\n";
717          GD.doFDF2 = false;
718       }
719    }
720 
721    // fix filter
722    if(GD.doFixF) {
723       fields = split(GD.fixfstr,',');
724       if(fields.size() == 1) {
725          GD.fixN = asInt(fields[0]);
726       }
727       else if(fields.size() == 3) {
728          GD.fixN = asInt(fields[0]);
729          GD.fixlim = asDouble(fields[1]);
730          GD.fixsig = asDouble(fields[2]);
731       }
732       else {
733          oss << " Error - invalid argument to --fix " << GD.fixfstr << "\n";
734          GD.doFixF = false;
735       }
736    }
737 
738    // window filters
739    if(GD.doWF || GD.doXWF) {
740       fields = split((GD.doWF ? GD.windstr : GD.xwindstr),',');
741       if(fields.size() == 1)
742          GD.windwidth = asInt(fields[0]);
743       else if(fields.size() == 3) {
744          GD.windstep = asDouble(fields[1]);
745          GD.windratio = asDouble(fields[2]);
746       }
747       else
748          oss << " Error - invalid argument to --" << (GD.doWF ? "":"x") << "wind "
749                << (GD.doWF ? GD.windstr : GD.xwindstr) << "\n";
750    }
751 
752    // set nostats
753    if(GD.doBin || GD.doFDF || GD.doFDF2 || GD.doWF || GD.doXWF
754                || GD.doFixF || GD.doWNJ || GD.doFFT)
755       GD.nostats = true;
756 
757    // set -b flags
758    if(GD.brief) {
759       GD.bc = GD.br = GD.brw = true;
760       if(GD.wcol > -1) GD.bw = true;
761       if(GD.xcol > -1) GD.b2 = true;
762    }
763 
764    // set quiet when brief
765    GD.quiet = (GD.quiet || GD.brief || GD.bc || GD.br || GD.bw || GD.brw || GD.b2);
766 
767    // --------------------------------------------------------------------------
768    // add to command line errors
769    GD.cmdlineErrors += oss.str();
770 
771    // set verbose and debug (CommandLine sets LOGlevel)
772    GD.verbose = (LOGlevel >= VERBOSE);
773    GD.debug = (LOGlevel - DEBUG);
774 
775    // --------------------------------------------------------------------------
776    // dump it
777    oss.str("");         // clear it
778    oss << "------ Summary of " << GD.PrgmName
779       << " command line configuration --------" << endl;
780    opts.DumpConfiguration(oss);
781       // perhaps dump the 'extra parsing' things
782    oss << "------ End configuration summary --------" << endl;
783    GD.cmdlineDump = oss.str();
784 
785    if(opts.hasHelp()) return 1;
786    if(opts.hasErrors()) return 2;
787    if(!GD.cmdlineErrors.empty()) return 2;
788    return iret;
789 }
790 catch(Exception& e) { GPSTK_RETHROW(e); }
791 }
792 
793 //------------------------------------------------------------------------------------
794 // Title, debug output of cmdline, define GD.msg, include_path for input file
Prepare(void)795 int Prepare(void)
796 {
797 try {
798    int i,j;
799    GlobalData& GD=GlobalData::Instance();
800 
801    // display title
802    if(!GD.quiet) cout << "# " << GD.Title << endl;
803 
804    if(GD.debug > -1) {
805       cout << "Found debug switch at level " << GD.debug << endl;
806       cout << "\n" << GD.cmdlineUsage << endl;  // this will contain list of args
807       GD.verbose = true;
808    }
809    if(GD.verbose && !GD.quiet) cout << GD.cmdlineDump << endl;
810 
811    // set msg
812    ostringstream oss;
813    oss << "Data of col " << GD.col;
814    if(GD.xcol > -1) oss << ", x of col " << GD.xcol;
815    oss << ", file " << GD.inputfile;
816    GD.msg = oss.str();
817 
818    //include_path(GD.logpath,GD.logfile);
819    //expand_filename(GD.logfile);
820 
821    if(GD.inputfile.empty()) {
822       GD.inputfile = string("stdin");
823       if(GD.verbose && !GD.quiet)
824          cout << "# Input from stdin\n";
825    }
826    else {
827       include_path(GD.inpath,GD.inputfile);
828       expand_filename(GD.inputfile);
829       if(GD.verbose && !GD.quiet)
830          cout << "# Found input file name " << GD.inputfile << endl;
831    }
832 
833    return 0;
834 }
835 catch(Exception& e) { GPSTK_RETHROW(e); }
836 }
837 
838 //------------------------------------------------------------------------------------
ReadAndCompute(void)839 int ReadAndCompute(void)
840 {
841 try {
842    GlobalData& GD=GlobalData::Instance();
843 
844    // open input file -------------------------------------------
845    istream *pin;
846    if(GD.inputfile != string("stdin")) {
847       pin = new ifstream(GD.inputfile.c_str());
848       if(pin->fail()) {
849          cout << "Could not open file " << GD.inputfile << " .. abort.\n";
850          return -2;
851       }
852    }
853    else {
854       pin = &cin;
855    }
856 
857    // read input file -------------------------------------------
858    unsigned int i,nd(0),nxd(0);
859    {
860       int j;                              // not unsigned!
861       double d,x(-1.0),w(-1.0),bias;
862       string line;
863       vector<string> words;
864       while(1) {
865          getline(*pin,line);
866          if(pin->eof() || !pin->good()) break;
867 
868          StringUtils::stripLeading(line," ");
869          if(line[0] == '#') continue;
870 
871          StringUtils::stripTrailing(line,"\n");
872          StringUtils::stripTrailing(line,"\r");
873          StringUtils::stripTrailing(line," ");
874          StringUtils::change(line,"\t"," ");
875 
876          if(line.empty()) continue;
877 
878          words = StringUtils::split(line,' ');
879          j = words.size();
880 
881          //cout << "LINE (" << j << ") " << line << endl;
882 
883          // check input   NB col numbers start at 1, indexes start at 0
884          if(j == 0) continue;
885          if(GD.col > j) { nd++; continue; }
886          if(GD.xcol > j) { nxd++; continue; }
887          if(GD.wcol > j) continue;
888          if(!(isScientificString(words[GD.col-1]))) { nd++; continue; }
889          if(GD.xcol > -1 && !(isScientificString(words[GD.xcol-1])))
890                { nd++; continue; }
891          if(GD.wcol > -1 && !(isScientificString(words[GD.wcol-1]))) continue;
892 
893          // user limits on data
894          d = asDouble(words[GD.col-1]);
895          if(GD.dodmin && d < GD.dmin) continue;
896          if(GD.dodmax && d > GD.dmax) continue;
897 
898          if(GD.xcol > -1) {
899             x = asDouble(words[GD.xcol-1]);
900             // user limits on x
901             if(GD.doxbeg && x < GD.xbeg) continue;
902             if(GD.doxend && x > GD.xend) continue;
903          }
904          if(GD.wcol > -1)
905             w = asDouble(words[GD.wcol-1]);
906 
907          // debias
908          if(GD.debias0 && GD.data.size() == 0) { GD.debias = d; GD.dodebias = true; }
909          if(GD.dodebias) d -= GD.debias;
910 
911          //cout << "READ " << i << " " << d << " " << x << endl;
912          GD.data.push_back(d);
913          if(GD.xcol > -1) GD.xdata.push_back(x);
914          if(GD.wcol > -1) GD.wdata.push_back(w);
915 
916       }
917 
918       if(pin != &cin) {
919          ((ifstream *)pin)->close();
920          delete pin;
921       }
922    }
923 
924    // check that input is good ----------------------------------
925    if(GD.data.size() < 2) {
926       cout << "Abort: not enough data: " << GD.data.size() << " data read";
927       if(nd > 0) cout << " [data(col) not found on " << nd << " lines]";
928       if(nxd > 0) cout << " [data(xcol) not found on " << nxd << " lines]";
929       cout << endl;
930       return 5;
931    }
932    if(GD.xcol != -1 && GD.xdata.size() == 0) {
933       cout << "Abort: No data found in 'x' column." << endl;
934       return 5;
935    }
936    if(nd > GD.data.size()/2)
937       cout << "Warning: data(col) not found on " << nd << " lines" << endl;
938    if(nxd > GD.xdata.size()/2)
939       cout << "Warning: data(xcol) not found on " << nxd << " lines" << endl;
940 
941    if(GD.verbose) cout << "Found " << GD.data.size() << " data.\n";
942 
943    // compute stats ---------------------------------------------
944    const unsigned N(GD.data.size());
945    for(i=0; i<N; i++) {
946       GD.cstats.Add(GD.data[i]);
947       if(GD.xcol > -1) GD.tsstats.Add(GD.xdata[i],GD.data[i]);
948       if(GD.wcol > -1) GD.wstats.Add(GD.data[i], GD.wdata[i]);
949    }
950 
951    // compute robust stats --------------------------------------------------
952    {
953       // NB do not sort GD.data and GD.xdata
954       vector<double> data(GD.data),xdata(GD.xdata),robwts(N);
955 
956       if(xdata.size() > 0) QSort(&data[0],&xdata[0],N);
957       else                 QSort(&data[0],N);
958 
959       Robust::Quartiles(&data[0],data.size(),GD.Q1,GD.Q3);
960       // outlier limit (high) 2.5Q3-1.5Q1
961       // outlier limit (low ) 2.5Q1-1.5Q3
962       GD.mad = Robust::MedianAbsoluteDeviation(&data[0],N,GD.median);
963       GD.mest = Robust::MEstimate(&data[0], N, GD.median, GD.mad, &robwts[0]);
964 
965       for(i=0; i<N; i++)
966          GD.robwtstats.Add(data[i],robwts[i]);
967    }
968 
969    return 0;
970 }
971 catch(Exception& e) { GPSTK_RETHROW(e); }
972 }
973 
974 //------------------------------------------------------------------------------------
OutputStats(void)975 int OutputStats(void)
976 {
977 try {
978    GlobalData& GD=GlobalData::Instance();
979 
980    const int N(GD.data.size());
981    cout << fixed << setprecision(GD.prec);
982 
983    // output stats ----------------------------------------------------------
984    string fitmsg(GD.nfit==0 ? "" : "(fit resid)");
985    string label(GD.label.empty() ? "" : " "+GD.label);
986    // TD handle bias output better
987 
988    if(GD.bc || GD.b2) {
989       cout << "rstats(con):" << label
990          << " N " << setw(GD.width) << GD.cstats.N()
991          << "  Ave " << setw(GD.width) << GD.cstats.Average()
992          << "  Std " << setw(GD.width) << GD.cstats.StdDev()
993          << "  Var " << setw(GD.width) << GD.cstats.Variance()
994          << "  Min " << setw(GD.width) << GD.cstats.Minimum()
995          << "  Max " << setw(GD.width) << GD.cstats.Maximum()
996          << "  P2P " << setw(GD.width) << GD.cstats.Maximum()-GD.cstats.Minimum();
997       if(GD.dodebias) cout << " Bias " << GD.debias;
998       cout << endl;
999    }
1000    else if(!GD.quiet) {
1001       cout << "Conventional statistics: " << GD.msg << ":\n";
1002       cout << GD.cstats << "  Median = " << GD.median << endl;
1003       if(GD.dodebias) cout << " Bias    = " << GD.debias << endl;
1004    }
1005 
1006    if(GD.xcol > -1) {
1007       if(GD.b2) {
1008          cout << "rstats(two):" << label
1009             << " N " << setw(GD.width) << GD.data.size()
1010             //<< " VarX " << setprecision(GD.prec) << GD.tsstats.VarianceX()
1011             //<< " VarY " << setprecision(GD.prec) << GD.tsstats.VarianceY()
1012             << "  Int " << setprecision(GD.prec) << GD.tsstats.Intercept()
1013             << "  Slp " << setprecision(GD.prec) << GD.tsstats.Slope()
1014             << " +- " << setprecision(GD.prec) << GD.tsstats.SigmaSlope()
1015             << "  CSig " << setprecision(GD.prec) << GD.tsstats.SigmaYX()
1016             << "  Corr " << setprecision(GD.prec) << GD.tsstats.Correlation();
1017          if(GD.dodebias) cout << "  Bias " << GD.debias;
1018          cout << endl;
1019       }
1020       else if(!GD.quiet) {
1021          cout << "Two-sample statistics: " << GD.msg << ":\n"
1022             << setprecision(GD.prec) << GD.tsstats << endl;
1023          if(GD.dodebias) cout << "  Bias " << GD.debias;
1024       }
1025    }
1026 
1027    if(GD.bw && GD.wcol > -1) {
1028       cout << "rstats(wtd):" << label
1029          << " N " << setw(GD.width) << GD.wstats.N()
1030          << "  Ave " << setw(GD.width) << GD.wstats.Average()
1031          << "  Std " << setw(GD.width) << GD.wstats.StdDev()
1032          << "  Var " << setw(GD.width) << GD.wstats.Variance()
1033          << "  Min " << setw(GD.width) << GD.wstats.Minimum()
1034          << "  Max " << setw(GD.width) << GD.wstats.Maximum()
1035          << "  P2P " << setw(GD.width) << GD.wstats.Maximum()-GD.wstats.Minimum();
1036       if(GD.dodebias) cout << " Bias " << GD.debias;
1037       cout << endl;
1038    }
1039    else if(!GD.quiet && GD.wcol > -1) {
1040       cout << "Conventional weighted statistics: "
1041             << GD.msg << ", wt of col " << GD.wcol << ":\n"
1042             << fixed << setprecision(GD.prec) << GD.wstats << endl;
1043       if(GD.dodebias) cout << " Bias    = " << GD.debias << endl;
1044    }
1045 
1046    if(GD.brw) {
1047       cout << "rstats(rwt):" << label
1048          << " N " << setw(GD.width) << GD.robwtstats.N()
1049          << "  Ave " << setw(GD.width) << GD.robwtstats.Average()
1050          << "  Std " << setw(GD.width) << GD.robwtstats.StdDev()
1051          << "  Var " << setw(GD.width) << GD.robwtstats.Variance()
1052          << "  Min " << setw(GD.width) << GD.robwtstats.Minimum()
1053          << "  Max " << setw(GD.width) << GD.robwtstats.Maximum()
1054          << "  P2P " << setw(GD.width)
1055                                  << GD.robwtstats.Maximum()-GD.robwtstats.Minimum();
1056       if(GD.dodebias) cout << " Bias " << GD.debias;
1057       cout << endl;
1058    }
1059    else if(!GD.quiet) {
1060       cout << "Conventional statistics with robust weighting: " << GD.msg << ":\n"
1061          << fixed << setprecision(GD.prec) << GD.robwtstats << endl;
1062       if(GD.dodebias) cout << " Bias    = " << GD.debias << endl;
1063    }
1064 
1065    if(GD.br) {
1066       cout << "rstats(rob):" << label
1067          << " N " << setw(GD.width) << GD.data.size()
1068          << "  Med " << setw(GD.width) << GD.median << "  MAD " << GD.mad
1069          << "  Min " << setw(GD.width) << GD.cstats.Minimum()
1070          << "  Max " << setw(GD.width) << GD.cstats.Maximum()
1071          << "  P2P " << setw(GD.width) << GD.cstats.Maximum()-GD.cstats.Minimum()
1072          << "  Q1 " << setw(GD.width) << GD.Q1 << "  Q3 " << setw(GD.width)<< GD.Q3
1073          << "  QL " << setw(GD.width) << 2.5*GD.Q1-1.5*GD.Q3
1074          << "  QH " << setw(GD.width) << 2.5*GD.Q3-1.5*GD.Q1;
1075       if(GD.dodebias) cout << "  Bias " << GD.debias;
1076       cout << endl;
1077    }
1078    else if(!GD.quiet) {
1079       cout << "Robust statistics: " << GD.msg << ":\n";
1080 	   cout << " Number    = " << GD.data.size() << endl;
1081 	   cout << " Quartiles = " << setw(11) << setprecision(GD.prec) << GD.Q1
1082                      << "(1) " << setw(11) << GD.Q3
1083                      << "(3) " << setw(11) << 2.5*GD.Q3-1.5*GD.Q1
1084                      << "(H) " << setw(11) << 2.5*GD.Q1-1.5*GD.Q3
1085                      << "(L)" << endl;
1086 	   cout << " Median = " << GD.median << "   MEstimate = " << GD.mest
1087 	        << "   MAD = " << GD.mad << endl;
1088       if(GD.dodebias) cout << " Bias      = " << GD.debias << endl;
1089    }
1090 
1091    if(GD.doKS) {
1092       GD.KS = ADtest(&GD.data[0],N,GD.cstats.Average(),GD.cstats.StdDev(),false);
1093 	   cout << "rstats KS test = " << setprecision(GD.prec) << GD.KS << endl;
1094    }
1095 
1096    return 0;
1097 }
1098 catch(Exception& e) { GPSTK_RETHROW(e); }
1099 }
1100 
1101 //------------------------------------------------------------------------------------
StemAndLeaf(void)1102 int StemAndLeaf(void)
1103 {
1104 try {
1105    GlobalData& GD=GlobalData::Instance();
1106 
1107    try {
1108       vector<double> data(GD.data);
1109       QSort(&data[0],data.size());
1110 
1111       // NB assumes array is sorted
1112       Robust::StemLeafPlot(cout, &data[0], data.size(), GD.msg);
1113    }
1114    catch(Exception& e) {
1115       if(e.getText(0) == string("Invalid input") ||
1116          e.getText(0) == string("Array has zero range")) {
1117          cout << "(No stem and leaf plot; data is trivial)\n";
1118          return 0;
1119       }
1120       GPSTK_RETHROW(e);
1121    }
1122 
1123    return 0;
1124 }
1125 catch(Exception& e) { GPSTK_RETHROW(e); }
1126 }
1127 
1128 //------------------------------------------------------------------------------------
QuantilePlot(void)1129 int QuantilePlot(void)
1130 {
1131 try {
1132    unsigned int i;
1133    GlobalData& GD=GlobalData::Instance();
1134 
1135    // get quantiles
1136    vector<double> qdata(GD.data.size());
1137    Robust::Quantiles(&qdata[0],qdata.size());
1138 
1139    // output to file rstats.out
1140    ostream *pout = new ofstream("rstats.out");
1141    if(GD.dostdout || pout->fail()) {
1142       if(!GD.dostdout) cout << "Unable to open file rstats.out - output to screen\n";
1143       delete pout;
1144       pout = &cout;
1145    }
1146    else if(!GD.quiet) cout << "Output quantiles, data to file rstats.out\n";
1147 
1148    // get TS stats
1149    TwoSampleStats<double> TSS;
1150    for(i=0; i<GD.data.size(); i++) TSS.Add(qdata[i],GD.data[i]);
1151 
1152    *pout << "# Quantile plot mean " << fixed << setprecision(GD.prec)
1153       << TSS.Intercept() << " std (slope) " << TSS.Slope()
1154       << " quantile data line follow:" << endl;
1155    for(i=0; i<GD.data.size(); i++)
1156       *pout << qdata[i] << " " << GD.data[i]
1157             << " " << TSS.Intercept() + TSS.Slope()*qdata[i] << endl;
1158 
1159    if(pout != &cout)
1160    {
1161       ((ofstream *)pout)->close();
1162       delete pout;
1163    }
1164 
1165    cout << "Data vs quantiles fit to line yields y-intercept (=mean) "
1166       << fixed << setprecision(GD.prec) << TSS.Intercept()
1167       << " and slope (=std.dev.) " << TSS.Slope() << endl;
1168 
1169    return 0;
1170 }
1171 catch(Exception& e) { GPSTK_RETHROW(e); }
1172 }
1173 
1174 //------------------------------------------------------------------------------------
1175 // bins - given min, max and number of bins, find "pretty" bin boundaries
1176 // (first-bin,step,<have nbins>) where first-bin is the *center* of the first bin,
1177 // input min,max,nbins, output the rest
1178 // return 0 if ok
Bins(const double & min,const double & max,int & nbins,double & firstbin,double & binstep,int & prec)1179 int Bins(const double& min, const double& max, int& nbins,
1180          double& firstbin, double& binstep, int& prec)
1181 {
1182 try {
1183    if(nbins <= 2) GPSTK_THROW(Exception("Too few bins"));
1184    firstbin = binstep = 0.0;
1185 
1186    double amin(min),amax(max);
1187    if(amin > amax) { amin = max; amax = min; }
1188    else if(amin == amax) GPSTK_THROW(Exception("Equal limits"));
1189 
1190    binstep = (amax-amin)/double(nbins);
1191    double tmp = log10(double(binstep)) - 1.0;
1192    prec = int(tmp + (tmp > 0 ? 0.5 : -0.5));
1193    double scal = pow(10.0,prec);
1194    while(binstep/scal < 1.0) { scal /= 10.0; prec--; }
1195    while(binstep/scal >= 10.0) { scal *= 10.0; prec++; }
1196    binstep = double(int(0.5+binstep/scal)*scal);
1197    if(::fabs(binstep) < 1.e-8) { cout << " Error - binstep < 1.e-8\n"; return -1; }
1198    double half(binstep/2.0);
1199    firstbin = binstep * int((amin + (amin > 0 ? 0.5 : -0.5))/binstep);
1200    while(firstbin-half > min) firstbin -= binstep;
1201    while(firstbin+half < min) firstbin += binstep;
1202    nbins = int((max-firstbin+half)/binstep);
1203    while(firstbin+(nbins-1.5)*binstep > max) nbins--;
1204    while(firstbin+(nbins-0.5)*binstep < max) nbins++;
1205    return 0;
1206 }
1207 catch(Exception& e) { GPSTK_RETHROW(e); }
1208 }
1209 
1210 //------------------------------------------------------------------------------------
FindBins(void)1211 int FindBins(void)
1212 {
1213 try {
1214    int i,j,k;
1215    GlobalData& GD=GlobalData::Instance();
1216    int prec;
1217    const double min(GD.cstats.Minimum()), max(GD.cstats.Maximum());
1218    if(GD.whichbin == 1) {        // only n user input
1219       i = Bins(min,max,GD.nbin,GD.firstbin,GD.widbin,prec);       // bins
1220       if(i) return -1;
1221       if(prec >= 0) prec = 0; else prec = -prec;
1222    }
1223    else if(GD.whichbin == 2) {   // compute firstbin from min, max and GD.widbin
1224       GD.nbin = 1+int(0.5+(max-min)/GD.widbin);
1225       GD.firstbin = int((min/GD.widbin)+(min>0.0 ? 0.5:-0.5))*GD.widbin;
1226       if(min < GD.firstbin-GD.widbin/2.0) { GD.nbin++; GD.firstbin -= GD.widbin; }
1227       if(max > GD.firstbin+(GD.nbin-0.5)*GD.widbin) { GD.nbin++; }
1228    }
1229    else if(GD.whichbin == 3) {   // nbin,widbin and firstbin are specified
1230       ;
1231    }
1232 
1233    // compute precision exponent
1234    if(GD.whichbin != 1) {
1235       double tmp = log10(double(GD.widbin)) - 1.0;
1236       prec = int(tmp + (tmp > 0 ? 0.5 : -0.5));
1237       double scal = pow(10.0,prec);
1238       while(GD.widbin/scal < 1.0) { scal /= 10.0; prec--; }
1239       while(GD.widbin/scal >= 10.0) { scal *= 10.0; prec++; }
1240       if(prec < 0) prec = -prec+1;
1241    }
1242 
1243    if(GD.nbin > 60) {
1244       cout << "Error - too many bins: " << GD.nbin << endl;
1245       return 0;
1246    }
1247 
1248    double half(GD.widbin/2.0);
1249    vector<int> bins(GD.nbin,0);
1250    for(k=0,i=0; i<GD.data.size(); i++) {
1251       j = int((GD.data[i]-GD.firstbin+half)/GD.widbin);
1252       if(j < 0 || j > GD.nbin-1) {
1253          cout << "# Warning - invalid bin " << j << " for data " << GD.data[i]
1254                   << endl;
1255          continue;
1256       }
1257       bins[j]++;
1258       k++;
1259    }
1260 
1261    // write out data (bin centers)
1262    cout << "# bins: N,width,first " << GD.nbin
1263          << "," << GD.widbin << "," << GD.firstbin << endl;
1264    cout << "# n center samples (low_edge to high_edge)" << endl;
1265    cout << "# total number of samples within bins " << k //<< " prec " << prec
1266    << endl;
1267    for(i=0; i<GD.nbin; i++)
1268       cout << setw(3) << i+1 << " " << fixed << setprecision(prec)
1269          << GD.firstbin+i*GD.widbin << " " << setw(GD.width) << bins[i]
1270          << setprecision(prec+1)
1271          << "    (" << GD.firstbin+(i-0.5)*GD.widbin << " to "
1272          << GD.firstbin+(i+0.5)*GD.widbin << ")" << endl;
1273 
1274    return 0;
1275 }
1276 catch(Exception& e) { GPSTK_RETHROW(e); }
1277 }
1278 
1279 //------------------------------------------------------------------------------------
ComputeSum(void)1280 int ComputeSum(void)
1281 {
1282 try {
1283    unsigned int i,j,k;
1284    GlobalData& GD=GlobalData::Instance();
1285 
1286    // compute sum, dt, gaps -------------------------------------------------
1287    bool first(true);
1288    const int ndtmax(9);
1289    int jj,kk,nleast,ndt[ndtmax];
1290    double sum(0.0),prev,dt,bestdt[ndtmax];
1291    vector<int> gapcounts;
1292    if(GD.doSumPlus) for(j=0; j<ndtmax; j++) ndt[j] = -1;
1293 
1294    // compute dt and sum
1295    prev = sum = GD.data[0];
1296    for(i=1; i<GD.data.size(); i++) {
1297       sum += GD.data[i];
1298       if(GD.doSumPlus) {
1299          dt = GD.data[i] - prev;
1300          for(j=0; j<ndtmax; j++) {
1301             if(ndt[j] <= 0) { bestdt[j]= dt; ndt[j]=1; break; }
1302             if(dt != 0.0 && fabs((dt-bestdt[j])/dt) < 0.01) { ndt[j]++; break; }
1303             if(j == ndtmax-1) {
1304                kk = 0;
1305                nleast = ndt[kk];
1306                for(k=1; k<ndtmax; k++)
1307                   if(ndt[k]<=nleast) { kk=k; nleast=ndt[k]; }
1308                ndt[kk]=1; bestdt[kk]=dt;
1309             }
1310          }
1311          prev = GD.data[i];
1312       }
1313    }
1314 
1315    if(GD.doSumPlus) {
1316       for(j=0,i=1; i<ndtmax; i++) { if(ndt[i]>ndt[j]) j=i; }
1317       bestdt[0] = bestdt[j]; ndt[0] = ndt[j];
1318 
1319       // find gaps
1320       for(i=0; i<GD.data.size(); i++) {
1321          dt = GD.data[i] - GD.data[0];
1322          jj = int(0.5+dt/bestdt[0])+1;    // compute the gapcount
1323 
1324          // update gap count for epochs
1325          if(gapcounts.size() == 0) {      // create the list
1326             gapcounts.push_back(jj);      // start time
1327             gapcounts.push_back(jj-1);    // end time
1328          }
1329          // update existing list
1330          k = gapcounts.size() - 1;        // index of the current end time
1331          if(jj == gapcounts[k] + 1) {     // no gap
1332             gapcounts[k] = jj;
1333          }
1334          else {                           // found a gap
1335             gapcounts.push_back(jj);      // start time
1336             gapcounts.push_back(jj);      // end time
1337          }
1338       }
1339    }
1340 
1341    // output
1342    cout << "Sum = " << fixed << setprecision(GD.prec) << sum << endl;
1343    if(GD.doSumPlus) {
1344       k = gapcounts.size()-1;          // size() is at least 2
1345       cout << "Best step = " << bestdt[0]
1346          << " (" << 100.*ndt[0]/(GD.data.size()-1) << "%)"
1347          << " : total " << GD.data.size() << " data, counts "
1348          << gapcounts[0] << " thru " << gapcounts[k] << endl;
1349       cout << "Range " << GD.data[0] << " thru " << GD.data[GD.data.size()-1] << endl;
1350       if(k <= 2)
1351          cout << "No gaps." << endl;
1352       else
1353          for(i=1; i<=k-2; i+=2) {
1354          jj = gapcounts[i+1]-gapcounts[i]-1;
1355          cout << "Gap at cnt " << gapcounts[i]+1
1356             << " = data " << GD.data[0]+gapcounts[i]*bestdt[0]
1357             << " : size " << jj << " cnts = " << jj*bestdt[0] << " data" << endl;
1358       }
1359       cout << endl;
1360    }
1361 
1362    return 0;
1363 }
1364 catch(Exception& e) { GPSTK_RETHROW(e); }
1365 }
1366 
1367 //------------------------------------------------------------------------------------
FitPoly(void)1368 int FitPoly(void)
1369 {
1370 try {
1371    unsigned int i,j;
1372    GlobalData& GD=GlobalData::Instance();
1373 
1374    vector<double> savedata(GD.data), robwts(GD.data.size());
1375    double *coef,eval,xx,x0;
1376 
1377    coef = new double[GD.nfit];
1378    if(!coef) {
1379       cout << "Abort: allocate coefficients failed.\n";
1380       return -4;
1381    }
1382 
1383    int iret = Robust::RobustPolyFit(&GD.data[0], &GD.xdata[0], GD.data.size(),
1384                                     GD.nfit, &coef[0], &robwts[0]);
1385 
1386    cout << "RobustPolyFit returns " << iret << endl;
1387    if(iret == 0) {
1388       cout << " Coefficients:" << setprecision(GD.prec);
1389       for(i=0; i<GD.nfit; i++) {
1390          if(fabs(coef[i]) < 0.001)
1391             cout << " " << scientific;
1392          else
1393             cout << " " << fixed;
1394          cout << coef[i];
1395       }
1396       cout << endl << fixed << setprecision(GD.prec);
1397       cout << " Offsets: Y(col " << GD.col << ") " << savedata[0]
1398          << " X(col " << GD.xcol << ") " << GD.xdata[0] << endl;
1399 
1400       // output to file rstats.out
1401       ostream *pout = new ofstream("rstats.out");
1402       if(GD.dostdout || pout->fail()) {
1403          if(!GD.dostdout)
1404             cout << "Unable to open file rstats.out - output to screen\n";
1405          delete pout;
1406          pout = &cout;
1407       }
1408       else if(!GD.quiet) {
1409          cout << " Output polynomial fit to file rstats.out (try plotrfit)\n";
1410       }
1411 
1412       // write to rstats.out
1413       x0 = GD.xdata[0];
1414       *pout << "#Xdata, Data, fit, resid, weight (" << GD.data.size() << " pts):"
1415          << fixed << setprecision(GD.prec) << endl;
1416       for(i=0; i<GD.data.size(); i++) {
1417          eval = savedata[0] + coef[0];
1418          xx = GD.xdata[i]-x0;
1419          for(j=1; j<GD.nfit; j++) { eval += coef[j]*xx; xx *= (GD.xdata[i]-x0); }
1420          *pout << fixed << setprecision(GD.prec)
1421                << GD.xdata[i] << " " << savedata[i]
1422                << " " << eval << " " << GD.data[i]
1423                << scientific << " " << robwts[i] << endl;
1424       }
1425       for(i=0; i<GD.xevalfit.size(); i++) {
1426          eval = savedata[0] + coef[0];
1427          xx = GD.xevalfit[i]-x0;
1428          for(j=1; j<GD.nfit; j++) { eval += coef[j]*xx; xx *= (GD.xevalfit[i]-x0); }
1429          *pout << fixed << setprecision(GD.prec) << "#Evaluate_Fit(X): X = "
1430                << GD.xevalfit[i] << " F(X) = " << eval << endl;
1431          if(!GD.dostdout && !GD.quiet) cout << fixed << setprecision(GD.prec)
1432             << " Evaluate Fit(" << GD.xevalfit[i] << ") = " << eval << endl;
1433       }
1434 
1435       if(pout != &cout)
1436       {
1437          ((ofstream *)pout)->close();
1438          delete pout;
1439       }
1440 
1441       //QSort(&robwts[0],robwts.size());
1442       //Robust::StemLeafPlot(cout, &robwts[0], robwts.size(), "weights");
1443    }
1444    //cout << endl;
1445    delete[] coef;
1446 
1447    ostringstream oss;
1448    oss << "Residuals of fit (deg " << GD.nfit << ") col " << GD.col
1449       << " vs x col " << GD.xcol << ", file " << GD.inputfile;
1450    GD.msg = oss.str();
1451 
1452    return 0;
1453 }
1454 catch(Exception& e) { GPSTK_RETHROW(e); }
1455 }
1456 
1457 //------------------------------------------------------------------------------------
Sequential(void)1458 int Sequential(void)
1459 {
1460 try {
1461    GlobalData& GD=GlobalData::Instance();
1462 
1463    Stats<double> stats;
1464    cout << "Data and sequential stats ([lab] [xdata] data n ave std)\n";
1465    cout << fixed << setprecision(GD.prec);
1466    for(unsigned int i=0; i<GD.data.size(); i++) {
1467       stats.Add(GD.data[i]);
1468       if(!GD.label.empty()) cout << GD.label << " ";
1469       if(GD.xdata.size()>0) cout << GD.xdata[i] << " ";
1470       cout << GD.data[i] << "   " << stats.N() << " "
1471            << stats.Average() << " "
1472            << (stats.N() > 1 ? stats.StdDev() : 0.0) << endl;
1473    }
1474 
1475    return 0;
1476 }
1477 catch(Exception& e) { GPSTK_RETHROW(e); }
1478 }
1479 
1480 //------------------------------------------------------------------------------------
Discontinuity(void)1481 int Discontinuity(void)
1482 {
1483 try {
1484    unsigned int i,j,k;
1485    GlobalData& GD=GlobalData::Instance();
1486 
1487    j = GD.data.size();
1488    k = GD.xdata.size();
1489 
1490    cout << fixed << setprecision(GD.prec);
1491    cout << "# Output " << (k > 0 ? "xdata,":"")
1492          << " data, 1st diff, and stats;\n# discontinuity tolerance y="
1493          << GD.ytol << ", x=" << GD.xtol << endl;
1494    cout << "#" << (GD.label.empty() ? "" : " [lab]") << (k > 0 ? " xdata" : "")
1495       << " data 1st-diff-y n ave std MSG(for disc.s)\n";
1496    cout << "# MSG=DISC" << (k > 0 ? " del-x" : "") << " del-data"
1497       << " (stats-for-prev-seg:) N ave std" << (k > 0 ? " xbeg xend" : "")
1498       << " gap/slip/EOD" << endl;
1499 
1500    double xlast(k > 0 ? GD.xdata[0] : 0);
1501    Stats<double> segstats;
1502    ostringstream oss;
1503 
1504    segstats.Add(GD.data[0]);
1505    if(k > 0) cout << GD.xdata[0] << " ";
1506    cout << GD.data[0] << " " << 0.0 << "   "
1507         << 1 << " " << GD.data[0] << " " << 0.0 << endl;
1508 
1509    for(i=1; i<j; i++) {                // loop over data (starting at 1)
1510       bool slip(false), gap(false);
1511       double fd(GD.data[i]-GD.data[i-1]);
1512       oss.str("");
1513 
1514       // is there a gap?
1515       if(k > 0 && GD.xtol > 0 && fabs(GD.xdata[i]-GD.xdata[i-1]) > GD.xtol)
1516          gap=true;
1517 
1518       // is there a potential discontinuity?
1519       if(fabs(fd) > GD.ytol)
1520          slip=true;
1521 
1522       if(gap || slip || i==j-1) {
1523          oss << " DISC" << fixed << setprecision(GD.prec);
1524          if(k > 0) {
1525             if(i < j-1) oss << " " << GD.xdata[i]-GD.xdata[i-1];
1526             else        oss << " " << "00";
1527          }
1528          if(i < j-1) oss << " " << fd;
1529          else        oss << " " << "00";
1530 
1531          if(i == j-1) segstats.Add(GD.data[i]);
1532 
1533          oss << " " << segstats.N()
1534             << " " << segstats.Average()
1535             << " " << (segstats.N()>1 ? segstats.StdDev() : 0.0);
1536          if(k > 0) oss << " " << xlast << " " << GD.xdata[i==j-1 ? i:i-1];
1537 
1538          if(slip) oss << " slip";
1539          if(gap) oss << " gap";
1540          if(i==j-1) oss << " EOD";
1541 
1542          segstats.Reset();
1543       }
1544 
1545       if(gap) xlast = GD.xdata[i];
1546 
1547       segstats.Add(GD.data[i]);
1548 
1549       // output data
1550       if(!GD.label.empty()) cout << GD.label << " ";
1551       if(k > 0) cout << GD.xdata[i] << " ";
1552       cout << GD.data[i] << " " << fd << "   "
1553            << segstats.N() << " "
1554            << segstats.Average() << " "
1555            << (segstats.N() > 1 ? segstats.StdDev() : 0.0)
1556            << oss.str()
1557            << endl;
1558    }
1559 
1560    return 0;
1561 }
1562 catch(Exception& e) { GPSTK_RETHROW(e); }
1563 }
1564 
1565 //------------------------------------------------------------------------------------
FDFilter(void)1566 int FDFilter(void)
1567 {
1568 try {
1569    unsigned int i,j,k;
1570    GlobalData& GD=GlobalData::Instance();
1571 
1572    // for output (TD make option) fill flags first
1573    vector<int> flags(GD.data.size(),0);
1574 
1575    // xdata and flags must exist but may be empty
1576    FirstDiffFilter<double> fdf(GD.xdata, GD.data, flags);
1577    fdf.setprecision(GD.prec);
1578    fdf.setLimit(GD.fdfstep);
1579    i = fdf.filter();
1580    cout << "# FD Filter returns " << i << endl;
1581    if(i < 0) cout << "# FD Filter failed (" << i << ")" << endl;
1582    else {
1583       if(GD.doFDF2) {
1584          string str;
1585          i = fdf.analyze2(GD.fdfrat,GD.fdfsig,str);
1586          cout << "#" << str << endl;
1587          return 0;
1588       }
1589       else i = fdf.analyze();
1590 
1591       if(i < 0) cout << "# FD Filter analysis failed (" << i << ")" << endl;
1592 
1593       for(i=0; i<fdf.results.size(); i++)
1594          fdf.getStats(fdf.results[i]);
1595       fdf.setDumpNoAnal(GD.verbose);
1596       fdf.dump(cout, GD.label);
1597 
1598       // clean the data based on results of filter
1599       vector< FilterHit<double> > hit=fdf.getResults();
1600       for(j=0; j<hit.size(); j++) {
1601          if(hit[j].type == FilterHit<double>::BOD) continue;
1602          else if(hit[j].type == FilterHit<double>::outlier) {
1603             for(k=0; k<hit[j].npts; k++)
1604                flags[hit[j].index+k] = -1;      // flag for outlier
1605          }
1606          else if(hit[j].type == FilterHit<double>::slip) {
1607             for(k=hit[j].index; k<GD.data.size(); k++)
1608                GD.data[k] -= hit[j].step;
1609          }
1610       }
1611 
1612       // write cleaned data to rstats.out
1613       ostream *pout = new ofstream("rstats.out");
1614       if(GD.dostdout || pout->fail()) {
1615          if(!GD.dostdout)
1616             cout << "Unable to open file rstats.out - output to screen\n";
1617          delete pout;
1618          pout = &cout;
1619       }
1620 
1621       for(i=0; i<GD.data.size(); i++)
1622          *pout << fixed << setprecision(GD.prec) << i
1623                << " " << (GD.xdata.size() ? GD.xdata[i] : (double)(i))
1624                << " " << GD.data[i] << " " << flags[i] << endl;
1625       if(pout != &cout)
1626       {
1627          ((ofstream *)pout)->close();
1628          delete pout;
1629       }
1630    }
1631 
1632    return 0;
1633 }
1634 catch(Exception& e) { GPSTK_RETHROW(e); }
1635 }
1636 
1637 //------------------------------------------------------------------------------------
WindFilter(void)1638 int WindFilter(void)
1639 {
1640 try {
1641    unsigned int i;
1642    GlobalData& GD=GlobalData::Instance();
1643 
1644    // for output fill flags first
1645    vector<int> flags(GD.data.size(),0);
1646 
1647    // xdata and flags must exist but may be empty
1648    WindowFilter<double> wf(GD.xdata, GD.data, flags);
1649    wf.setTwoSample(GD.doXWF);
1650    wf.setWidth(GD.windwidth);
1651    wf.setMinRatio(GD.windratio);
1652    wf.setMinStep(GD.windstep);
1653    wf.setprecision(GD.prec);
1654    i = wf.filter();
1655    if(i < 0) cout << "# window filter failed (" << i << ")" << endl;
1656    else {
1657       if(GD.debug > -1) wf.setDebug(true);
1658       wf.analyze();           // ignore return values
1659       if(GD.verbose) wf.setDumpAnalMsg(true);
1660       wf.dump(cout, GD.label);
1661    }
1662 
1663    return 0;
1664 }
1665 catch(Exception& e) { GPSTK_RETHROW(e); }
1666 }
1667 
1668 //------------------------------------------------------------------------------------
FixFilter(void)1669 int FixFilter(void)
1670 {
1671 try {
1672    int N;
1673    unsigned int i,j,k,Nsig;
1674    double Esiglim(0);
1675    GlobalData& GD=GlobalData::Instance();
1676 
1677    if(!GD.quiet) cout << "# fix filter with width " << GD.fixN << " limit "
1678          << fixed << setprecision(GD.prec)
1679          << GD.fixlim << " and siglim " << GD.fixsig << endl;
1680 
1681    // new call
1682    vector<int> flags;
1683    vector< FilterHit<double> > results;
1684 
1685    // NB arrays xdata, data, flags do NOT get edited - const
1686    IterativeFDiffFilter<double> ifdf(GD.xdata, GD.data, flags);
1687    ifdf.setWidth(GD.fixN);
1688    ifdf.setLimit(GD.fixlim);
1689    ifdf.setSigma(GD.fixsig);
1690    ifdf.setw(GD.width);
1691    ifdf.setprecision(GD.prec);
1692    ifdf.doVerbose(GD.verbose);
1693    ifdf.doResetSigma(true);
1694    ifdf.doSmallSlips(false);
1695 
1696    N = ifdf.analysis();
1697    results = ifdf.getResults();
1698    Nsig = ifdf.getNhighSigma();
1699    Esiglim = ifdf.getEstimatedSigmaLimit();
1700 
1701    if(N < 0)
1702       cout << "Error - not enough data for fix filter.\n";
1703    else if(N==0)
1704       cout << "No outliers or slips found.\n";
1705    else {
1706       unsigned int nb=0,ns=0,npts(0);
1707       double xbeg,xend;
1708       for(i=0; i<results.size(); i++) {
1709          cout << "FIXRES " << setw(2) << (i+1)
1710                << " " << results[i].asStringRead(GD.prec)
1711                << fixed << setprecision(GD.prec)
1712                << " (x=" << GD.xdata[results[i].index] << ")"
1713                << endl;
1714          if(i == 0) xbeg = xend = GD.xdata[results[i].index];
1715          if(GD.xdata[results[i].index] < xbeg) xbeg = GD.xdata[results[i].index];
1716          if(GD.xdata[results[i].index] > xend) xend = GD.xdata[results[i].index];
1717          if(results[i].isOutlier()) { nb++; npts += results[i].npts; }
1718          if(results[i].isSlip()) ns++;
1719       }
1720       cout << "FIXRES found " << N << " results: "
1721             << nb << " outlier" << (nb==1 ? "":"s")
1722             << " rejecting " << npts << (npts==1 ? " pt":" pts")
1723             << " and " << ns << " slip" << (nb==1 ? "":"s")
1724             << " (small slips " << (ifdf.doSmallSlips() ? "reported":"ignored") << ")"
1725             << fixed << setprecision(GD.prec)
1726             << " in x-interval (" << xbeg << " " << xend << ") = " << xend-xbeg
1727             << "\nFIXRES    with " << Nsig << " high sigma" << (Nsig==1 ? "":"s")
1728             << " remaining and estimated limit " << Esiglim << endl;
1729    }
1730 
1731    return 0;
1732 }
1733 catch(Exception& e) { GPSTK_RETHROW(e); }
1734 }
1735 
1736 //------------------------------------------------------------------------------------
WhiteNoiseJerkFilter(void)1737 int WhiteNoiseJerkFilter(void)
1738 {
1739 try {
1740    unsigned int i;
1741    GlobalData& GD=GlobalData::Instance();
1742 
1743    // WNJ uses LOG
1744    pLOGstrm = &cout;
1745    ConfigureLOG::ReportLevels() = false;
1746    ConfigureLOG::ReportTimeTags() = false;
1747 
1748    // compute white noise jerk filter
1749    // smaller process noise -> smoother; since mn and pn are const,
1750    // only ratio mn/pn determines smoothness - mn/pn ~ 1.e-8 typically
1751    vector<double> x,v,a;
1752    WNJfilter wnjkf;
1753    wnjkf.ptrx = &x;
1754    wnjkf.ptrv = &v;
1755    wnjkf.ptra = &a;
1756    wnjkf.Reset(3);
1757    if(GD.data.size() == 0) { LOG(ERROR) << "Error - no data"; return -1; }
1758    if(GD.xdata.size() < GD.data.size())
1759       { LOG(ERROR) << "Error - x data is missing"; return -1; }
1760    for(i=0; i<GD.data.size(); i++) {
1761       wnjkf.ttag.push_back(GD.xdata[i]);  // leave x as-is    -xdata[0]);
1762       wnjkf.data.push_back(GD.data[i]);
1763       wnjkf.msig.push_back(1.0);
1764       wnjkf.psig.push_back(GD.wnjpom);    // pom = process/measurement noises
1765    }
1766    wnjkf.prec = GD.prec;
1767    wnjkf.width = GD.width;
1768    cout << "# White noise jerk filter, unweighted, times since "
1769       << fixed << setprecision(3) << GD.xdata[0] << endl;
1770    // get dt, the nominal timestep
1771    MostCommonValue mcv;
1772    mcv.setTol(0.1);                 // don't need fine tuning
1773    for(i=1; i<GD.xdata.size(); i++)
1774       mcv.add(GD.xdata[i]-GD.xdata[i-1]);
1775    double dt = mcv.bestDT();
1776    double tbeg(wnjkf.ttag[0]), tend(wnjkf.ttag[wnjkf.ttag.size()-1]);
1777 
1778    wnjkf.apState(0) = GD.data[0];
1779    wnjkf.apNoise(0) = 10000000.;       // no info
1780    for(i=1; i<3; i++) {
1781       wnjkf.apState(i) = 0.0;
1782       wnjkf.apNoise(i) = wnjkf.apNoise(i-1)/dt;
1783    }
1784 
1785    // run the filter
1786    if(GD.debug > -1) LOGlevel = ConfigureLOG::Level("DEBUG");
1787    wnjkf.filterOutput = true;
1788    wnjkf.setSmoother(true);
1789    wnjkf.setSRISU(true);
1790    wnjkf.initializeFilter();
1791    wnjkf.ForwardFilter(tend,dt);
1792    wnjkf.BackwardFilter(0);
1793    if(GD.debug > -1) LOGlevel = ConfigureLOG::Level("INFO");
1794 
1795    return 0;
1796 }
1797 catch(Exception& e) { GPSTK_RETHROW(e); }
1798 }
1799 
1800 //------------------------------------------------------------------------------------
1801 // discrete Fourier transform
DFT(const vector<double> & data,vector<double> & ampcos,vector<double> & ampsin)1802 void DFT(const vector<double>& data, vector<double>& ampcos, vector<double>& ampsin)
1803 {
1804    const double TWO_PI(6.2831853071796);
1805    int i,j,N(data.size());
1806    double oon(1.0/double(N)),ton(2.0/double(N));
1807    double tpon(TWO_PI*oon);
1808    ampsin = vector<double>(1+N/2);
1809    ampcos = vector<double>(1+N/2);
1810 
1811    for(i=0; i<N/2; i++) {
1812       ampsin[i] = ampcos[i] = 0.0;
1813       for(j=0; j<N; j++) {
1814          ampcos[i] += data[j] * ::cos(tpon*i*j);
1815          ampsin[i] += data[j] * ::sin(tpon*i*j);
1816       }
1817       ampcos[i] *= (i==0 || i==N/2) ? oon : ton;
1818       ampsin[i] *= ton;
1819    }
1820 }
1821 
1822 // inverse discrete Fourier transform
invDFT(const vector<double> & ampcos,const vector<double> & ampsin,const int & N,vector<double> & reform)1823 void invDFT(const vector<double>& ampcos, const vector<double>& ampsin, const int& N,
1824    vector<double>& reform)
1825 {
1826    const double TWO_PI(6.2831853071796);
1827    int i,j;
1828    double tpon(TWO_PI/double(N));
1829    reform = vector<double>(N);
1830    for(i=0; i<N; i++) {
1831       reform[i] = ampcos[0];
1832       // for low pass, change j<=N/2 to, say, j<=N/8
1833       for(j=1; j<=N/2; j++) {
1834          reform[i] += ampcos[j] * ::cos(tpon*i*j) + ampsin[j] * ::sin(tpon*i*j);
1835       }
1836    }
1837 }
1838 
1839 //------------------------------------------------------------------------------------
ComputeFFT(void)1840 int ComputeFFT(void)
1841 {
1842 try {
1843    unsigned int i;
1844    GlobalData& GD=GlobalData::Instance();
1845 
1846    // NB providing --xcol means it will search for and fill gaps in x.
1847    // also this will compute dt
1848    if(GD.xcol == -1) {
1849       // replace with count -- assume no gaps in data
1850       for(i=0; i<GD.data.size(); i++)
1851          GD.xdata.push_back(double(i));
1852    }
1853 
1854    // first find step in xdata
1855    MostCommonValue mcv;
1856    mcv.setTol(0.002);         // make it larger than a millisecond
1857    for(i=1; i<GD.xdata.size(); i++) mcv.add(GD.xdata[i]-GD.xdata[i-1]);
1858    //mcv.dump(cout,prec);
1859    int count(mcv.bestN());
1860    const double dt(mcv.bestDT()), tol(mcv.getTol());
1861    cout << "#FFT Computed X-step is " << fixed << setprecision(GD.prec) << dt
1862       << " with " << count << " occurances." << endl;
1863    if(GD.dtfft == 0.0) GD.dtfft = dt;
1864    else cout << "#FFT X-step is forced by user to be "
1865                << scientific << setprecision(GD.prec) << GD.dtfft << endl;
1866 
1867    // find average
1868    const double ave(GD.tsstats.AverageY());
1869    cout << "#FFT Remove data average " << scientific << setprecision(GD.prec)
1870             << ave << endl;
1871 
1872    // copy over, removing average and filling gaps
1873    vector<double> vxdata,vdata;
1874    double vx(GD.xdata[0]);
1875    vxdata.push_back(vx);
1876    vdata.push_back(GD.data[0]-ave);
1877    for(i=1; i<GD.data.size(); i++) {
1878       while(GD.xdata[i]-vx-dt > tol) {
1879          vx += dt;
1880          vxdata.push_back(vx);
1881          vdata.push_back(0.0);
1882       }
1883       vxdata.push_back(GD.xdata[i]);
1884       vdata.push_back(GD.data[i]-ave);
1885       vx = GD.xdata[i];
1886    }
1887 
1888    unsigned N(vdata.size());
1889 
1890    // get the DFT (not fast) of real valued data
1891    vector<double> ampsin(1+N/2),ampcos(1+N/2);
1892    //vector<double> reform(N);
1893 
1894    DFT(vdata, ampcos, ampsin);             // data is unchanged
1895    //invDFT(ampcos, ampsin, N, reform);     // amps unchanged
1896 
1897    // output
1898    double amp, dtot(0.0), ftot(0.0), fact(2.0/N);
1899    cout << "#FFT N=" << N << fixed << setprecision(GD.prec) << " dx is " << GD.dtfft
1900       << " Nyquist = 1/2dx = " << 1.0/(2*GD.dtfft)
1901       << ", freq at i is (2i/N)*Nyquist = i * " << scientific << 1.0/(N*GD.dtfft)
1902       << ", WL at i is N*dx/i = " << N*GD.dtfft << " / i " << endl;
1903    cout << "#FFT i xd(i*dx) data freq |ampfft| xdata wl " << endl;
1904    for(i=0; i<N; i++) {
1905       amp = (i < 1+N/2 ? ::sqrt(ampsin[i]*ampsin[i]+ampcos[i]*ampcos[i]) : 0.0);
1906       cout << "FFT " << fixed << setprecision(GD.prec) << i
1907          << " " << double(i)*GD.dtfft << " " << vdata[i]
1908          << " " << i/(N*GD.dtfft) << " " << amp
1909          //<< " " << reform[i]
1910          << " " << GD.xdata[i]
1911          << " " << (i==0 ? 0 : (N*GD.dtfft)/i)
1912          << endl;
1913       dtot += fact*vdata[i]*vdata[i];
1914       if(i < 1+N/2) ftot += amp*amp;
1915    }
1916    cout << "#FFT Total power (2/N)*sum(data^2) = sum(fft^2) = " << scientific
1917       << setprecision(GD.prec) << dtot << " " << ftot << " " << GD.label << endl;
1918 
1919    return 0;
1920 }
1921 catch(Exception& e) { GPSTK_RETHROW(e); }
1922 }
1923 
1924 //------------------------------------------------------------------------------------
Outliers(void)1925 int Outliers(void)
1926 {
1927 try {
1928    unsigned int i,j;
1929    GlobalData& GD=GlobalData::Instance();
1930 
1931    double ave(GD.cstats.Average()),sig(GD.cstats.StdDev());
1932    // normally GD.outscale=1: 2.5*Q3 - 1.5*Q1;
1933    double OH = GD.Q3 + GD.outscale*1.5*(GD.Q3-GD.Q1);
1934    // normally GD.outscale=1: 2.5*Q1 - 1.5*Q3;
1935    double OL = GD.Q1 - GD.outscale*1.5*(GD.Q3-GD.Q1);
1936    vector<int> outhi,outlo;
1937    for(i=0; i<GD.data.size(); i++) {
1938       if(GD.data[i] > OH)
1939          outhi.push_back(i);
1940       else if(GD.data[i] < OL)
1941          outlo.push_back(i);
1942    }
1943    cout << "There are " << outhi.size()+outlo.size() << " outliers; "
1944       << outlo.size() << " low (< " << setprecision(GD.prec) << OL << ") and "
1945       << outhi.size() << " high (> " << setprecision(GD.prec) << OH << ")."
1946       << endl << "     n  " << (GD.xdata.size() > 0 ? "x-value" : "")
1947       << "   value  val/outlim  val-ave (val-ave)/sig" << endl;
1948    // NB data and xdata have been sorted together
1949    for(j=1,i=0; i<outlo.size(); i++,j++) {
1950       cout << " OTL " << j << " ";
1951       if(GD.xdata.size() > 0) cout << GD.xdata[outlo[i]] << " ";
1952       cout << GD.data[outlo[i]] << " " << GD.data[outlo[i]]/OL
1953          << " " << GD.data[outlo[i]]-ave
1954          << " " << (GD.data[outlo[i]]-ave)/sig << endl;
1955    }
1956    for(i=0; i<outhi.size(); i++,j++) {
1957       cout << " OTH " << j << " ";
1958       if(GD.xdata.size() > 0) cout << GD.xdata[outhi[i]] << " ";
1959       cout << GD.data[outhi[i]] << " " << GD.data[outhi[i]]/OH
1960          << " " << GD.data[outhi[i]]-ave
1961          << " " << (GD.data[outhi[i]]-ave)/sig << endl;
1962    }
1963 
1964    return 0;
1965 }
1966 catch(Exception& e) { GPSTK_RETHROW(e); }
1967 }
1968 
1969 //------------------------------------------------------------------------------------
DumpData(string msg)1970 int DumpData(string msg)
1971 {
1972 try {
1973    unsigned int i;
1974    GlobalData& GD=GlobalData::Instance();
1975 
1976    for(i=0; i<GD.data.size(); i++) {
1977       cout << msg << " " << fixed << setprecision(GD.prec) << i;
1978       if(GD.xcol > -1) cout << " " << GD.xdata[i];
1979       cout << " " << GD.data[i];
1980       if(GD.wcol > -1) cout << " " << GD.wdata[i];
1981       cout << endl;
1982    }
1983 
1984    return 0;
1985 }
1986 catch(Exception& e) { GPSTK_RETHROW(e); }
1987 }
1988 
1989 //------------------------------------------------------------------------------------
1990 //------------------------------------------------------------------------------------
1991