1 //==============================================================================
2 //
3 //  This file is part of GPSTk, the GPS Toolkit.
4 //
5 //  The GPSTk is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU Lesser General Public License as published
7 //  by the Free Software Foundation; either version 3.0 of the License, or
8 //  any later version.
9 //
10 //  The GPSTk is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU Lesser General Public License for more details.
14 //
15 //  You should have received a copy of the GNU Lesser General Public
16 //  License along with GPSTk; if not, write to the Free Software Foundation,
17 //  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 //  This software was developed by Applied Research Laboratories at the
20 //  University of Texas at Austin.
21 //  Copyright 2004-2020, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24 
25 //==============================================================================
26 //
27 //  This software was developed by Applied Research Laboratories at the
28 //  University of Texas at Austin, under contract to an agency or agencies
29 //  within the U.S. Department of Defense. The U.S. Government retains all
30 //  rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 //  Pursuant to DoD Directive 523024
33 //
34 //  DISTRIBUTION STATEMENT A: This software has been approved for public
35 //                            release, distribution is unlimited.
36 //
37 //==============================================================================
38 
39 /// @file FDiffFilter.hpp
40 
41 ///    This class implements a statistical filter that uses first-differences, like
42 /// FirstDiffFilter, but this is simpler, more effective and more robust, at least
43 /// for single differenced (across satellites) phase data.
44 /// Also includes here is a class, IterativeFDiffFilter, that applies the FDiffFilter
45 /// more than once; this is the most effective way to use this filter.
46 ///
47 /// There are several statistical filters implemented as classes. These classes are
48 /// templates; the template parameter should be a float (probably double);
49 /// it is used to construct gpstk::Stats<T>, gpstk::TwoSampleStats<T> and
50 /// gpstk::SeqStats<T>, which are fundamental to these algorithms.
51 ///    All the filters look for outliers and discontinuities (slips) in a timeseries.
52 /// The first difference filter analyses the simple first difference of the data.
53 /// The window filter uses a 2-pane sliding window centered on the data point in
54 /// question; statistics on the data in each to the 2 panes are computed and used
55 /// in the analysis.
56 ///    All filters' analysis() function computes a vector of FilterHit objects named
57 /// 'results'that give the caller the results of the filtering.
58 ///    All filters have a getStats(FilterHit) function that computes statistics on
59 /// the filter quantities (NOT the data) over the interval covered by the event, and
60 /// stores them in the FilterHit. These stats are slightly different for the two
61 /// filters; FirstDiffFilter::getStats computes min, max, median and mad of the first
62 /// differences.
63 ///
64 ///    The structure of these filters allows the caller to call filters repeatedly,
65 /// and to call different filters on the same dataset, because none of the filters
66 /// modify the data array(s) in any way. The arrays are passed as constant references
67 /// to the constructor.  The xdata reference must be provided, but it may be empty
68 /// (except for the window filter if two sample statistics are to be used).
69 /// If xdata is not empty, values of xdata are included in the dump() output.
70 ///    Similarly, an integer vector of flags is also passed to the constructor (also
71 /// as a const reference), and it may be empty. If it is not empty, flag[i] != 0
72 /// causes the data at index i to be ignored by the filters.
73 ///    The arrays data, xdata and flags must always be parallel, and xdata and flags
74 /// cannot be shorter than data unless they are empty (when they are ignored).
75 ///    The filter() function has optional input of the starting index and the number
76 /// of points to process, so that segments of the data (and xdata and flags) array(s)
77 /// can be processed in the filters.
78 ///    These features allow the user to, for example, call a filter, mark data in the
79 /// flags array (e.g. outliers) and then filter again. If a slip is found, the caller
80 /// can then filter the data again but starting at the slip, or filter only the
81 /// segment of data before the slip [using filter(index, npts)].
82 ///    NB the caller must construct a new filter at each call - if you declare a
83 /// Filter object, run filter(), then use the results to modify flags[] and try to
84 /// call filter() again, it does not see the changes to flags[]. Instead you need to
85 /// call the constructor again.
86 
87 #ifndef FDIFF_FILTER_INCLUDE
88 #define FDIFF_FILTER_INCLUDE
89 
90 #include "Stats.hpp"
91 #include "StatsFilterHit.hpp"
92 #include "RobustStats.hpp"
93 #include "StringUtils.hpp"
94 //#include "stl_helpers.hpp"
95 
96 #include <vector>
97 
98 //------------------------------------------------------------------------------------
99 // TODO
100 // ?make first result BOD?
101 
102 //------------------------------------------------------------------------------------
103 // forward declaration
104 template <class T> class IterativeFDiffFilter;
105 
106 //------------------------------------------------------------------------------------
107 /// This class computes the first difference of the input data array at each point,
108 /// as well as stats on both the first difference and the data for the Nwind points
109 /// ending at the current point. The difference, sigmaYX of the difference statistics,
110 /// and the slope from the data statistics, are stored in an "analysis vector" Avec.
111 /// The Avec is used to find outliers, gaps and slips in the data; the analysis()
112 /// method returns a vector of "result" objects FilterHit<T>.
113 /// The caller should filter and analyze iteratively, since a single call is often
114 /// not sufficient to find and fix all the problems.
115 /// NB Flags and xdata are required and critical. Outliers must be flagged or
116 /// removed between iterations, and xdata is needed for gaps and to correct slip
117 /// magnitudes for the slope of the data.
118 /// Probably the major weakness of this filter is it tends to find false slips
119 /// after large gaps.
120 template <class T> class FDiffFilter
121 {
122 private:
123    friend class IterativeFDiffFilter<T>;
124 
125    // embedded class -----------------------------------------
126    /// class used to store analysis arrays filled by the first difference filter
127    class Analysis
128    {
129    public:
130       /// constructor
Analysis()131       Analysis() { index=0; sloInd=0; diff = sigN = T(0); }
132       // member data
133       unsigned int index;  ///< index in original arrays to which this info applies.
134       T diff;              ///< first difference = data[index]-data[index-1]
135       T sigN;              ///< sigmaYX for N pts of first diff ending at index
136       T sloN;              ///< 2-sample slope of data for N pts ending at index
137       int sloInd;          ///< index in Avec of slope used in current calc. of diff
138    }; // end class Analysis
139 
140    /// vector of Analysis objects, holding first differences and indexes,
141    /// generated by filter(), used by analyze() and included in dump() output.
142    std::vector<Analysis> Avec;
143 
144    /// vector of results of analysis()
145    std::vector< FilterHit<T> > results;
146 
147    // member data ---------------------------------------
148    int osw,osp;                  ///< width and precision for dump(), (default 8,3)
149    bool noxdata;                 ///< true when xdata is not given
150    bool noflags;                 ///< true when flags array is not given
151 
152    const std::vector<T>& xdata;  ///< reference to xdata - 'time'; if empty use count
153    const std::vector<T>& data;   ///< reference to data to be filtered
154    const std::vector<int>& flags;///< reference to flags, parallel to data, 0 == good
155    int ilimit;                   ///< largest allowed index in data[] is ilimit-1
156 
157    T fdlim;                      ///< |first diff| must be > this to be outlier/slip
158    T siglim;                     ///< sigma > this indicates possible outlier/slip
159    T Esiglim;                    ///< compute siglim (= robust outlier limit)
160    unsigned int Nwind;           ///< size of sliding window
161    unsigned int Nsig;            ///< filter()'s count of high sigma - analysis needed
162    bool doSmall;                 ///< if true, include small slips (<fdlim) in results
163    bool keepSigIndex;            ///< if true, keep vector of HighSigmaIndex
164    std::vector<unsigned int> sigIndexes; ///< saved indexes of Nsig high sigma pts
165 
166    T medSlope, madSlope;         ///< robust stats on slope
167 
168 public:
169    // member functions ---------------------------------------
170    /// constructor with two arrays - x is used only in dump(); x and f must exist
171    /// but may be empty
172    /// @param x const vector<T> of 'times' values, NB (x,d,f) all parallel
173    /// @param d const vector<T> of data values
174    /// @param f const vector<int> of flags, 0 means good. this array may be empty.
FDiffFilter(const std::vector<T> & x,const std::vector<T> & d,const std::vector<int> & f)175    FDiffFilter(const std::vector<T>& x,
176                const std::vector<T>& d,
177                const std::vector<int>& f) : data(d), xdata(x), flags(f)
178    {
179       fdlim = T(0.8);
180       siglim = T(0.3);
181       Esiglim = T(0);
182       noxdata = (xdata.size() == 0);
183       noflags = (flags.size() == 0);
184       osw = 8; osp = 3;
185       Nsig = 0;
186       doSmall = true;
187       keepSigIndex = false;
188    }
189 
190    /// get and set
setWidth(unsigned int w)191    inline void setWidth(unsigned int w) { Nwind = w; }
getWidth(void)192    inline unsigned int getWidth(void) { return Nwind; }
setLimit(T val)193    inline void setLimit(T val) { fdlim = val; }
getLimit(void)194    inline T getLimit(void) { return fdlim; }
setSigma(T val)195    inline void setSigma(T val) { siglim = val; }
getSigma(void)196    inline T getSigma(void) { return siglim; }
doSmallSlips(const bool & doit)197    inline bool doSmallSlips(const bool& doit) { doSmall = doit; return doit; }
doSmallSlips(void)198    inline bool doSmallSlips(void) { return doSmall; }
indexHighSigmas(const bool & doit)199    inline bool indexHighSigmas(const bool& doit) { keepSigIndex = doit; return doit; }
indexingHighSigmas(void)200    inline bool indexingHighSigmas(void) { return keepSigIndex; }
201    /// get and set for dump
setw(int w)202    inline void setw(int w) { osw=w; }
setprecision(int p)203    inline void setprecision(int p) { osp=p; }
204    /// get results vector of FilterHit
getResults(void)205    inline std::vector< FilterHit<T> > getResults(void) { return results; }
206 
207    /// get the number of high-sigma points :
208    /// after filter() can tell if possible outliers/slips present
getNhighSigma(void)209    inline unsigned int getNhighSigma(void) { return Nsig; }
210 
211    /// get the indexes of high-sigma points (only if indexHighSigmas(true))
getHighSigmaIndex(void)212    inline std::vector<unsigned int> getHighSigmaIndex(void) { return sigIndexes; }
213 
214    /// filter routine that computes the first difference, and uses siglim to mark
215    /// areas of possible outlier/slips.
216    /// @param index in data arrays at which to start, defaults to 0
217    /// @param npts  number of data to process, defaults to -1, meaning to end of data.
218    /// @return > 0 number of points in the analysis vector == number good data
219    ///          -1 the data array is too short (< 2)
220    ///          -3 if the flags array is given but shorter than data array
221    /// This routine clears the analysis vector.
222    int filter(const size_t index=0, int npts=-1);
223 
224    /// after filter(), and before analyze(), compute robust stats on the sigma of
225    /// first differences, to get a suggested siglim
226    /// @param N int output: computed number of sigmas > new limit, -1 not enough data
227    /// @param new_siglim T output: computed limit (high outlier) on sigma for analyze
228    void ComputeRobustSigmaLimit(int& N, T& new_siglim);
229 
230    /// analyze the output of the filter(), filling the results array with outliers
231    /// @param return vector of FilterHit
232    /// @return the number of outliers
233    int analysis(void);
234 
235    /// dump the data and analysis; optionally include a tag at the start of each line,
236    /// and configure width and precision.
237    void dump(std::ostream& s, std::string tag=std::string());
238 
239 }; // end class FDiffFilter
240 
241 //------------------------------------------------------------------------------------
filter(const size_t i0,int dsize)242 template<class T> int FDiffFilter<T>::filter(const size_t i0, int dsize)
243 {
244    if(dsize == -1) dsize = data.size()-i0;
245 
246    // largest allowed index is ilimit-1
247    ilimit = dsize+i0;
248 
249    // is there enough data? if not return -1
250    int i(i0),n(0);
251    if(!noflags) {
252       while(i<ilimit && n < 2) { if(flags[i]==0) n++; i++; }
253       if(i==ilimit) return -1;
254    }
255    else if(dsize < 2) return -1;
256 
257    // if xdata or flags is there, make sure its big enough
258    if(!noflags && flags.size()-i0 < dsize) return -3;
259 
260    // generate the analysis vector
261    Avec.clear();
262 
263    // compute stats on sigmas and data in a sliding window of width Nwind
264    gpstk::TwoSampleStats<T> fstats;       // stats on the first diffs in window
265    gpstk::TwoSampleStats<T> dstats;       // stats on the data in window
266    std::vector<T> slopes;                 // store slopes, for robust stats
267 
268    // loop over all data, computing first difference and stats in sliding window
269    Nsig = 0;
270    int iprev(-1),j,islope(0);
271    // start at the first good point
272    n = 0; i = i0; if(!noflags) while(i<ilimit && flags[i]) i++;
273    while(i<ilimit) {
274       Analysis A;
275       A.index = i;
276       n++;              // count data points, just for approx slope when n==2
277 
278       // add to stats on data (for slope)
279       dstats.Add(xdata[i],data[i]);
280 
281       // compute first difference, accounting for slope of data
282       if(iprev == -1)
283          A.diff = T(0);
284       else {
285          // get approx slope at first pt (often slope==0 here => OUT; index=0)
286          if(n==2) Avec[islope].sloN =     // first slope = 2nd slope = d(data)/dx
287                   (data[i]-data[iprev])/(xdata[i]-xdata[iprev]);
288          // index of latest good slope
289          A.sloInd = islope;
290          // compute the difference = change in data - correction for slope
291          A.diff = data[i]-data[iprev]
292                   - Avec[islope].sloN * (xdata[i]-xdata[iprev]);
293          // add diff to stats
294          fstats.Add(xdata[i], A.diff);
295       }
296 
297       // remove old data from stats buffers if full
298       j = Avec.size()-Nwind;              // index of earliest of the Nwind points
299       if(fstats.N() > Nwind)
300          fstats.Subtract(xdata[Avec[j].index], Avec[j].diff);
301       if(dstats.N() > Nwind)
302          dstats.Subtract(xdata[Avec[j].index], data[Avec[j].index]);
303 
304       //NO A.sigN = fstats.SigmaYX();     // sigma first diff, given slope in fdiffs
305       A.sigN = fstats.StdDevY();          // sigma first diff
306       A.sloN = dstats.Slope();            // slope of data
307       if(A.sigN > siglim) {
308          Nsig++;                          // count it if sigma is high
309          if(keepSigIndex) sigIndexes.push_back(i);
310       }
311       else {
312          islope = Avec.size();          // keep this, the most recent good slope
313          // keep slopes for roubst stats
314          slopes.push_back(A.sloN);
315       }
316 
317       Avec.push_back(A);
318       iprev = i;
319       i++; if(!noflags) while(i<ilimit && flags[i]) i++;
320    }
321 
322    // compute robust stats on slopes
323    madSlope = gpstk::Robust::MedianAbsoluteDeviation(&slopes[0], slopes.size(),
324                                                       medSlope, false);
325 
326    return Avec.size();
327 
328 }  // end FDiffFilter::filter()
329 
330 //------------------------------------------------------------------------------------
331 // after filter(), and before analysis(), compute robust stats on the sigma of
332 // first differences, to get a suggested siglim
333 // param new_siglim T computed limit on sigma for analysis == high outlier limit
334 // return -1 if not enough data, else number of data outside new sigma limit
ComputeRobustSigmaLimit(int & N,T & new_siglim)335 template<class T> void FDiffFilter<T>::ComputeRobustSigmaLimit(int& N, T& new_siglim)
336 {
337    new_siglim = siglim;
338    if(Avec.size() < 2) { N=-1; return; }
339 
340    // compute high-outlier limit of sigmas using robust stats
341    // compute quartiles - must sort first
342    unsigned int i;
343    std::vector<T> sd;                                    // put sigmas in temp vector
344    for(i=0; i<Avec.size(); i++)
345       sd.push_back(Avec[i].sigN);
346 
347    T Q1,Q3;
348    gpstk::QSort(&sd[0],sd.size());                       // sort
349    gpstk::Robust::Quartiles(&sd[0],sd.size(),Q1,Q3);     // get Quartiles
350 
351    // compute new sigma limit ; outlier limit (high) 2.5Q3-1.5Q1
352    new_siglim = 2.5*Q3 - 1.5*Q1;
353    if(new_siglim <= siglim) { N = Nsig; return; }
354 
355    // count up hi-sigma points
356    for(N=0,i=0; i<Avec.size(); i++)
357       if(Avec[i].sigN > new_siglim) N++;
358 }
359 
360 //------------------------------------------------------------------------------------
analysis(void)361 template<class T> int FDiffFilter<T>::analysis(void)
362 {
363    //const bool debug(true);
364 
365    // don't clear results - may be a case where caller wants to keep old ones
366 
367    // loop over analysis vector
368    // outliers have >= Nwind big sigmas
369    //      unless its the first point, then there are Nwind-2 bad sigmas
370    // slips have exactly Nwind big sigmas
371    bool isBad(false);
372    int nbad(0),nout,n1st,Nw(Nwind),k;        // DO NOT use unsigned
373    unsigned int i,j,bad0;
374    for(i=0; i<Avec.size(); i++) {
375       //if(debug)
376       //   std::cout << "Loop " << i << " " << xdata[Avec[i].index] << std::endl;
377 
378       if(Avec[i].sigN > siglim) {
379          if(!isBad) { bad0=i; isBad=true;}
380          nbad++;
381       }
382 
383       else if(isBad) {
384          nout = int(bad0)+nbad-Nw;
385          n1st = int(i)-Nw;
386          //if(debug) std::cout << "FDFa: isBad at i " << i
387          //      << " index " << Avec[i].index << " x " << xdata[Avec[i].index]
388          //      << " bad0 " << bad0 << " nbad " << nbad << " nout " << nout
389          //      << " Nwind " << Nw << " 2Nw-2 " << 2*Nw-2 << std::endl;
390 
391          if(nout>0 && nbad > Nw) {                  // outliers - more than a slip
392             //if(debug) for(j=bad0; j<nout; j++)
393             //   std::cout << "FDFa: OUT " << Avec[j].index
394             //            << " " << xdata[Avec[j].index] << std::endl;
395             j = bad0;
396             FilterHit<T> fdfr;
397             fdfr.type = FilterHit<T>::outlier;
398             fdfr.index = Avec[j].index;
399             fdfr.npts = nbad-Nw;
400             fdfr.dx = xdata[Avec[int(j)+nbad-Nw].index] - xdata[Avec[j].index];
401             results.push_back(fdfr);
402          }
403 
404          else if(nbad == Nw) {                   // slip
405             j = bad0;
406             //if(debug) std::cout << "FDFa: SLIP " << Avec[j].index << std::fixed
407             //   << " " << xdata[Avec[j].index] << std::setprecision(osp)
408             //   << " " << Avec[j].diff << std::endl;
409             FilterHit<double> fdfr;
410             fdfr.type = FilterHit<T>::slip;
411             fdfr.index = Avec[j].index;
412             fdfr.sigma = Avec[j].sigN;
413 
414             // find number of missing pts before the slip
415             k = Avec[j].index-1;                      // j is an Avec[] index
416             if(noflags && k>0) k--;                   // NB k is int
417             else while(k>0 && flags[k] != 0) k--;     // k is the data[] index
418             fdfr.dx = xdata[Avec[j].index]-xdata[k];
419             fdfr.npts = Avec[j].index - k;
420 
421             // get the step = first difference of data across slip
422             fdfr.step = Avec[j].diff;
423             // score here is just step/fdlim as a percentage (max 100)
424             fdfr.score = (unsigned int)(0.5 + 100.*::fabs(fdfr.step)/fdlim);
425             if(fdfr.score > 100) fdfr.score = 100;
426 
427             // BUT if step is of order few*MADslope*dx, then probably a false positive
428             if(fdfr.score == 100 && ::fabs(fdfr.step) < 3*madSlope*fdfr.dx) {
429                //if(debug)
430                //std::cout << "FDFa: F+ slip (|diff| < 3*madSlope*dx) at index "
431                //<< Avec[j].index << std::fixed << std::setprecision(osp)
432                //<< ": " << Avec[j].diff << " < 3 * " << madSlope
433                //<< " * " << fdfr.dx << " = " << 3*madSlope*fdfr.dx << std::endl;
434                fdfr.score = -100;
435             }
436 
437             // save the hit
438             if(doSmall || fdfr.score == 100) results.push_back(fdfr);
439          }
440 
441          else if(int(i) <= 2*Nw-2 && n1st >= 0) {   // first <Nwind pts outliers
442             //if(debug) std::cout << "FDFa: isBad at small i " << i << std::endl;
443             //if(debug) for(int n=0; n<n1st; n++)
444             //   std::cout << "FDFa OUT " << Avec[n].index << " "
445             //            << xdata[Avec[n].index] << " small i " << i << std::endl;
446             FilterHit<double> fdfr;
447             fdfr.type = FilterHit<T>::outlier;
448             fdfr.index = Avec[0].index;
449             fdfr.npts = n1st;
450             fdfr.dx = xdata[Avec[n1st].index] - xdata[Avec[0].index];
451             results.push_back(fdfr);
452          }
453 
454          isBad = false;
455          nbad = 0;
456       }
457    }
458 
459    // catch outliers at the very end
460    if(isBad) {                            // bad at the very end
461       //if(debug) for(j=bad0; j<Avec.size(); j++)
462       //   std::cout << "FDFa: OUT " << Avec[j].index
463       //            << " " << xdata[Avec[j].index] << std::endl;
464       j = bad0;
465       FilterHit<double> fdfr;
466       fdfr.type = FilterHit<T>::outlier;
467       fdfr.index = Avec[j].index;
468       fdfr.npts = Avec.size()-j;
469       fdfr.dx = xdata[Avec[Avec.size()-1].index] - xdata[Avec[j].index];
470       results.push_back(fdfr);
471    }
472 
473    return results.size();
474 
475 }  // end FDiffFilter::analysis()
476 
477 //------------------------------------------------------------------------------------
dump(std::ostream & os,std::string tag)478 template<class T> void FDiffFilter<T>::dump(std::ostream& os, std::string tag)
479 {
480    size_t i,j,k,iprev;
481    int w(osw>5?osw+1:5);
482    os << "#" << tag << " FDiffFilter::dump() with limit "
483       << std::fixed << std::setprecision(osp) << fdlim << " sigma limit " << siglim
484       << std::setprecision(osp+2) << " med,mad slope " << medSlope << " " << madSlope
485       << std::setprecision(osp) << (noxdata ? " (xdata is index)" : "")
486       << "\n#" << tag << " " << std::setw(2) << "i"
487       << " " << std::setw(w) << "xd" << " "
488       << std::setw(3) << "flg" << " " << std::setw(w) << "data" << " "
489       << std::setw(w) << "fdif" << " " << std::setw(w) << "sig" << " "
490       << std::setw(w) << "slope" << " " << std::setw(w) << "slp_u" << " "
491       << std::setw(w) << "sl*dx" << " " << std::setw(w) << "slu*dx" << " "
492       << std::setw(5) << "dx"
493       //<< " " << std::setw(w) << "sigslope*dx"
494       << std::endl;
495 
496    T dt(0);
497    std::string sdif,ssig,sldx,slop,slou,sludx;//,sigslo;
498    const size_t N(Avec.size());
499    for(i=0,j=0,k=0,iprev=-1; i<ilimit; i++) {
500       // find j where Avec[j].index == i
501       while(j<N && Avec[j].index < i) j++;
502       bool haveAvec(Avec[j].index == i);
503       if(haveAvec) {
504          sdif = gpstk::StringUtils::asString(Avec[j].diff,osp);
505          ssig = gpstk::StringUtils::asString(Avec[j].sigN,osp);
506          if(iprev > -1) dt = (noxdata ? T(i-iprev) : xdata[i]-xdata[iprev]);
507          slop = gpstk::StringUtils::asString(Avec[j].sloN,osp);
508          slou = gpstk::StringUtils::asString(Avec[Avec[j].sloInd].sloN,osp);
509          sldx = gpstk::StringUtils::asString(Avec[j].sloN*dt,osp);
510          sludx = gpstk::StringUtils::asString(Avec[Avec[j].sloInd].sloN*dt,osp);
511          //sigslo = gpstk::StringUtils::asString(madSlope*dt,osp);
512       }
513       else {
514          sdif = ssig = sldx = sludx = slou = "?"; //sigslo = "?";
515       }
516       os << tag << std::fixed << std::setprecision(osp)
517          << " " << std::setw(3) << i
518          << " " << std::setw(w) << (noxdata ? T(i) : xdata[i])
519          << " " << std::setw(3) << (noflags ? 0 : flags[i])
520          << " " << std::setw(w) << data[i]
521          << " " << std::setw(w) << sdif
522          << " " << std::setw(w) << ssig
523          << " " << std::setw(w) << slop
524          << " " << std::setw(w) << slou
525          << " " << std::setw(w) << sldx
526          << " " << std::setw(w) << sludx
527          << " " << std::setprecision(2) << std::setw(5) << dt
528          //<< " " << std::setw(w) << sigslo
529          << (haveAvec && Avec[j].sigN > siglim ? " SIG":"")
530          << (haveAvec ? "":" NA");
531       if(k < results.size() && haveAvec && i == results[k].index) {
532          os << " " << results[k].asString()
533             << (results[k].type == FilterHit<T>::slip &&
534                results[k].score < 100 ? " SMALL":"");
535          k++;
536       }
537       os << std::endl;
538       if(haveAvec) iprev = Avec[j].index;
539    }
540 }  // end FDiffFilter<T>::dump()
541 
542 //------------------------------------------------------------------------------------
543 //------------------------------------------------------------------------------------
544 /// Just an iteration loop that applies FDiffFilters to the data, rejecting outliers
545 /// and fixing slips in each iteration.
546 template <class T> class IterativeFDiffFilter
547 {
548 private:
549    /// vector of results of the iterative analysis
550    std::vector< FilterHit<T> > results;
551 
552    // member data ---------------------------------------
553    // first those passes to FDiffFilter
554    int osw,osp;                  ///< width and precision for dump(), (default 8,3)
555 
556    const std::vector<T>& xdata;  ///< reference to xdata - 'time'; if empty use count
557    const std::vector<T>& data;   ///< reference to data to be filtered
558    const std::vector<int>& flags;///< reference to flags, parallel to data, 0 == good
559 
560    T fdlim;                      ///< |first diff| must be > this to be outlier/slip
561    T siglim;                     ///< sigma > this indicates possible outlier/slip
562    unsigned int Nwind;           ///< size of sliding window
563    unsigned int Nsig;            ///< filter()'s count of high sigma - analysis needed
564    bool doSmall;                 ///< if true, include small slips (<fdlim) in results
565 
566    // and those unique to this class
567    unsigned int itermax;         ///< maximum number of itertions (3)
568    T Esiglim;                    ///< Estimated sigma limit computed from robust stats
569    std::ostream& logstrm;        ///< write to output stream
570    bool resetSigma;              ///< if true, set siglim to Esiglim in each iteration
571    T siguse;                     ///< sigma limit used in last iteration
572    bool keepSigIndex;            ///< if true, keep vector of HighSigmaIndex
573    std::vector<unsigned int> sigIndexes; ///< saved indexes of Nsig high sigma pts
574    bool verbose;                 ///< output comments and dump FDiffFilters
575    std::string label;            ///< put on output lines when verbose
576 
577 public:
578    // member functions ---------------------------------------
579    /// Constructor with three parallel arrays, xdata (~time), data, flags (0=good).
580    /// Flags f may be empty (size 0), but on output of analysis() it will be filled.
581    /// @param x const vector<T> of 'times' values, NB (x,d,f) all parallel
582    /// @param d const vector<T> of data values
583    /// @param f const vector<int> of flags, 0 means good. this array may be empty.
IterativeFDiffFilter(const std::vector<T> & x,const std::vector<T> & d,const std::vector<int> & f,std::ostream & os=std::cout)584    IterativeFDiffFilter(const std::vector<T>& x,
585                         const std::vector<T>& d,
586                         const std::vector<int>& f,
587                         std::ostream& os=std::cout)
588       : data(d), xdata(x), flags(f), logstrm(os)
589    {
590       keepSigIndex = resetSigma = verbose = false;
591       doSmall = true;
592       itermax = 3;
593       label = std::string();
594       // rest are defaults of FDiffFilter
595       FDiffFilter<T> fdf(x,d,f);
596       osw = fdf.osw;
597       osp = fdf.osp;
598    }
599 
600    /// get and set
setWidth(unsigned int w)601    inline void setWidth(unsigned int w) { Nwind = w; }
getWidth(void)602    inline unsigned int getWidth(void) { return Nwind; }
setLimit(T val)603    inline void setLimit(T val) { fdlim = val; }
getLimit(void)604    inline T getLimit(void) { return fdlim; }
setSigma(T val)605    inline void setSigma(T val) { siglim = val; }
getSigma(void)606    inline T getSigma(void) { return siglim; }
doSmallSlips(const bool & doit)607    inline bool doSmallSlips(const bool& doit) { doSmall = doit; return doit; }
doSmallSlips(void)608    inline bool doSmallSlips(void) { return doSmall; }
doResetSigma(const bool & doit)609    inline bool doResetSigma(const bool& doit) { resetSigma = doit; return doit; }
indexHighSigmas(const bool & doit)610    inline bool indexHighSigmas(const bool& doit) { keepSigIndex = doit; return doit; }
indexingHighSigmas(void)611    inline bool indexingHighSigmas(void) { return keepSigIndex; }
doVerbose(const bool & doit)612    inline bool doVerbose(const bool& doit) { verbose = doit; return doit; }
setLabel(const std::string doit)613    inline void setLabel(const std::string doit) { label = doit; }
getLabel(void)614    inline std::string getLabel(void) { return label; }
615 
616    /// get and set for dump
setw(int w)617    inline void setw(int w) { osw=w; }
setprecision(int p)618    inline void setprecision(int p) { osp=p; }
619 
620    /// get computed sigma limit (= robust outlier limit) after analysis()
getEstimatedSigmaLimit(void)621    inline T getEstimatedSigmaLimit(void) { return Esiglim; }
622 
623    /// get sigma limit used in last iteration
getUsedSigmaLimit(void)624    inline T getUsedSigmaLimit(void) { return siguse; }
625 
626    /// get results vector of FilterHit
getResults(void)627    std::vector< FilterHit<T> > getResults(void) { return results; }
628 
629    /// get the number of remaining high-sigma points after analysis()
getNhighSigma(void)630    inline unsigned int getNhighSigma(void) { return Nsig; }
631 
632    /// get the indexes of high-sigma points (only if indexHighSigmas(true))
getHighSigmaIndex(void)633    inline std::vector<unsigned int> getHighSigmaIndex(void) { return sigIndexes; }
634 
635    /// Analyze the data using FDiffFilters, optionally computing new sigma outlier
636    /// limit, in an interative loop. Best for single differenced phase.
637    /// If doResetSigma(true), sigma limit is reduced, if possible, to outlier limit.
638    /// Flags array may be empty (size 0), but on output it will be filled.
639    /// analyze the output of the filter(), filling the results array with outliers
640    /// Return results in Nsig, Esiglim, and vector<FilterHit> results.
641    /// @return the number of outliers, or -1 if not enough good data to analyze.
642    int analysis(void);
643 
644    /// Edit the data for the caller using results created by analysis().
645    /// NB data arrays are NOT edited by filter.
646    /// NB must pass SAME arrays used in c'tor, after calling analysis(), (not const).
647    /// @param data vector<T> of data values
648    /// @param flags vector<int> of flags, 0 means good. this array may be empty.
649    /// @param doInt if true, integerize the slips before fixing (T)
650    /// @param badFlag set flags[] to this int when marking it bad (1)
651    /// @return the number of edits (slips + pts rejected)
652    int editArrays(std::vector<T>& data, std::vector<int>& flags,
653                   const bool doInt=true, const int badFlag=1);
654 
655 }; // end class IterativeFDiffFilter
656 
657 //------------------------------------------------------------------------------------
658 /// Analyze the data using FDiffFilters, optionally computing new sigma outlier limit,
659 /// in an interative loop. Best for single differenced phase.
analysis(void)660 template<class T> int IterativeFDiffFilter<T>::analysis(void)
661 {
662    // make sure vectors are sized
663    const unsigned int size(xdata.size() < data.size() ? xdata.size() : data.size());
664    if(size <= 2) return -1;
665 
666    // save all results in each iteration; use results during each iteration
667    std::vector< std::vector< FilterHit<T> > > Results;
668 
669    // handle input data ---------------------------------
670    // use a copy of the data within the iteration loop
671    std::vector<T> Tdata(data);
672    // copy flags, create if needed
673    std::vector<int> Tflags;
674    if(flags.size() < size) Tflags = std::vector<int>(size,0);
675    else Tflags = std::vector<int>(flags);
676 
677 
678    // analysis ------------------------------------------
679    unsigned int iter,i,j,k,nr(0);
680    siguse = siglim;
681 
682    // iterate over (filter / compute new sigma limit / analysis)
683    iter = 1;
684    while(iter <= itermax) {
685       // must redefine filter each time since arrays (const in fdf) change
686       FDiffFilter<T> fdf(xdata, Tdata, Tflags);
687       fdf.setWidth(Nwind);          // fdf.Nwind
688       fdf.setLimit(fdlim);          // fdf.fdlim
689       fdf.setSigma(siguse);         // fdf.siglim
690       fdf.setprecision(osp);        // fdf.osp
691       fdf.setw(osw);                // fdf.osw
692       fdf.doSmallSlips(doSmall);    // fdf.doSmall
693       fdf.indexHighSigmas(iter==itermax && keepSigIndex);
694 
695       // filter the data -----------
696       i = fdf.filter();
697       //if(verbose) logstrm << "# " << label << " filter returned " << i << std::endl;
698       if(i <= 2) { logstrm << "Not enough data, abort.\n"; return -1; }
699 
700       if(iter==itermax && keepSigIndex)
701          sigIndexes = fdf.getHighSigmaIndex();
702 
703       // compute outlier limit from robust stats, and count outliers
704       int N;
705       fdf.ComputeRobustSigmaLimit(N, Esiglim);
706       if(verbose) logstrm << "# " << label << " Estimated sigma limit "
707                   << std::fixed << std::setprecision(osp) << Esiglim
708                   << " and used sigma limit " << siguse
709                   << " (input was " << siglim << ") with "
710                   << N << " hi-sigma points " << std::endl;
711 
712       // reset sigma limit in filter, but not if its too large
713       // Esiglim/siglim < 3.0            // not too big
714       // Esiglim/siglim > 0.1            // not too small
715       if(resetSigma) {
716          if(Esiglim > siglim) {
717             if(Esiglim/siglim < 3.0) siguse = Esiglim;
718             else                     siguse = 3*siglim;
719          }
720          else if(Esiglim < siglim) {
721             if(Esiglim/siglim > 0.1) siguse = Esiglim;
722             else                     siguse = 0.1*siglim;
723          }
724          fdf.setSigma(siguse);      // use the new sigma limit
725       }
726 
727       // analysis ------------------
728       fdf.analysis();               // get the outliers and slips
729 
730       // consider results ----------
731       results = fdf.getResults();
732       std::vector<unsigned int> eraseIndex;       // index in results[] to be erased
733 
734       // loop over results: mark duplicates to be erased, mark outliers, fix slips
735       for(i=0; i<results.size(); i++) {
736          if(verbose) logstrm << "# " << label << " Result " << ++nr
737                << std::fixed << std::setprecision(osp)
738                << " " << xdata[results[i].index]
739                << " " << results[i].asString(osp) << std::endl;
740 
741          // mark outliers
742          if(results[i].type == FilterHit<T>::outlier) {
743             k = results[i].index;
744             for(j=0; j<results[i].npts; j++) Tflags[k+j] = 1;
745          }
746 
747          // slips: handle duplicates, and edit data
748          if(results[i].type == FilterHit<T>::slip) {
749 
750             // search previous results, if they exist, for duplicate slips
751             bool skip(false);
752             for(k=0; k<Results.size(); k++) {
753 
754                for(j=0; j<Results[k].size(); j++) {         // loop over prev results
755                   FilterHit<T>& oldres(Results[k][j]);
756 
757                   if(oldres.type != FilterHit<T>::slip)     // not a slip
758                      continue;
759                   if(results[i].index != oldres.index)      // different index
760                      continue;
761 
762                   // if both slips are non-small: add new to previous and delete new
763                   if(results[i].score == 100 && oldres.score == 100)
764                   {
765                      //logstrm << " Combine slips " << results[i].asString(osp)
766                      // << " and " << oldres.asString(osp) << std::endl;
767                      T step = oldres.step + results[i].step;
768                      oldres.step = step;
769                      // keep the later sigma - outliers have been removed
770                      oldres.sigma = results[i].sigma;
771                      oldres.score = (unsigned int)(0.5+100.*::fabs(step)/fdlim);
772                      if(oldres.score > 100) oldres.score = 100;
773                      //logstrm << " New combined slip is "
774                      // << oldres.asString(osp) << std::endl;
775                   }
776 
777                   // if both slips are small, they should be identical => delete new
778                   // ...so, if only one is small, go on
779                   else if(results[i].score == 100 || oldres.score == 100)
780                      continue;
781 
782                   // else both are small: delete new result
783                   skip = true;
784                   break;
785                }
786             }
787 
788             // save the index, to be deleted later
789             if(skip) { eraseIndex.push_back(i); continue; }
790 
791             // if slip is too small to fix, go on
792             if(::fabs(results[i].step) < fdlim) {
793                //if(verbose) logstrm << "# " << label << " Slip too small: "
794                // << std::fixed << std::setprecision(osp) << results[i].step
795                //<< " < " << fdlim << std::endl;
796                continue;
797             }
798 
799             // fix the slip in the Tdata
800             int islip(results[i].step + (results[i].step >= 0.0 ? 0.5:-0.5));
801             if(islip) {
802                if(verbose) logstrm << "# " << label << " Fix slip " << islip
803                      << " " << results[i].asString(osp) << std::endl;
804 
805                // NB if slips in Tdata not fixed, its going to affect later
806                // runs of the analysis
807                for(j=results[i].index; j<Tdata.size(); j++)
808                   Tdata[j] -= islip;
809             }
810          }  // end if slip
811 
812       }  // end loop over results
813 
814       // erase marked slips
815       if(eraseIndex.size() > 0) {
816          //sort(eraseIndex.begin(),eraseIndex.end());
817          for(i=eraseIndex.size()-1; i>=0; i--) {
818             //logstrm << " Erase " << results[eraseIndex[i]].asString(osp)
819             //<< std::endl;
820             results.erase(results.begin()+eraseIndex[i]);    // erase vector
821             if(i == 0) break;    // i is unsigned
822          }
823       }
824 
825       // save to Results
826       Nsig = fdf.getNhighSigma();
827       Results.push_back(results);
828       results.clear();
829 
830       // dump this analysis
831       if(verbose) fdf.dump(logstrm,"FIX"+gpstk::StringUtils::asString(iter-1)+label);
832 
833       ++iter;                    // next iteration
834 
835    }  // end iteration loop
836 
837    // copy all Results into results
838    results.clear();
839    for(k=0; k<Results.size(); k++)
840       for(j=0; j<Results[k].size(); j++)
841          results.push_back(Results[k][j]);
842 
843    // scan for remaining small slips (result of combination of two large)
844    if(!doSmall) {
845       std::vector<unsigned int> eraseIndex;       // index in results[] to be erased
846       for(i=0; i<results.size(); i++) {
847          if(results[i].type != FilterHit<T>::slip) continue;
848          //logstrm << " Final check " << results[i].asString(osp) << std::endl;
849          if(results[i].score == 100) continue;
850          eraseIndex.push_back(i);
851       }  // end loop over results
852       if(eraseIndex.size() > 0) {
853          for(i=eraseIndex.size()-1; i>=0; i--) {
854             //logstrm << " Final Erase " << results[eraseIndex[i]].asString(osp)
855             //<< std::endl;
856             results.erase(results.begin()+eraseIndex[i]);    // erase vector
857             if(i == 0) break;    // i is unsigned
858          }
859       }
860       //logstrm << " End Final check" << std::endl;
861    }
862 
863 
864    return results.size();
865 }  // end IterativeFDiffFilter::analysis()
866 
867 //------------------------------------------------------------------------------------
868 // Edit the data for the caller using results created by analysis().
869 // NB data arrays are NOT edited by filter.
870 // NB must pass SAME arrays used in c'tor, after calling analysis(), but not const.
871 // param data vector<T> of data values
872 // param flags vector<int> of flags, 0 means good. this array may be empty.
873 // param doInt if true, integerize the slips before fixing
874 // param badFlag set flags[] to this int when marking it bad (1)
875 // return the number of edits (slips + pts rejected)
editArrays(std::vector<T> & data,std::vector<int> & flags,const bool doInt,const int badFlag)876 template <class T> int IterativeFDiffFilter<T>::editArrays(
877                      std::vector<T>& data, std::vector<int>& flags,
878                      const bool doInt, const int badFlag)
879 {
880    int nedit(0);
881    unsigned int i,j,n;
882 
883    for(i=0; i<results.size(); i++) {
884       if(results[i].type==FilterHit<T>::BOD || results[i].type==FilterHit<T>::other)
885          continue;
886 
887       else if(results[i].type==FilterHit<T>::outlier) {
888          j = results[i].index;
889          // TD should this loop account for flags[j] already set?
890          for(n=0; n<results[i].npts; nedit++,n++,j++)
891             flags[j] = badFlag;
892       }
893 
894       else if(results[i].type==FilterHit<T>::slip) {
895          double slip(results[i].step);
896          if(doInt) {
897             int islip(slip + (slip >= 0.0 ? 0.5:-0.5));
898             if(islip == 0) continue;
899             slip = double(islip);
900          }
901          for(j=results[i].index; j<data.size(); j++)
902             data[j] -= slip;
903          nedit++;
904       }
905 
906    }
907 
908    return nedit;
909 }  // end int IterativeFDiffFilter::editArrays()
910 
911 //------------------------------------------------------------------------------------
912 //------------------------------------------------------------------------------------
913 #endif // #define FDIFF_FILTER_INCLUDE
914