1 //==============================================================================
2 //
3 // This file is part of GPSTk, the GPS Toolkit.
4 //
5 // The GPSTk is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published
7 // by the Free Software Foundation; either version 3.0 of the License, or
8 // any later version.
9 //
10 // The GPSTk is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with GPSTk; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
18 //
19 // This software was developed by Applied Research Laboratories at the
20 // University of Texas at Austin.
21 // Copyright 2004-2020, The Board of Regents of The University of Texas System
22 //
23 //==============================================================================
24
25 //==============================================================================
26 //
27 // This software was developed by Applied Research Laboratories at the
28 // University of Texas at Austin, under contract to an agency or agencies
29 // within the U.S. Department of Defense. The U.S. Government retains all
30 // rights to use, duplicate, distribute, disclose, or release this software.
31 //
32 // Pursuant to DoD Directive 523024
33 //
34 // DISTRIBUTION STATEMENT A: This software has been approved for public
35 // release, distribution is unlimited.
36 //
37 //==============================================================================
38
39 /// @file FDiffFilter.hpp
40
41 /// This class implements a statistical filter that uses first-differences, like
42 /// FirstDiffFilter, but this is simpler, more effective and more robust, at least
43 /// for single differenced (across satellites) phase data.
44 /// Also includes here is a class, IterativeFDiffFilter, that applies the FDiffFilter
45 /// more than once; this is the most effective way to use this filter.
46 ///
47 /// There are several statistical filters implemented as classes. These classes are
48 /// templates; the template parameter should be a float (probably double);
49 /// it is used to construct gpstk::Stats<T>, gpstk::TwoSampleStats<T> and
50 /// gpstk::SeqStats<T>, which are fundamental to these algorithms.
51 /// All the filters look for outliers and discontinuities (slips) in a timeseries.
52 /// The first difference filter analyses the simple first difference of the data.
53 /// The window filter uses a 2-pane sliding window centered on the data point in
54 /// question; statistics on the data in each to the 2 panes are computed and used
55 /// in the analysis.
56 /// All filters' analysis() function computes a vector of FilterHit objects named
57 /// 'results'that give the caller the results of the filtering.
58 /// All filters have a getStats(FilterHit) function that computes statistics on
59 /// the filter quantities (NOT the data) over the interval covered by the event, and
60 /// stores them in the FilterHit. These stats are slightly different for the two
61 /// filters; FirstDiffFilter::getStats computes min, max, median and mad of the first
62 /// differences.
63 ///
64 /// The structure of these filters allows the caller to call filters repeatedly,
65 /// and to call different filters on the same dataset, because none of the filters
66 /// modify the data array(s) in any way. The arrays are passed as constant references
67 /// to the constructor. The xdata reference must be provided, but it may be empty
68 /// (except for the window filter if two sample statistics are to be used).
69 /// If xdata is not empty, values of xdata are included in the dump() output.
70 /// Similarly, an integer vector of flags is also passed to the constructor (also
71 /// as a const reference), and it may be empty. If it is not empty, flag[i] != 0
72 /// causes the data at index i to be ignored by the filters.
73 /// The arrays data, xdata and flags must always be parallel, and xdata and flags
74 /// cannot be shorter than data unless they are empty (when they are ignored).
75 /// The filter() function has optional input of the starting index and the number
76 /// of points to process, so that segments of the data (and xdata and flags) array(s)
77 /// can be processed in the filters.
78 /// These features allow the user to, for example, call a filter, mark data in the
79 /// flags array (e.g. outliers) and then filter again. If a slip is found, the caller
80 /// can then filter the data again but starting at the slip, or filter only the
81 /// segment of data before the slip [using filter(index, npts)].
82 /// NB the caller must construct a new filter at each call - if you declare a
83 /// Filter object, run filter(), then use the results to modify flags[] and try to
84 /// call filter() again, it does not see the changes to flags[]. Instead you need to
85 /// call the constructor again.
86
87 #ifndef FDIFF_FILTER_INCLUDE
88 #define FDIFF_FILTER_INCLUDE
89
90 #include "Stats.hpp"
91 #include "StatsFilterHit.hpp"
92 #include "RobustStats.hpp"
93 #include "StringUtils.hpp"
94 //#include "stl_helpers.hpp"
95
96 #include <vector>
97
98 //------------------------------------------------------------------------------------
99 // TODO
100 // ?make first result BOD?
101
102 //------------------------------------------------------------------------------------
103 // forward declaration
104 template <class T> class IterativeFDiffFilter;
105
106 //------------------------------------------------------------------------------------
107 /// This class computes the first difference of the input data array at each point,
108 /// as well as stats on both the first difference and the data for the Nwind points
109 /// ending at the current point. The difference, sigmaYX of the difference statistics,
110 /// and the slope from the data statistics, are stored in an "analysis vector" Avec.
111 /// The Avec is used to find outliers, gaps and slips in the data; the analysis()
112 /// method returns a vector of "result" objects FilterHit<T>.
113 /// The caller should filter and analyze iteratively, since a single call is often
114 /// not sufficient to find and fix all the problems.
115 /// NB Flags and xdata are required and critical. Outliers must be flagged or
116 /// removed between iterations, and xdata is needed for gaps and to correct slip
117 /// magnitudes for the slope of the data.
118 /// Probably the major weakness of this filter is it tends to find false slips
119 /// after large gaps.
120 template <class T> class FDiffFilter
121 {
122 private:
123 friend class IterativeFDiffFilter<T>;
124
125 // embedded class -----------------------------------------
126 /// class used to store analysis arrays filled by the first difference filter
127 class Analysis
128 {
129 public:
130 /// constructor
Analysis()131 Analysis() { index=0; sloInd=0; diff = sigN = T(0); }
132 // member data
133 unsigned int index; ///< index in original arrays to which this info applies.
134 T diff; ///< first difference = data[index]-data[index-1]
135 T sigN; ///< sigmaYX for N pts of first diff ending at index
136 T sloN; ///< 2-sample slope of data for N pts ending at index
137 int sloInd; ///< index in Avec of slope used in current calc. of diff
138 }; // end class Analysis
139
140 /// vector of Analysis objects, holding first differences and indexes,
141 /// generated by filter(), used by analyze() and included in dump() output.
142 std::vector<Analysis> Avec;
143
144 /// vector of results of analysis()
145 std::vector< FilterHit<T> > results;
146
147 // member data ---------------------------------------
148 int osw,osp; ///< width and precision for dump(), (default 8,3)
149 bool noxdata; ///< true when xdata is not given
150 bool noflags; ///< true when flags array is not given
151
152 const std::vector<T>& xdata; ///< reference to xdata - 'time'; if empty use count
153 const std::vector<T>& data; ///< reference to data to be filtered
154 const std::vector<int>& flags;///< reference to flags, parallel to data, 0 == good
155 int ilimit; ///< largest allowed index in data[] is ilimit-1
156
157 T fdlim; ///< |first diff| must be > this to be outlier/slip
158 T siglim; ///< sigma > this indicates possible outlier/slip
159 T Esiglim; ///< compute siglim (= robust outlier limit)
160 unsigned int Nwind; ///< size of sliding window
161 unsigned int Nsig; ///< filter()'s count of high sigma - analysis needed
162 bool doSmall; ///< if true, include small slips (<fdlim) in results
163 bool keepSigIndex; ///< if true, keep vector of HighSigmaIndex
164 std::vector<unsigned int> sigIndexes; ///< saved indexes of Nsig high sigma pts
165
166 T medSlope, madSlope; ///< robust stats on slope
167
168 public:
169 // member functions ---------------------------------------
170 /// constructor with two arrays - x is used only in dump(); x and f must exist
171 /// but may be empty
172 /// @param x const vector<T> of 'times' values, NB (x,d,f) all parallel
173 /// @param d const vector<T> of data values
174 /// @param f const vector<int> of flags, 0 means good. this array may be empty.
FDiffFilter(const std::vector<T> & x,const std::vector<T> & d,const std::vector<int> & f)175 FDiffFilter(const std::vector<T>& x,
176 const std::vector<T>& d,
177 const std::vector<int>& f) : data(d), xdata(x), flags(f)
178 {
179 fdlim = T(0.8);
180 siglim = T(0.3);
181 Esiglim = T(0);
182 noxdata = (xdata.size() == 0);
183 noflags = (flags.size() == 0);
184 osw = 8; osp = 3;
185 Nsig = 0;
186 doSmall = true;
187 keepSigIndex = false;
188 }
189
190 /// get and set
setWidth(unsigned int w)191 inline void setWidth(unsigned int w) { Nwind = w; }
getWidth(void)192 inline unsigned int getWidth(void) { return Nwind; }
setLimit(T val)193 inline void setLimit(T val) { fdlim = val; }
getLimit(void)194 inline T getLimit(void) { return fdlim; }
setSigma(T val)195 inline void setSigma(T val) { siglim = val; }
getSigma(void)196 inline T getSigma(void) { return siglim; }
doSmallSlips(const bool & doit)197 inline bool doSmallSlips(const bool& doit) { doSmall = doit; return doit; }
doSmallSlips(void)198 inline bool doSmallSlips(void) { return doSmall; }
indexHighSigmas(const bool & doit)199 inline bool indexHighSigmas(const bool& doit) { keepSigIndex = doit; return doit; }
indexingHighSigmas(void)200 inline bool indexingHighSigmas(void) { return keepSigIndex; }
201 /// get and set for dump
setw(int w)202 inline void setw(int w) { osw=w; }
setprecision(int p)203 inline void setprecision(int p) { osp=p; }
204 /// get results vector of FilterHit
getResults(void)205 inline std::vector< FilterHit<T> > getResults(void) { return results; }
206
207 /// get the number of high-sigma points :
208 /// after filter() can tell if possible outliers/slips present
getNhighSigma(void)209 inline unsigned int getNhighSigma(void) { return Nsig; }
210
211 /// get the indexes of high-sigma points (only if indexHighSigmas(true))
getHighSigmaIndex(void)212 inline std::vector<unsigned int> getHighSigmaIndex(void) { return sigIndexes; }
213
214 /// filter routine that computes the first difference, and uses siglim to mark
215 /// areas of possible outlier/slips.
216 /// @param index in data arrays at which to start, defaults to 0
217 /// @param npts number of data to process, defaults to -1, meaning to end of data.
218 /// @return > 0 number of points in the analysis vector == number good data
219 /// -1 the data array is too short (< 2)
220 /// -3 if the flags array is given but shorter than data array
221 /// This routine clears the analysis vector.
222 int filter(const size_t index=0, int npts=-1);
223
224 /// after filter(), and before analyze(), compute robust stats on the sigma of
225 /// first differences, to get a suggested siglim
226 /// @param N int output: computed number of sigmas > new limit, -1 not enough data
227 /// @param new_siglim T output: computed limit (high outlier) on sigma for analyze
228 void ComputeRobustSigmaLimit(int& N, T& new_siglim);
229
230 /// analyze the output of the filter(), filling the results array with outliers
231 /// @param return vector of FilterHit
232 /// @return the number of outliers
233 int analysis(void);
234
235 /// dump the data and analysis; optionally include a tag at the start of each line,
236 /// and configure width and precision.
237 void dump(std::ostream& s, std::string tag=std::string());
238
239 }; // end class FDiffFilter
240
241 //------------------------------------------------------------------------------------
filter(const size_t i0,int dsize)242 template<class T> int FDiffFilter<T>::filter(const size_t i0, int dsize)
243 {
244 if(dsize == -1) dsize = data.size()-i0;
245
246 // largest allowed index is ilimit-1
247 ilimit = dsize+i0;
248
249 // is there enough data? if not return -1
250 int i(i0),n(0);
251 if(!noflags) {
252 while(i<ilimit && n < 2) { if(flags[i]==0) n++; i++; }
253 if(i==ilimit) return -1;
254 }
255 else if(dsize < 2) return -1;
256
257 // if xdata or flags is there, make sure its big enough
258 if(!noflags && flags.size()-i0 < dsize) return -3;
259
260 // generate the analysis vector
261 Avec.clear();
262
263 // compute stats on sigmas and data in a sliding window of width Nwind
264 gpstk::TwoSampleStats<T> fstats; // stats on the first diffs in window
265 gpstk::TwoSampleStats<T> dstats; // stats on the data in window
266 std::vector<T> slopes; // store slopes, for robust stats
267
268 // loop over all data, computing first difference and stats in sliding window
269 Nsig = 0;
270 int iprev(-1),j,islope(0);
271 // start at the first good point
272 n = 0; i = i0; if(!noflags) while(i<ilimit && flags[i]) i++;
273 while(i<ilimit) {
274 Analysis A;
275 A.index = i;
276 n++; // count data points, just for approx slope when n==2
277
278 // add to stats on data (for slope)
279 dstats.Add(xdata[i],data[i]);
280
281 // compute first difference, accounting for slope of data
282 if(iprev == -1)
283 A.diff = T(0);
284 else {
285 // get approx slope at first pt (often slope==0 here => OUT; index=0)
286 if(n==2) Avec[islope].sloN = // first slope = 2nd slope = d(data)/dx
287 (data[i]-data[iprev])/(xdata[i]-xdata[iprev]);
288 // index of latest good slope
289 A.sloInd = islope;
290 // compute the difference = change in data - correction for slope
291 A.diff = data[i]-data[iprev]
292 - Avec[islope].sloN * (xdata[i]-xdata[iprev]);
293 // add diff to stats
294 fstats.Add(xdata[i], A.diff);
295 }
296
297 // remove old data from stats buffers if full
298 j = Avec.size()-Nwind; // index of earliest of the Nwind points
299 if(fstats.N() > Nwind)
300 fstats.Subtract(xdata[Avec[j].index], Avec[j].diff);
301 if(dstats.N() > Nwind)
302 dstats.Subtract(xdata[Avec[j].index], data[Avec[j].index]);
303
304 //NO A.sigN = fstats.SigmaYX(); // sigma first diff, given slope in fdiffs
305 A.sigN = fstats.StdDevY(); // sigma first diff
306 A.sloN = dstats.Slope(); // slope of data
307 if(A.sigN > siglim) {
308 Nsig++; // count it if sigma is high
309 if(keepSigIndex) sigIndexes.push_back(i);
310 }
311 else {
312 islope = Avec.size(); // keep this, the most recent good slope
313 // keep slopes for roubst stats
314 slopes.push_back(A.sloN);
315 }
316
317 Avec.push_back(A);
318 iprev = i;
319 i++; if(!noflags) while(i<ilimit && flags[i]) i++;
320 }
321
322 // compute robust stats on slopes
323 madSlope = gpstk::Robust::MedianAbsoluteDeviation(&slopes[0], slopes.size(),
324 medSlope, false);
325
326 return Avec.size();
327
328 } // end FDiffFilter::filter()
329
330 //------------------------------------------------------------------------------------
331 // after filter(), and before analysis(), compute robust stats on the sigma of
332 // first differences, to get a suggested siglim
333 // param new_siglim T computed limit on sigma for analysis == high outlier limit
334 // return -1 if not enough data, else number of data outside new sigma limit
ComputeRobustSigmaLimit(int & N,T & new_siglim)335 template<class T> void FDiffFilter<T>::ComputeRobustSigmaLimit(int& N, T& new_siglim)
336 {
337 new_siglim = siglim;
338 if(Avec.size() < 2) { N=-1; return; }
339
340 // compute high-outlier limit of sigmas using robust stats
341 // compute quartiles - must sort first
342 unsigned int i;
343 std::vector<T> sd; // put sigmas in temp vector
344 for(i=0; i<Avec.size(); i++)
345 sd.push_back(Avec[i].sigN);
346
347 T Q1,Q3;
348 gpstk::QSort(&sd[0],sd.size()); // sort
349 gpstk::Robust::Quartiles(&sd[0],sd.size(),Q1,Q3); // get Quartiles
350
351 // compute new sigma limit ; outlier limit (high) 2.5Q3-1.5Q1
352 new_siglim = 2.5*Q3 - 1.5*Q1;
353 if(new_siglim <= siglim) { N = Nsig; return; }
354
355 // count up hi-sigma points
356 for(N=0,i=0; i<Avec.size(); i++)
357 if(Avec[i].sigN > new_siglim) N++;
358 }
359
360 //------------------------------------------------------------------------------------
analysis(void)361 template<class T> int FDiffFilter<T>::analysis(void)
362 {
363 //const bool debug(true);
364
365 // don't clear results - may be a case where caller wants to keep old ones
366
367 // loop over analysis vector
368 // outliers have >= Nwind big sigmas
369 // unless its the first point, then there are Nwind-2 bad sigmas
370 // slips have exactly Nwind big sigmas
371 bool isBad(false);
372 int nbad(0),nout,n1st,Nw(Nwind),k; // DO NOT use unsigned
373 unsigned int i,j,bad0;
374 for(i=0; i<Avec.size(); i++) {
375 //if(debug)
376 // std::cout << "Loop " << i << " " << xdata[Avec[i].index] << std::endl;
377
378 if(Avec[i].sigN > siglim) {
379 if(!isBad) { bad0=i; isBad=true;}
380 nbad++;
381 }
382
383 else if(isBad) {
384 nout = int(bad0)+nbad-Nw;
385 n1st = int(i)-Nw;
386 //if(debug) std::cout << "FDFa: isBad at i " << i
387 // << " index " << Avec[i].index << " x " << xdata[Avec[i].index]
388 // << " bad0 " << bad0 << " nbad " << nbad << " nout " << nout
389 // << " Nwind " << Nw << " 2Nw-2 " << 2*Nw-2 << std::endl;
390
391 if(nout>0 && nbad > Nw) { // outliers - more than a slip
392 //if(debug) for(j=bad0; j<nout; j++)
393 // std::cout << "FDFa: OUT " << Avec[j].index
394 // << " " << xdata[Avec[j].index] << std::endl;
395 j = bad0;
396 FilterHit<T> fdfr;
397 fdfr.type = FilterHit<T>::outlier;
398 fdfr.index = Avec[j].index;
399 fdfr.npts = nbad-Nw;
400 fdfr.dx = xdata[Avec[int(j)+nbad-Nw].index] - xdata[Avec[j].index];
401 results.push_back(fdfr);
402 }
403
404 else if(nbad == Nw) { // slip
405 j = bad0;
406 //if(debug) std::cout << "FDFa: SLIP " << Avec[j].index << std::fixed
407 // << " " << xdata[Avec[j].index] << std::setprecision(osp)
408 // << " " << Avec[j].diff << std::endl;
409 FilterHit<double> fdfr;
410 fdfr.type = FilterHit<T>::slip;
411 fdfr.index = Avec[j].index;
412 fdfr.sigma = Avec[j].sigN;
413
414 // find number of missing pts before the slip
415 k = Avec[j].index-1; // j is an Avec[] index
416 if(noflags && k>0) k--; // NB k is int
417 else while(k>0 && flags[k] != 0) k--; // k is the data[] index
418 fdfr.dx = xdata[Avec[j].index]-xdata[k];
419 fdfr.npts = Avec[j].index - k;
420
421 // get the step = first difference of data across slip
422 fdfr.step = Avec[j].diff;
423 // score here is just step/fdlim as a percentage (max 100)
424 fdfr.score = (unsigned int)(0.5 + 100.*::fabs(fdfr.step)/fdlim);
425 if(fdfr.score > 100) fdfr.score = 100;
426
427 // BUT if step is of order few*MADslope*dx, then probably a false positive
428 if(fdfr.score == 100 && ::fabs(fdfr.step) < 3*madSlope*fdfr.dx) {
429 //if(debug)
430 //std::cout << "FDFa: F+ slip (|diff| < 3*madSlope*dx) at index "
431 //<< Avec[j].index << std::fixed << std::setprecision(osp)
432 //<< ": " << Avec[j].diff << " < 3 * " << madSlope
433 //<< " * " << fdfr.dx << " = " << 3*madSlope*fdfr.dx << std::endl;
434 fdfr.score = -100;
435 }
436
437 // save the hit
438 if(doSmall || fdfr.score == 100) results.push_back(fdfr);
439 }
440
441 else if(int(i) <= 2*Nw-2 && n1st >= 0) { // first <Nwind pts outliers
442 //if(debug) std::cout << "FDFa: isBad at small i " << i << std::endl;
443 //if(debug) for(int n=0; n<n1st; n++)
444 // std::cout << "FDFa OUT " << Avec[n].index << " "
445 // << xdata[Avec[n].index] << " small i " << i << std::endl;
446 FilterHit<double> fdfr;
447 fdfr.type = FilterHit<T>::outlier;
448 fdfr.index = Avec[0].index;
449 fdfr.npts = n1st;
450 fdfr.dx = xdata[Avec[n1st].index] - xdata[Avec[0].index];
451 results.push_back(fdfr);
452 }
453
454 isBad = false;
455 nbad = 0;
456 }
457 }
458
459 // catch outliers at the very end
460 if(isBad) { // bad at the very end
461 //if(debug) for(j=bad0; j<Avec.size(); j++)
462 // std::cout << "FDFa: OUT " << Avec[j].index
463 // << " " << xdata[Avec[j].index] << std::endl;
464 j = bad0;
465 FilterHit<double> fdfr;
466 fdfr.type = FilterHit<T>::outlier;
467 fdfr.index = Avec[j].index;
468 fdfr.npts = Avec.size()-j;
469 fdfr.dx = xdata[Avec[Avec.size()-1].index] - xdata[Avec[j].index];
470 results.push_back(fdfr);
471 }
472
473 return results.size();
474
475 } // end FDiffFilter::analysis()
476
477 //------------------------------------------------------------------------------------
dump(std::ostream & os,std::string tag)478 template<class T> void FDiffFilter<T>::dump(std::ostream& os, std::string tag)
479 {
480 size_t i,j,k,iprev;
481 int w(osw>5?osw+1:5);
482 os << "#" << tag << " FDiffFilter::dump() with limit "
483 << std::fixed << std::setprecision(osp) << fdlim << " sigma limit " << siglim
484 << std::setprecision(osp+2) << " med,mad slope " << medSlope << " " << madSlope
485 << std::setprecision(osp) << (noxdata ? " (xdata is index)" : "")
486 << "\n#" << tag << " " << std::setw(2) << "i"
487 << " " << std::setw(w) << "xd" << " "
488 << std::setw(3) << "flg" << " " << std::setw(w) << "data" << " "
489 << std::setw(w) << "fdif" << " " << std::setw(w) << "sig" << " "
490 << std::setw(w) << "slope" << " " << std::setw(w) << "slp_u" << " "
491 << std::setw(w) << "sl*dx" << " " << std::setw(w) << "slu*dx" << " "
492 << std::setw(5) << "dx"
493 //<< " " << std::setw(w) << "sigslope*dx"
494 << std::endl;
495
496 T dt(0);
497 std::string sdif,ssig,sldx,slop,slou,sludx;//,sigslo;
498 const size_t N(Avec.size());
499 for(i=0,j=0,k=0,iprev=-1; i<ilimit; i++) {
500 // find j where Avec[j].index == i
501 while(j<N && Avec[j].index < i) j++;
502 bool haveAvec(Avec[j].index == i);
503 if(haveAvec) {
504 sdif = gpstk::StringUtils::asString(Avec[j].diff,osp);
505 ssig = gpstk::StringUtils::asString(Avec[j].sigN,osp);
506 if(iprev > -1) dt = (noxdata ? T(i-iprev) : xdata[i]-xdata[iprev]);
507 slop = gpstk::StringUtils::asString(Avec[j].sloN,osp);
508 slou = gpstk::StringUtils::asString(Avec[Avec[j].sloInd].sloN,osp);
509 sldx = gpstk::StringUtils::asString(Avec[j].sloN*dt,osp);
510 sludx = gpstk::StringUtils::asString(Avec[Avec[j].sloInd].sloN*dt,osp);
511 //sigslo = gpstk::StringUtils::asString(madSlope*dt,osp);
512 }
513 else {
514 sdif = ssig = sldx = sludx = slou = "?"; //sigslo = "?";
515 }
516 os << tag << std::fixed << std::setprecision(osp)
517 << " " << std::setw(3) << i
518 << " " << std::setw(w) << (noxdata ? T(i) : xdata[i])
519 << " " << std::setw(3) << (noflags ? 0 : flags[i])
520 << " " << std::setw(w) << data[i]
521 << " " << std::setw(w) << sdif
522 << " " << std::setw(w) << ssig
523 << " " << std::setw(w) << slop
524 << " " << std::setw(w) << slou
525 << " " << std::setw(w) << sldx
526 << " " << std::setw(w) << sludx
527 << " " << std::setprecision(2) << std::setw(5) << dt
528 //<< " " << std::setw(w) << sigslo
529 << (haveAvec && Avec[j].sigN > siglim ? " SIG":"")
530 << (haveAvec ? "":" NA");
531 if(k < results.size() && haveAvec && i == results[k].index) {
532 os << " " << results[k].asString()
533 << (results[k].type == FilterHit<T>::slip &&
534 results[k].score < 100 ? " SMALL":"");
535 k++;
536 }
537 os << std::endl;
538 if(haveAvec) iprev = Avec[j].index;
539 }
540 } // end FDiffFilter<T>::dump()
541
542 //------------------------------------------------------------------------------------
543 //------------------------------------------------------------------------------------
544 /// Just an iteration loop that applies FDiffFilters to the data, rejecting outliers
545 /// and fixing slips in each iteration.
546 template <class T> class IterativeFDiffFilter
547 {
548 private:
549 /// vector of results of the iterative analysis
550 std::vector< FilterHit<T> > results;
551
552 // member data ---------------------------------------
553 // first those passes to FDiffFilter
554 int osw,osp; ///< width and precision for dump(), (default 8,3)
555
556 const std::vector<T>& xdata; ///< reference to xdata - 'time'; if empty use count
557 const std::vector<T>& data; ///< reference to data to be filtered
558 const std::vector<int>& flags;///< reference to flags, parallel to data, 0 == good
559
560 T fdlim; ///< |first diff| must be > this to be outlier/slip
561 T siglim; ///< sigma > this indicates possible outlier/slip
562 unsigned int Nwind; ///< size of sliding window
563 unsigned int Nsig; ///< filter()'s count of high sigma - analysis needed
564 bool doSmall; ///< if true, include small slips (<fdlim) in results
565
566 // and those unique to this class
567 unsigned int itermax; ///< maximum number of itertions (3)
568 T Esiglim; ///< Estimated sigma limit computed from robust stats
569 std::ostream& logstrm; ///< write to output stream
570 bool resetSigma; ///< if true, set siglim to Esiglim in each iteration
571 T siguse; ///< sigma limit used in last iteration
572 bool keepSigIndex; ///< if true, keep vector of HighSigmaIndex
573 std::vector<unsigned int> sigIndexes; ///< saved indexes of Nsig high sigma pts
574 bool verbose; ///< output comments and dump FDiffFilters
575 std::string label; ///< put on output lines when verbose
576
577 public:
578 // member functions ---------------------------------------
579 /// Constructor with three parallel arrays, xdata (~time), data, flags (0=good).
580 /// Flags f may be empty (size 0), but on output of analysis() it will be filled.
581 /// @param x const vector<T> of 'times' values, NB (x,d,f) all parallel
582 /// @param d const vector<T> of data values
583 /// @param f const vector<int> of flags, 0 means good. this array may be empty.
IterativeFDiffFilter(const std::vector<T> & x,const std::vector<T> & d,const std::vector<int> & f,std::ostream & os=std::cout)584 IterativeFDiffFilter(const std::vector<T>& x,
585 const std::vector<T>& d,
586 const std::vector<int>& f,
587 std::ostream& os=std::cout)
588 : data(d), xdata(x), flags(f), logstrm(os)
589 {
590 keepSigIndex = resetSigma = verbose = false;
591 doSmall = true;
592 itermax = 3;
593 label = std::string();
594 // rest are defaults of FDiffFilter
595 FDiffFilter<T> fdf(x,d,f);
596 osw = fdf.osw;
597 osp = fdf.osp;
598 }
599
600 /// get and set
setWidth(unsigned int w)601 inline void setWidth(unsigned int w) { Nwind = w; }
getWidth(void)602 inline unsigned int getWidth(void) { return Nwind; }
setLimit(T val)603 inline void setLimit(T val) { fdlim = val; }
getLimit(void)604 inline T getLimit(void) { return fdlim; }
setSigma(T val)605 inline void setSigma(T val) { siglim = val; }
getSigma(void)606 inline T getSigma(void) { return siglim; }
doSmallSlips(const bool & doit)607 inline bool doSmallSlips(const bool& doit) { doSmall = doit; return doit; }
doSmallSlips(void)608 inline bool doSmallSlips(void) { return doSmall; }
doResetSigma(const bool & doit)609 inline bool doResetSigma(const bool& doit) { resetSigma = doit; return doit; }
indexHighSigmas(const bool & doit)610 inline bool indexHighSigmas(const bool& doit) { keepSigIndex = doit; return doit; }
indexingHighSigmas(void)611 inline bool indexingHighSigmas(void) { return keepSigIndex; }
doVerbose(const bool & doit)612 inline bool doVerbose(const bool& doit) { verbose = doit; return doit; }
setLabel(const std::string doit)613 inline void setLabel(const std::string doit) { label = doit; }
getLabel(void)614 inline std::string getLabel(void) { return label; }
615
616 /// get and set for dump
setw(int w)617 inline void setw(int w) { osw=w; }
setprecision(int p)618 inline void setprecision(int p) { osp=p; }
619
620 /// get computed sigma limit (= robust outlier limit) after analysis()
getEstimatedSigmaLimit(void)621 inline T getEstimatedSigmaLimit(void) { return Esiglim; }
622
623 /// get sigma limit used in last iteration
getUsedSigmaLimit(void)624 inline T getUsedSigmaLimit(void) { return siguse; }
625
626 /// get results vector of FilterHit
getResults(void)627 std::vector< FilterHit<T> > getResults(void) { return results; }
628
629 /// get the number of remaining high-sigma points after analysis()
getNhighSigma(void)630 inline unsigned int getNhighSigma(void) { return Nsig; }
631
632 /// get the indexes of high-sigma points (only if indexHighSigmas(true))
getHighSigmaIndex(void)633 inline std::vector<unsigned int> getHighSigmaIndex(void) { return sigIndexes; }
634
635 /// Analyze the data using FDiffFilters, optionally computing new sigma outlier
636 /// limit, in an interative loop. Best for single differenced phase.
637 /// If doResetSigma(true), sigma limit is reduced, if possible, to outlier limit.
638 /// Flags array may be empty (size 0), but on output it will be filled.
639 /// analyze the output of the filter(), filling the results array with outliers
640 /// Return results in Nsig, Esiglim, and vector<FilterHit> results.
641 /// @return the number of outliers, or -1 if not enough good data to analyze.
642 int analysis(void);
643
644 /// Edit the data for the caller using results created by analysis().
645 /// NB data arrays are NOT edited by filter.
646 /// NB must pass SAME arrays used in c'tor, after calling analysis(), (not const).
647 /// @param data vector<T> of data values
648 /// @param flags vector<int> of flags, 0 means good. this array may be empty.
649 /// @param doInt if true, integerize the slips before fixing (T)
650 /// @param badFlag set flags[] to this int when marking it bad (1)
651 /// @return the number of edits (slips + pts rejected)
652 int editArrays(std::vector<T>& data, std::vector<int>& flags,
653 const bool doInt=true, const int badFlag=1);
654
655 }; // end class IterativeFDiffFilter
656
657 //------------------------------------------------------------------------------------
658 /// Analyze the data using FDiffFilters, optionally computing new sigma outlier limit,
659 /// in an interative loop. Best for single differenced phase.
analysis(void)660 template<class T> int IterativeFDiffFilter<T>::analysis(void)
661 {
662 // make sure vectors are sized
663 const unsigned int size(xdata.size() < data.size() ? xdata.size() : data.size());
664 if(size <= 2) return -1;
665
666 // save all results in each iteration; use results during each iteration
667 std::vector< std::vector< FilterHit<T> > > Results;
668
669 // handle input data ---------------------------------
670 // use a copy of the data within the iteration loop
671 std::vector<T> Tdata(data);
672 // copy flags, create if needed
673 std::vector<int> Tflags;
674 if(flags.size() < size) Tflags = std::vector<int>(size,0);
675 else Tflags = std::vector<int>(flags);
676
677
678 // analysis ------------------------------------------
679 unsigned int iter,i,j,k,nr(0);
680 siguse = siglim;
681
682 // iterate over (filter / compute new sigma limit / analysis)
683 iter = 1;
684 while(iter <= itermax) {
685 // must redefine filter each time since arrays (const in fdf) change
686 FDiffFilter<T> fdf(xdata, Tdata, Tflags);
687 fdf.setWidth(Nwind); // fdf.Nwind
688 fdf.setLimit(fdlim); // fdf.fdlim
689 fdf.setSigma(siguse); // fdf.siglim
690 fdf.setprecision(osp); // fdf.osp
691 fdf.setw(osw); // fdf.osw
692 fdf.doSmallSlips(doSmall); // fdf.doSmall
693 fdf.indexHighSigmas(iter==itermax && keepSigIndex);
694
695 // filter the data -----------
696 i = fdf.filter();
697 //if(verbose) logstrm << "# " << label << " filter returned " << i << std::endl;
698 if(i <= 2) { logstrm << "Not enough data, abort.\n"; return -1; }
699
700 if(iter==itermax && keepSigIndex)
701 sigIndexes = fdf.getHighSigmaIndex();
702
703 // compute outlier limit from robust stats, and count outliers
704 int N;
705 fdf.ComputeRobustSigmaLimit(N, Esiglim);
706 if(verbose) logstrm << "# " << label << " Estimated sigma limit "
707 << std::fixed << std::setprecision(osp) << Esiglim
708 << " and used sigma limit " << siguse
709 << " (input was " << siglim << ") with "
710 << N << " hi-sigma points " << std::endl;
711
712 // reset sigma limit in filter, but not if its too large
713 // Esiglim/siglim < 3.0 // not too big
714 // Esiglim/siglim > 0.1 // not too small
715 if(resetSigma) {
716 if(Esiglim > siglim) {
717 if(Esiglim/siglim < 3.0) siguse = Esiglim;
718 else siguse = 3*siglim;
719 }
720 else if(Esiglim < siglim) {
721 if(Esiglim/siglim > 0.1) siguse = Esiglim;
722 else siguse = 0.1*siglim;
723 }
724 fdf.setSigma(siguse); // use the new sigma limit
725 }
726
727 // analysis ------------------
728 fdf.analysis(); // get the outliers and slips
729
730 // consider results ----------
731 results = fdf.getResults();
732 std::vector<unsigned int> eraseIndex; // index in results[] to be erased
733
734 // loop over results: mark duplicates to be erased, mark outliers, fix slips
735 for(i=0; i<results.size(); i++) {
736 if(verbose) logstrm << "# " << label << " Result " << ++nr
737 << std::fixed << std::setprecision(osp)
738 << " " << xdata[results[i].index]
739 << " " << results[i].asString(osp) << std::endl;
740
741 // mark outliers
742 if(results[i].type == FilterHit<T>::outlier) {
743 k = results[i].index;
744 for(j=0; j<results[i].npts; j++) Tflags[k+j] = 1;
745 }
746
747 // slips: handle duplicates, and edit data
748 if(results[i].type == FilterHit<T>::slip) {
749
750 // search previous results, if they exist, for duplicate slips
751 bool skip(false);
752 for(k=0; k<Results.size(); k++) {
753
754 for(j=0; j<Results[k].size(); j++) { // loop over prev results
755 FilterHit<T>& oldres(Results[k][j]);
756
757 if(oldres.type != FilterHit<T>::slip) // not a slip
758 continue;
759 if(results[i].index != oldres.index) // different index
760 continue;
761
762 // if both slips are non-small: add new to previous and delete new
763 if(results[i].score == 100 && oldres.score == 100)
764 {
765 //logstrm << " Combine slips " << results[i].asString(osp)
766 // << " and " << oldres.asString(osp) << std::endl;
767 T step = oldres.step + results[i].step;
768 oldres.step = step;
769 // keep the later sigma - outliers have been removed
770 oldres.sigma = results[i].sigma;
771 oldres.score = (unsigned int)(0.5+100.*::fabs(step)/fdlim);
772 if(oldres.score > 100) oldres.score = 100;
773 //logstrm << " New combined slip is "
774 // << oldres.asString(osp) << std::endl;
775 }
776
777 // if both slips are small, they should be identical => delete new
778 // ...so, if only one is small, go on
779 else if(results[i].score == 100 || oldres.score == 100)
780 continue;
781
782 // else both are small: delete new result
783 skip = true;
784 break;
785 }
786 }
787
788 // save the index, to be deleted later
789 if(skip) { eraseIndex.push_back(i); continue; }
790
791 // if slip is too small to fix, go on
792 if(::fabs(results[i].step) < fdlim) {
793 //if(verbose) logstrm << "# " << label << " Slip too small: "
794 // << std::fixed << std::setprecision(osp) << results[i].step
795 //<< " < " << fdlim << std::endl;
796 continue;
797 }
798
799 // fix the slip in the Tdata
800 int islip(results[i].step + (results[i].step >= 0.0 ? 0.5:-0.5));
801 if(islip) {
802 if(verbose) logstrm << "# " << label << " Fix slip " << islip
803 << " " << results[i].asString(osp) << std::endl;
804
805 // NB if slips in Tdata not fixed, its going to affect later
806 // runs of the analysis
807 for(j=results[i].index; j<Tdata.size(); j++)
808 Tdata[j] -= islip;
809 }
810 } // end if slip
811
812 } // end loop over results
813
814 // erase marked slips
815 if(eraseIndex.size() > 0) {
816 //sort(eraseIndex.begin(),eraseIndex.end());
817 for(i=eraseIndex.size()-1; i>=0; i--) {
818 //logstrm << " Erase " << results[eraseIndex[i]].asString(osp)
819 //<< std::endl;
820 results.erase(results.begin()+eraseIndex[i]); // erase vector
821 if(i == 0) break; // i is unsigned
822 }
823 }
824
825 // save to Results
826 Nsig = fdf.getNhighSigma();
827 Results.push_back(results);
828 results.clear();
829
830 // dump this analysis
831 if(verbose) fdf.dump(logstrm,"FIX"+gpstk::StringUtils::asString(iter-1)+label);
832
833 ++iter; // next iteration
834
835 } // end iteration loop
836
837 // copy all Results into results
838 results.clear();
839 for(k=0; k<Results.size(); k++)
840 for(j=0; j<Results[k].size(); j++)
841 results.push_back(Results[k][j]);
842
843 // scan for remaining small slips (result of combination of two large)
844 if(!doSmall) {
845 std::vector<unsigned int> eraseIndex; // index in results[] to be erased
846 for(i=0; i<results.size(); i++) {
847 if(results[i].type != FilterHit<T>::slip) continue;
848 //logstrm << " Final check " << results[i].asString(osp) << std::endl;
849 if(results[i].score == 100) continue;
850 eraseIndex.push_back(i);
851 } // end loop over results
852 if(eraseIndex.size() > 0) {
853 for(i=eraseIndex.size()-1; i>=0; i--) {
854 //logstrm << " Final Erase " << results[eraseIndex[i]].asString(osp)
855 //<< std::endl;
856 results.erase(results.begin()+eraseIndex[i]); // erase vector
857 if(i == 0) break; // i is unsigned
858 }
859 }
860 //logstrm << " End Final check" << std::endl;
861 }
862
863
864 return results.size();
865 } // end IterativeFDiffFilter::analysis()
866
867 //------------------------------------------------------------------------------------
868 // Edit the data for the caller using results created by analysis().
869 // NB data arrays are NOT edited by filter.
870 // NB must pass SAME arrays used in c'tor, after calling analysis(), but not const.
871 // param data vector<T> of data values
872 // param flags vector<int> of flags, 0 means good. this array may be empty.
873 // param doInt if true, integerize the slips before fixing
874 // param badFlag set flags[] to this int when marking it bad (1)
875 // return the number of edits (slips + pts rejected)
editArrays(std::vector<T> & data,std::vector<int> & flags,const bool doInt,const int badFlag)876 template <class T> int IterativeFDiffFilter<T>::editArrays(
877 std::vector<T>& data, std::vector<int>& flags,
878 const bool doInt, const int badFlag)
879 {
880 int nedit(0);
881 unsigned int i,j,n;
882
883 for(i=0; i<results.size(); i++) {
884 if(results[i].type==FilterHit<T>::BOD || results[i].type==FilterHit<T>::other)
885 continue;
886
887 else if(results[i].type==FilterHit<T>::outlier) {
888 j = results[i].index;
889 // TD should this loop account for flags[j] already set?
890 for(n=0; n<results[i].npts; nedit++,n++,j++)
891 flags[j] = badFlag;
892 }
893
894 else if(results[i].type==FilterHit<T>::slip) {
895 double slip(results[i].step);
896 if(doInt) {
897 int islip(slip + (slip >= 0.0 ? 0.5:-0.5));
898 if(islip == 0) continue;
899 slip = double(islip);
900 }
901 for(j=results[i].index; j<data.size(); j++)
902 data[j] -= slip;
903 nedit++;
904 }
905
906 }
907
908 return nedit;
909 } // end int IterativeFDiffFilter::editArrays()
910
911 //------------------------------------------------------------------------------------
912 //------------------------------------------------------------------------------------
913 #endif // #define FDIFF_FILTER_INCLUDE
914