1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2020 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 
18 
19 
20 #ifndef LIBMESH_STATISTICS_H
21 #define LIBMESH_STATISTICS_H
22 
23 // Local includes
24 #include "libmesh/libmesh_common.h"
25 #include "libmesh/id_types.h"
26 
27 // C++ includes
28 #include <vector>
29 #include <cstdlib> // *must* precede <cmath> for proper std:abs() on PGI, Sun Studio CC
30 #include <cmath>
31 
32 namespace libMesh
33 {
34 
35 /**
36  * The StatisticsVector class is derived from the std::vector<> and
37  * therefore has all of its useful features.  It was designed to not
38  * have any internal state, i.e. no public or private data members.
39  * Also, it was only designed for classes and types for which the
40  * operators +,*,/ have meaning, specifically floats, doubles, ints,
41  * etc.  The main reason for this design decision was to allow a
42  * std::vector<> to be successfully cast to a StatisticsVector,
43  * thereby enabling its additional functionality.  We do not
44  * anticipate any problems with deriving from an stl container which
45  * lacks a virtual destructor in this case.
46  *
47  * Where manipulation of the data set was necessary (for example
48  * sorting) two versions of member functions have been implemented.
49  * The non-const versions perform sorting directly in the data set,
50  * invalidating pointers and changing the entries.  const versions of
51  * the same functions are generally available, and will be
52  * automatically invoked on const StatisticsVector objects.  A
53  * draw-back to the const versions is that they simply make a copy of
54  * the original object and therefore double the original memory
55  * requirement for the data set.
56  *
57  * Most of the actual code was copied or adapted from the GNU
58  * Scientific Library (GSL). More precisely, the recursion relations
59  * for computing the mean were implemented in order to avoid possible
60  * problems with buffer overruns.
61  *
62  * \author John W. Peterson
63  * \date 2002
64  * \brief A std::vector derived class for implementing simple statistical algorithms.
65  */
66 template <typename T>
67 class StatisticsVector : public std::vector<T>
68 {
69 public:
70 
71   /**
72    * Call the std::vector constructor.
73    */
74   explicit
75   StatisticsVector(dof_id_type i=0) : std::vector<T> (i) {}
76 
77   /**
78    * Call the std::vector constructor, fill each entry with \p val.
79    */
StatisticsVector(dof_id_type i,T val)80   StatisticsVector(dof_id_type i, T val) : std::vector<T> (i,val) {}
81 
82   /**
83    * Destructor.  Virtual so we can derive from the \p StatisticsVector
84    */
~StatisticsVector()85   virtual ~StatisticsVector () {}
86 
87 
88   /**
89    * \returns The l2 norm of the data set.
90    */
91   virtual Real l2_norm() const;
92 
93   /**
94    * \returns The minimum value in the data set.
95    */
96   virtual T minimum() const;
97 
98   /**
99    * \returns The maximum value in the data set.
100    */
101   virtual T maximum() const;
102 
103   /**
104    * \returns The mean value of the data set using a recurrence
105    * relation.
106    *
107    * Source: GNU Scientific Library
108    */
109   virtual Real mean() const;
110 
111   /**
112    * \returns The median (e.g. the middle) value of the data set.
113    *
114    * This function modifies the original data by sorting, so it can't
115    * be called on const objects.  Source: GNU Scientific Library.
116    */
117   virtual Real median();
118 
119   /**
120    * A const version of the median function.  Requires twice the memory
121    * of original data set but does not change the original.
122    */
123   virtual Real median() const;
124 
125   /**
126    * \returns The variance of the data set.
127    *
128    * Uses a recurrence relation to prevent data overflow for large
129    * sums.
130    *
131    * \note The variance is equal to the standard deviation squared.
132    * Source: GNU Scientific Library.
133    */
variance()134   virtual Real variance() const
135   { return this->variance(this->mean()); }
136 
137   /**
138    * \returns The variance of the data set where the \p mean is
139    * provided.
140    *
141    * This is useful for efficiency when you have already calculated
142    * the mean. Uses a recurrence relation to prevent data overflow for
143    * large sums.
144    *
145    * \note The variance is equal to the standard deviation squared.
146    * Source: GNU Scientific Library.
147    */
148   virtual Real variance(const Real known_mean) const;
149 
150   /**
151    * \returns The standard deviation of the data set, which is simply
152    * the square-root of the variance.
153    */
stddev()154   virtual Real stddev() const
155   { return std::sqrt(this->variance()); }
156 
157   /**
158    * \returns Computes the standard deviation of the data set, which
159    * is simply the square-root of the variance.
160    *
161    * This method can be used for efficiency when the \p mean has
162    * already been computed.
163    */
stddev(const Real known_mean)164   virtual Real stddev(const Real known_mean) const
165   { return std::sqrt(this->variance(known_mean)); }
166 
167   /**
168    * Divides all entries by the largest entry and stores the result.
169    */
170   void normalize();
171 
172   /**
173    * \returns A histogram with n_bins bins for the data set.
174    *
175    * For simplicity, the bins are assumed to be of uniform size.  Upon
176    * return, the bin_members vector will contain unsigned integers
177    * which give the number of members in each bin.  WARNING: This
178    * non-const function sorts the vector, changing its order.  Source:
179    * GNU Scientific Library.
180    */
181   virtual void histogram (std::vector<dof_id_type> & bin_members,
182                           unsigned int n_bins=10);
183 
184   /**
185    * Generates a Matlab/Octave style file which can be used to
186    * make a plot of the histogram having the desired number of bins.
187    * Uses the histogram(...) function in this class
188    * WARNING: The histogram(...) function is non-const, and changes
189    * the order of the vector.
190    */
191   void plot_histogram(const processor_id_type my_procid,
192                       const std::string & filename,
193                       unsigned int n_bins);
194 
195   /**
196    * A const version of the histogram function.
197    */
198   virtual void histogram (std::vector<dof_id_type> & bin_members,
199                           unsigned int n_bins=10) const;
200 
201   /**
202    * \returns A vector of dof_id_types which corresponds
203    * to the indices of every member of the data set
204    * below the cutoff value "cut".
205    */
206   virtual std::vector<dof_id_type> cut_below(Real cut) const;
207 
208   /**
209    * \returns A vector of dof_id_types which corresponds to the
210    * indices of every member of the data set above the cutoff value
211    * cut.
212    *
213    * I chose not to combine these two functions since the interface is
214    * cleaner with one passed parameter instead of two.
215    */
216   virtual std::vector<dof_id_type> cut_above(Real cut) const;
217 };
218 
219 } // namespace libMesh
220 
221 #endif // LIBMESH_STATISTICS_H
222