// math/example/students_t_single_sample.cpp

// Copyright John Maddock 2006
// Copyright Paul A. Bristow 2007, 2010

// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifdef _MSC_VER
#  pragma warning(disable: 4512) // assignment operator could not be generated.
#  pragma warning(disable: 4510) // default constructor could not be generated.
#  pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
#endif

#include <boost/math/distributions/students_t.hpp>

// avoid "using namespace std;" and "using namespace boost::math;"
// to avoid potential ambiguity with names in std random.
#include <cmath>     // std::sqrt, std::fabs, std::ceil
#include <iomanip>
#include <iostream>
#include <stdexcept> // std::domain_error
using std::cout; using std::endl;
using std::left; using std::fixed; using std::right; using std::scientific;
using std::setw;
using std::setprecision;

void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn)
{
   //
   // Print a table of two-sided confidence limits for the mean of a
   // single sample, one row per significance level.
   //
   // Sm = Sample Mean.
   // Sd = Sample Standard Deviation.
   // Sn = Sample Size, must be at least 2.
   //
   // Throws std::domain_error if Sn < 2.
   //
   // For example if we set the confidence limit to
   // 0.95, we know that if we repeat the sampling
   // 100 times, then we expect that the true mean
   // will be between our limits on 95 occasions.
   // Note: this is not the same as saying a 95%
   // confidence interval means that there is a 95%
   // probability that the interval contains the true mean.
   // The interval computed from a given sample either
   // contains the true mean or it does not.
   // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
   //
   // Formatting note: only the precision is set before the summary
   // lines are printed, so their notation (fixed/scientific/default)
   // is whatever the caller left set on cout.  On return cout is left
   // in fixed notation with precision 5.

   using boost::math::students_t;

   // Sn is unsigned: "Sn - 1" would wrap round to a huge value for
   // Sn == 0, and Sn == 1 leaves zero degrees of freedom.
   // Reject both before anything is printed.
   if(Sn < 2)
      throw std::domain_error(
         "confidence_limits_on_mean: sample size must be at least 2");

   // Print out general info:
   cout <<
      "__________________________________\n"
      "2-Sided Confidence Limits For Mean\n"
      "__________________________________\n\n";
   cout << setprecision(7);
   cout << setw(40) << left << "Number of Observations" << "=  " << Sn << "\n";
   cout << setw(40) << left << "Mean" << "=  " << Sm << "\n";
   cout << setw(40) << left << "Standard Deviation" << "=  " << Sd << "\n";
   //
   // Define a table of significance/risk levels:
   //
   const double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
   const unsigned alpha_count = sizeof(alpha) / sizeof(alpha[0]);
   //
   // Start by declaring the distribution we'll need,
   // it has Sn - 1 degrees of freedom:
   //
   students_t dist(Sn - 1);
   //
   // The square root of the sample size is the same for every row,
   // so compute it once:
   //
   const double root_n = std::sqrt(static_cast<double>(Sn));
   //
   // Print table header:
   //
   cout << "\n\n"
           "_______________________________________________________________\n"
           "Confidence       T           Interval          Lower          Upper\n"
           " Value (%)     Value          Width            Limit          Limit\n"
           "_______________________________________________________________\n";
   //
   // Now print out the data for the table rows.
   //
   for(unsigned i = 0; i < alpha_count; ++i)
   {
      // Confidence value, as a percentage:
      cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
      // Calculate T: the risk alpha is split equally between the two
      // tails, so take the upper-tail quantile at alpha / 2:
      double T = quantile(complement(dist, alpha[i] / 2));
      // Print T:
      cout << fixed << setprecision(3) << setw(10) << right << T;
      // Calculate width of interval (one sided), this is the
      // distance from the sample mean to either limit:
      double w = T * Sd / root_n;
      // Print width, small values switch to scientific notation
      // so that they don't all print as 0.00x:
      if(w < 0.01)
         cout << scientific << setprecision(3) << setw(17) << right << w;
      else
         cout << fixed << setprecision(3) << setw(17) << right << w;
      // Print Limits:
      cout << fixed << setprecision(5) << setw(15) << right << Sm - w;
      cout << fixed << setprecision(5) << setw(15) << right << Sm + w << '\n';
   }
   // Blank line after the table, and flush everything written above:
   cout << endl;
} // void confidence_limits_on_mean

void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha)
{
   //
   // Print the results of a Students t test on a single sample.
   //
   // M = true mean.
   // Sm = Sample Mean.
   // Sd = Sample Standard Deviation.
   // Sn = Sample Size, must be at least 2.
   // alpha = Significance Level.
   //
   // Throws std::domain_error if Sn < 2.
   //
   // A Students t test applied to a single set of data.
   // We are testing the null hypothesis that the true
   // mean of the sample is M, and that any variation is down
   // to chance.  We can also test the alternative hypothesis
   // that any difference is not down to chance.
   // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
   //
   // Formatting note: only the precision is set before the first
   // values are printed, so their notation (fixed/scientific/default)
   // is whatever the caller left set on cout.  On return cout is left
   // in fixed notation with precision 3.

   using boost::math::students_t;

   // Sn is unsigned: "Sn - 1" would wrap round to a huge value for
   // Sn == 0, and Sn == 1 leaves zero degrees of freedom.
   // Reject both before anything is printed.
   if(Sn < 2)
      throw std::domain_error(
         "single_sample_t_test: sample size must be at least 2");

   // Print header:
   cout <<
      "__________________________________\n"
      "Student t test for a single sample\n"
      "__________________________________\n\n";
   cout << setprecision(5);
   cout << setw(55) << left << "Number of Observations" << "=  " << Sn << "\n";
   cout << setw(55) << left << "Sample Mean" << "=  " << Sm << "\n";
   cout << setw(55) << left << "Sample Standard Deviation" << "=  " << Sd << "\n";
   cout << setw(55) << left << "Expected True Mean" << "=  " << M << "\n\n";
   //
   // Now we can calculate and output some stats:
   //
   // Difference in means:
   double diff = Sm - M;
   cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "=  " << diff << "\n";
   // Degrees of freedom:
   unsigned v = Sn - 1;
   cout << setw(55) << left << "Degrees of Freedom" << "=  " << v << "\n";
   // t-statistic:
   double t_stat = diff * std::sqrt(static_cast<double>(Sn)) / Sd;
   cout << setw(55) << left << "T Statistic" << "=  " << t_stat << "\n";
   //
   // Finally define our distribution, and get the probability.
   // q is the complemented cdf at |t|, i.e. one tail only;
   // it is doubled when printed to cover both tails:
   //
   students_t dist(v);
   double q = cdf(complement(dist, std::fabs(t_stat)));
   cout << setw(55) << left << "Probability that difference is due to chance" << "=  "
      << setprecision(3) << scientific << 2 * q << "\n\n";
   //
   // Finally print out results of alternative hypothesis:
   //
   cout << setw(55) << left <<
      "Results for Alternative Hypothesis and alpha" << "=  "
      << setprecision(4) << fixed << alpha << "\n\n";
   cout << "Alternative Hypothesis     Conclusion\n";
   // Two sided alternative: compare the single tail probability
   // of |t| against half of alpha.
   cout << "Mean != " << setprecision(3) << fixed << M << "            ";
   if(q < alpha / 2)
      cout << "NOT REJECTED\n";
   else
      cout << "REJECTED\n";
   // One sided alternative "less than": uses the complemented cdf
   // of the signed t-statistic.
   cout << "Mean  < " << setprecision(3) << fixed << M << "            ";
   if(cdf(complement(dist, t_stat)) > alpha)
      cout << "NOT REJECTED\n";
   else
      cout << "REJECTED\n";
   // One sided alternative "greater than": uses the cdf
   // of the signed t-statistic.
   cout << "Mean  > " << setprecision(3) << fixed << M << "            ";
   if(cdf(dist, t_stat) > alpha)
      cout << "NOT REJECTED\n";
   else
      cout << "REJECTED\n";
   cout << endl << endl;
} // void single_sample_t_test(

void single_sample_find_df(double M, double Sm, double Sd)
{
   //
   // Print a table of the estimated sample sizes needed for a
   // single sample t test, for one and two sided tests, at a
   // range of confidence levels.
   //
   // M = true mean.
   // Sm = Sample Mean.
   // Sd = Sample Standard Deviation.
   //
   // Formatting note: only the precision is set before the summary
   // lines are printed, so their notation (fixed/scientific/default)
   // is whatever the caller left set on cout.  On return cout is left
   // in fixed notation with precision 0.
   //
   // NOTE(review): nothing here checks for M == Sm (zero difference)
   // or Sd <= 0 - confirm how find_degrees_of_freedom reports those.

   using boost::math::students_t;

   // Print out general info:
   cout <<
      "_____________________________________________________________\n"
      "Estimated sample sizes required for various confidence levels\n"
      "_____________________________________________________________\n\n";
   cout << setprecision(5);
   cout << setw(40) << left << "True Mean" << "=  " << M << "\n";
   cout << setw(40) << left << "Sample Mean" << "=  " << Sm << "\n";
   cout << setw(40) << left << "Sample Standard Deviation" << "=  " << Sd << "\n";
   //
   // Define a table of significance intervals:
   //
   const double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
   const unsigned alpha_count = sizeof(alpha) / sizeof(alpha[0]);
   //
   // The size of the difference we want to detect is the same
   // for every row, so compute it once:
   //
   const double difference = std::fabs(M - Sm);
   //
   // Print table header:
   //
   cout << "\n\n"
           "_______________________________________________________________\n"
           "Confidence       Estimated          Estimated\n"
           " Value (%)      Sample Size        Sample Size\n"
           "              (one sided test)    (two sided test)\n"
           "_______________________________________________________________\n";
   //
   // Now print out the data for the table rows.
   // Note that the loop starts at index 1, so no row is
   // printed for the first table entry (alpha = 0.5).
   //
   for(unsigned i = 1; i < alpha_count; ++i)
   {
      // Confidence value, as a percentage:
      cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
      // calculate df for single sided test, the same risk level
      // alpha[i] is passed for both the 2nd and 3rd arguments:
      double df = students_t::find_degrees_of_freedom(
         difference, alpha[i], alpha[i], Sd);
      // convert to sample size, always one more than the degrees of freedom:
      double size = std::ceil(df) + 1;
      // Print size:
      cout << fixed << setprecision(0) << setw(16) << right << size;
      // calculate df for two sided test, here the 2nd argument is halved:
      df = students_t::find_degrees_of_freedom(
         difference, alpha[i]/2, alpha[i], Sd);
      // convert to sample size:
      size = std::ceil(df) + 1;
      // Print size:
      cout << fixed << setprecision(0) << setw(16) << right << size << '\n';
   }
   // Blank line after the table, and flush everything written above:
   cout << endl;
} // void single_sample_find_df

int main()
{
   //
   // First data set: Heat Flow Meter data,
   // see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm
   // The data was collected while calibrating a heat flow meter
   // against a known value.
   //
   const double   heat_flow_true_mean   = 5;
   const double   heat_flow_sample_mean = 9.261460;
   const double   heat_flow_sample_sd   = 0.2278881e-01;
   const unsigned heat_flow_sample_size = 195;

   confidence_limits_on_mean(
      heat_flow_sample_mean, heat_flow_sample_sd, heat_flow_sample_size);
   single_sample_t_test(
      heat_flow_true_mean, heat_flow_sample_mean, heat_flow_sample_sd,
      heat_flow_sample_size, 0.05);
   single_sample_find_df(
      heat_flow_true_mean, heat_flow_sample_mean, heat_flow_sample_sd);

   //
   // Second data set, taken from:
   // P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64.
   // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55
   // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
   //
   // Determination of mercury by cold-vapour atomic absorption:
   // the following values were obtained using a trusted
   // Standard Reference Material containing 38.9% mercury,
   // which we assume is correct or 'true'.
   //
   const double   mercury_true_mean   = 38.9;
   const double   mercury_sample_mean = 37.8;
   const double   mercury_sample_sd   = 0.964365;
   const unsigned mercury_sample_size = 3;

   confidence_limits_on_mean(
      mercury_sample_mean, mercury_sample_sd, mercury_sample_size);
   // Test at the 95% level:
   single_sample_t_test(
      mercury_true_mean, mercury_sample_mean, mercury_sample_sd,
      mercury_sample_size, 0.05);
   // Test again at the 90% level:
   single_sample_t_test(
      mercury_true_mean, mercury_sample_mean, mercury_sample_sd,
      mercury_sample_size, 0.1);
   // Estimate the sample size required:
   single_sample_find_df(
      mercury_true_mean, mercury_sample_mean, mercury_sample_sd);

   return 0;
} // int main()

/*

Output:

------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------
  students_t_single_sample.cpp
  Generating code
  Finished generating code
  students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe
__________________________________
2-Sided Confidence Limits For Mean
__________________________________

Number of Observations                  =  195
Mean                                    =  9.26146
Standard Deviation                      =  0.02278881


_______________________________________________________________
Confidence       T           Interval          Lower          Upper
 Value (%)     Value          Width            Limit          Limit
_______________________________________________________________
    50.000     0.676       1.103e-003        9.26036        9.26256
    75.000     1.154       1.883e-003        9.25958        9.26334
    90.000     1.653       2.697e-003        9.25876        9.26416
    95.000     1.972       3.219e-003        9.25824        9.26468
    99.000     2.601       4.245e-003        9.25721        9.26571
    99.900     3.341       5.453e-003        9.25601        9.26691
    99.990     3.973       6.484e-003        9.25498        9.26794
    99.999     4.537       7.404e-003        9.25406        9.26886

__________________________________
Student t test for a single sample
__________________________________

Number of Observations                                 =  195
Sample Mean                                            =  9.26146
Sample Standard Deviation                              =  0.02279
Expected True Mean                                     =  5.00000

Sample Mean - Expected Test Mean                       =  4.26146
Degrees of Freedom                                     =  194
T Statistic                                            =  2611.28380
Probability that difference is due to chance           =  0.000e+000

Results for Alternative Hypothesis and alpha           =  0.0500

Alternative Hypothesis     Conclusion
Mean != 5.000            NOT REJECTED
Mean  < 5.000            REJECTED
Mean  > 5.000            NOT REJECTED


_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________

True Mean                               =  5.00000
Sample Mean                             =  9.26146
Sample Standard Deviation               =  0.02279


_______________________________________________________________
Confidence       Estimated          Estimated
 Value (%)      Sample Size        Sample Size
              (one sided test)    (two sided test)
_______________________________________________________________
    75.000               2               2
    90.000               2               2
    95.000               2               2
    99.000               2               2
    99.900               3               3
    99.990               3               3
    99.999               4               4

__________________________________
2-Sided Confidence Limits For Mean
__________________________________

Number of Observations                  =  3
Mean                                    =  37.8000000
Standard Deviation                      =  0.9643650


_______________________________________________________________
Confidence       T           Interval          Lower          Upper
 Value (%)     Value          Width            Limit          Limit
_______________________________________________________________
    50.000     0.816            0.455       37.34539       38.25461
    75.000     1.604            0.893       36.90717       38.69283
    90.000     2.920            1.626       36.17422       39.42578
    95.000     4.303            2.396       35.40438       40.19562
    99.000     9.925            5.526       32.27408       43.32592
    99.900    31.599           17.594       20.20639       55.39361
    99.990    99.992           55.673      -17.87346       93.47346
    99.999   316.225          176.067     -138.26683      213.86683

__________________________________
Student t test for a single sample
__________________________________

Number of Observations                                 =  3
Sample Mean                                            =  37.80000
Sample Standard Deviation                              =  0.96437
Expected True Mean                                     =  38.90000

Sample Mean - Expected Test Mean                       =  -1.10000
Degrees of Freedom                                     =  2
T Statistic                                            =  -1.97566
Probability that difference is due to chance           =  1.869e-001

Results for Alternative Hypothesis and alpha           =  0.0500

Alternative Hypothesis     Conclusion
Mean != 38.900            REJECTED
Mean  < 38.900            NOT REJECTED
Mean  > 38.900            NOT REJECTED


__________________________________
Student t test for a single sample
__________________________________

Number of Observations                                 =  3
Sample Mean                                            =  37.80000
Sample Standard Deviation                              =  0.96437
Expected True Mean                                     =  38.90000

Sample Mean - Expected Test Mean                       =  -1.10000
Degrees of Freedom                                     =  2
T Statistic                                            =  -1.97566
Probability that difference is due to chance           =  1.869e-001

Results for Alternative Hypothesis and alpha           =  0.1000

Alternative Hypothesis     Conclusion
Mean != 38.900            REJECTED
Mean  < 38.900            NOT REJECTED
Mean  > 38.900            REJECTED


_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________

True Mean                               =  38.90000
Sample Mean                             =  37.80000
Sample Standard Deviation               =  0.96437


_______________________________________________________________
Confidence       Estimated          Estimated
 Value (%)      Sample Size        Sample Size
              (one sided test)    (two sided test)
_______________________________________________________________
    75.000               3               4
    90.000               7               9
    95.000              11              13
    99.000              20              22
    99.900              35              37
    99.990              50              53
    99.999              66              68

*/