1 // Copyright John Maddock 2006
2 // Copyright Paul A. Bristow 2007, 2010
3 
4 // Use, modification and distribution are subject to the
5 // Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt
7 // or copy at http://www.boost.org/LICENSE_1_0.txt)
8 
9 #ifdef _MSC_VER
10 #  pragma warning(disable: 4512) // assignment operator could not be generated.
11 #  pragma warning(disable: 4510) // default constructor could not be generated.
12 #  pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
13 #endif
14 
15 #include <boost/math/distributions/students_t.hpp>
16 
17 // avoid "using namespace std;" and "using namespace boost::math;"
18 // to avoid potential ambiguity with names in std random.
19 #include <iostream>
20 using std::cout; using std::endl;
21 using std::left; using std::fixed; using std::right; using std::scientific;
22 #include <iomanip>
23 using std::setw;
24 using std::setprecision;
25 
confidence_limits_on_mean(double Sm,double Sd,unsigned Sn)26 void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn)
27 {
28    //
29    // Sm = Sample Mean.
30    // Sd = Sample Standard Deviation.
31    // Sn = Sample Size.
32    //
33    // Calculate confidence intervals for the mean.
34    // For example if we set the confidence limit to
35    // 0.95, we know that if we repeat the sampling
36    // 100 times, then we expect that the true mean
37    // will be between out limits on 95 occations.
38    // Note: this is not the same as saying a 95%
39    // confidence interval means that there is a 95%
40    // probability that the interval contains the true mean.
41    // The interval computed from a given sample either
42    // contains the true mean or it does not.
43    // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
44 
45    using boost::math::students_t;
46 
47    // Print out general info:
48    cout <<
49       "__________________________________\n"
50       "2-Sided Confidence Limits For Mean\n"
51       "__________________________________\n\n";
52    cout << setprecision(7);
53    cout << setw(40) << left << "Number of Observations" << "=  " << Sn << "\n";
54    cout << setw(40) << left << "Mean" << "=  " << Sm << "\n";
55    cout << setw(40) << left << "Standard Deviation" << "=  " << Sd << "\n";
56    //
57    // Define a table of significance/risk levels:
58    //
59    double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
60    //
61    // Start by declaring the distribution we'll need:
62    //
63    students_t dist(Sn - 1);
64    //
65    // Print table header:
66    //
67    cout << "\n\n"
68            "_______________________________________________________________\n"
69            "Confidence       T           Interval          Lower          Upper\n"
70            " Value (%)     Value          Width            Limit          Limit\n"
71            "_______________________________________________________________\n";
72    //
73    // Now print out the data for the table rows.
74    //
75    for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
76    {
77       // Confidence value:
78       cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
79       // calculate T:
80       double T = quantile(complement(dist, alpha[i] / 2));
81       // Print T:
82       cout << fixed << setprecision(3) << setw(10) << right << T;
83       // Calculate width of interval (one sided):
84       double w = T * Sd / sqrt(double(Sn));
85       // Print width:
86       if(w < 0.01)
87          cout << scientific << setprecision(3) << setw(17) << right << w;
88       else
89          cout << fixed << setprecision(3) << setw(17) << right << w;
90       // Print Limits:
91       cout << fixed << setprecision(5) << setw(15) << right << Sm - w;
92       cout << fixed << setprecision(5) << setw(15) << right << Sm + w << endl;
93    }
94    cout << endl;
95 } // void confidence_limits_on_mean
96 
single_sample_t_test(double M,double Sm,double Sd,unsigned Sn,double alpha)97 void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha)
98 {
99    //
100    // M = true mean.
101    // Sm = Sample Mean.
102    // Sd = Sample Standard Deviation.
103    // Sn = Sample Size.
104    // alpha = Significance Level.
105    //
106    // A Students t test applied to a single set of data.
107    // We are testing the null hypothesis that the true
108    // mean of the sample is M, and that any variation is down
109    // to chance.  We can also test the alternative hypothesis
110    // that any difference is not down to chance.
111    // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
112 
113    using boost::math::students_t;
114 
115    // Print header:
116    cout <<
117       "__________________________________\n"
118       "Student t test for a single sample\n"
119       "__________________________________\n\n";
120    cout << setprecision(5);
121    cout << setw(55) << left << "Number of Observations" << "=  " << Sn << "\n";
122    cout << setw(55) << left << "Sample Mean" << "=  " << Sm << "\n";
123    cout << setw(55) << left << "Sample Standard Deviation" << "=  " << Sd << "\n";
124    cout << setw(55) << left << "Expected True Mean" << "=  " << M << "\n\n";
125    //
126    // Now we can calculate and output some stats:
127    //
128    // Difference in means:
129    double diff = Sm - M;
130    cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "=  " << diff << "\n";
131    // Degrees of freedom:
132    unsigned v = Sn - 1;
133    cout << setw(55) << left << "Degrees of Freedom" << "=  " << v << "\n";
134    // t-statistic:
135    double t_stat = diff * sqrt(double(Sn)) / Sd;
136    cout << setw(55) << left << "T Statistic" << "=  " << t_stat << "\n";
137    //
138    // Finally define our distribution, and get the probability:
139    //
140    students_t dist(v);
141    double q = cdf(complement(dist, fabs(t_stat)));
142    cout << setw(55) << left << "Probability that difference is due to chance" << "=  "
143       << setprecision(3) << scientific << 2 * q << "\n\n";
144    //
145    // Finally print out results of alternative hypothesis:
146    //
147    cout << setw(55) << left <<
148       "Results for Alternative Hypothesis and alpha" << "=  "
149       << setprecision(4) << fixed << alpha << "\n\n";
150    cout << "Alternative Hypothesis     Conclusion\n";
151    cout << "Mean != " << setprecision(3) << fixed << M << "            ";
152    if(q < alpha / 2)
153       cout << "NOT REJECTED\n";
154    else
155       cout << "REJECTED\n";
156    cout << "Mean  < " << setprecision(3) << fixed << M << "            ";
157    if(cdf(complement(dist, t_stat)) > alpha)
158       cout << "NOT REJECTED\n";
159    else
160       cout << "REJECTED\n";
161    cout << "Mean  > " << setprecision(3) << fixed << M << "            ";
162    if(cdf(dist, t_stat) > alpha)
163       cout << "NOT REJECTED\n";
164    else
165       cout << "REJECTED\n";
166    cout << endl << endl;
167 } // void single_sample_t_test(
168 
single_sample_find_df(double M,double Sm,double Sd)169 void single_sample_find_df(double M, double Sm, double Sd)
170 {
171    //
172    // M = true mean.
173    // Sm = Sample Mean.
174    // Sd = Sample Standard Deviation.
175    //
176 
177    using boost::math::students_t;
178 
179    // Print out general info:
180    cout <<
181       "_____________________________________________________________\n"
182       "Estimated sample sizes required for various confidence levels\n"
183       "_____________________________________________________________\n\n";
184    cout << setprecision(5);
185    cout << setw(40) << left << "True Mean" << "=  " << M << "\n";
186    cout << setw(40) << left << "Sample Mean" << "=  " << Sm << "\n";
187    cout << setw(40) << left << "Sample Standard Deviation" << "=  " << Sd << "\n";
188    //
189    // Define a table of significance intervals:
190    //
191    double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
192    //
193    // Print table header:
194    //
195    cout << "\n\n"
196            "_______________________________________________________________\n"
197            "Confidence       Estimated          Estimated\n"
198            " Value (%)      Sample Size        Sample Size\n"
199            "              (one sided test)    (two sided test)\n"
200            "_______________________________________________________________\n";
201    //
202    // Now print out the data for the table rows.
203    //
204    for(unsigned i = 1; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
205    {
206       // Confidence value:
207       cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
208       // calculate df for single sided test:
209       double df = students_t::find_degrees_of_freedom(
210          fabs(M - Sm), alpha[i], alpha[i], Sd);
211       // convert to sample size, always one more than the degrees of freedom:
212       double size = ceil(df) + 1;
213       // Print size:
214       cout << fixed << setprecision(0) << setw(16) << right << size;
215       // calculate df for two sided test:
216       df = students_t::find_degrees_of_freedom(
217          fabs(M - Sm), alpha[i]/2, alpha[i], Sd);
218       // convert to sample size:
219       size = ceil(df) + 1;
220       // Print size:
221       cout << fixed << setprecision(0) << setw(16) << right << size << endl;
222    }
223    cout << endl;
224 } // void single_sample_find_df
225 
main()226 int main()
227 {
228    //
229    // Run tests for Heat Flow Meter data
230    // see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm
231    // The data was collected while calibrating a heat flow meter
232    // against a known value.
233    //
234    confidence_limits_on_mean(9.261460, 0.2278881e-01, 195);
235    single_sample_t_test(5, 9.261460, 0.2278881e-01, 195, 0.05);
236    single_sample_find_df(5, 9.261460, 0.2278881e-01);
237 
238    //
239    // Data for this example from:
240    // P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64.
241    // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55
242    // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
243    //
244    // Determination of mercury by cold-vapour atomic absorption,
245    // the following values were obtained fusing a trusted
246    // Standard Reference Material containing 38.9% mercury,
247    // which we assume is correct or 'true'.
248    //
249    confidence_limits_on_mean(37.8, 0.964365, 3);
250    // 95% test:
251    single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.05);
252    // 90% test:
253    single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.1);
254    // parameter estimate:
255    single_sample_find_df(38.9, 37.8, 0.964365);
256 
257    return 0;
258 } // int main()
259 
260 /*
261 
262 Output:
263 
264 ------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------
265   students_t_single_sample.cpp
266   Generating code
267   Finished generating code
268   students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe
269 __________________________________
270 2-Sided Confidence Limits For Mean
271 __________________________________
272 
273 Number of Observations                  =  195
274 Mean                                    =  9.26146
275 Standard Deviation                      =  0.02278881
276 
277 
278 _______________________________________________________________
279 Confidence       T           Interval          Lower          Upper
280  Value (%)     Value          Width            Limit          Limit
281 _______________________________________________________________
282     50.000     0.676       1.103e-003        9.26036        9.26256
283     75.000     1.154       1.883e-003        9.25958        9.26334
284     90.000     1.653       2.697e-003        9.25876        9.26416
285     95.000     1.972       3.219e-003        9.25824        9.26468
286     99.000     2.601       4.245e-003        9.25721        9.26571
287     99.900     3.341       5.453e-003        9.25601        9.26691
288     99.990     3.973       6.484e-003        9.25498        9.26794
289     99.999     4.537       7.404e-003        9.25406        9.26886
290 
291 __________________________________
292 Student t test for a single sample
293 __________________________________
294 
295 Number of Observations                                 =  195
296 Sample Mean                                            =  9.26146
297 Sample Standard Deviation                              =  0.02279
298 Expected True Mean                                     =  5.00000
299 
300 Sample Mean - Expected Test Mean                       =  4.26146
301 Degrees of Freedom                                     =  194
302 T Statistic                                            =  2611.28380
303 Probability that difference is due to chance           =  0.000e+000
304 
305 Results for Alternative Hypothesis and alpha           =  0.0500
306 
307 Alternative Hypothesis     Conclusion
308 Mean != 5.000            NOT REJECTED
309 Mean  < 5.000            REJECTED
310 Mean  > 5.000            NOT REJECTED
311 
312 
313 _____________________________________________________________
314 Estimated sample sizes required for various confidence levels
315 _____________________________________________________________
316 
317 True Mean                               =  5.00000
318 Sample Mean                             =  9.26146
319 Sample Standard Deviation               =  0.02279
320 
321 
322 _______________________________________________________________
323 Confidence       Estimated          Estimated
324  Value (%)      Sample Size        Sample Size
325               (one sided test)    (two sided test)
326 _______________________________________________________________
327     75.000               2               2
328     90.000               2               2
329     95.000               2               2
330     99.000               2               2
331     99.900               3               3
332     99.990               3               3
333     99.999               4               4
334 
335 __________________________________
336 2-Sided Confidence Limits For Mean
337 __________________________________
338 
339 Number of Observations                  =  3
340 Mean                                    =  37.8000000
341 Standard Deviation                      =  0.9643650
342 
343 
344 _______________________________________________________________
345 Confidence       T           Interval          Lower          Upper
346  Value (%)     Value          Width            Limit          Limit
347 _______________________________________________________________
348     50.000     0.816            0.455       37.34539       38.25461
349     75.000     1.604            0.893       36.90717       38.69283
350     90.000     2.920            1.626       36.17422       39.42578
351     95.000     4.303            2.396       35.40438       40.19562
352     99.000     9.925            5.526       32.27408       43.32592
353     99.900    31.599           17.594       20.20639       55.39361
354     99.990    99.992           55.673      -17.87346       93.47346
355     99.999   316.225          176.067     -138.26683      213.86683
356 
357 __________________________________
358 Student t test for a single sample
359 __________________________________
360 
361 Number of Observations                                 =  3
362 Sample Mean                                            =  37.80000
363 Sample Standard Deviation                              =  0.96437
364 Expected True Mean                                     =  38.90000
365 
366 Sample Mean - Expected Test Mean                       =  -1.10000
367 Degrees of Freedom                                     =  2
368 T Statistic                                            =  -1.97566
369 Probability that difference is due to chance           =  1.869e-001
370 
371 Results for Alternative Hypothesis and alpha           =  0.0500
372 
373 Alternative Hypothesis     Conclusion
374 Mean != 38.900            REJECTED
375 Mean  < 38.900            NOT REJECTED
376 Mean  > 38.900            NOT REJECTED
377 
378 
379 __________________________________
380 Student t test for a single sample
381 __________________________________
382 
383 Number of Observations                                 =  3
384 Sample Mean                                            =  37.80000
385 Sample Standard Deviation                              =  0.96437
386 Expected True Mean                                     =  38.90000
387 
388 Sample Mean - Expected Test Mean                       =  -1.10000
389 Degrees of Freedom                                     =  2
390 T Statistic                                            =  -1.97566
391 Probability that difference is due to chance           =  1.869e-001
392 
393 Results for Alternative Hypothesis and alpha           =  0.1000
394 
395 Alternative Hypothesis     Conclusion
396 Mean != 38.900            REJECTED
397 Mean  < 38.900            NOT REJECTED
398 Mean  > 38.900            REJECTED
399 
400 
401 _____________________________________________________________
402 Estimated sample sizes required for various confidence levels
403 _____________________________________________________________
404 
405 True Mean                               =  38.90000
406 Sample Mean                             =  37.80000
407 Sample Standard Deviation               =  0.96437
408 
409 
410 _______________________________________________________________
411 Confidence       Estimated          Estimated
412  Value (%)      Sample Size        Sample Size
413               (one sided test)    (two sided test)
414 _______________________________________________________________
415     75.000               3               4
416     90.000               7               9
417     95.000              11              13
418     99.000              20              22
419     99.900              35              37
420     99.990              50              53
421     99.999              66              68
422 
423 */
424 
425