1# statistics.tcl --
2#
3#    Package for basic statistical analysis
4#
5# version 0.1:   initial implementation, january 2003
6# version 0.1.1: added linear regres
7# version 0.1.2: border case in stdev taken care of
8# version 0.1.3: moved initialisation of CDF to first call, november 2004
9# version 0.3:   added test for normality (as implemented by Torsten Reincke), march 2006
10#                (also fixed an error in the export list)
11# version 0.4:   added the multivariate linear regression procedures by
12#                Eric Kemp-Benedict, february 2007
13# version 0.5:   added the population standard deviation and variance,
14#                as suggested by Dimitrios Zachariadis
15# version 0.6:   added pdf and cdf procedures for various distributions
16#                (provided by Eric Kemp-Benedict)
17# version 0.7:   added Kruskal-Wallis test (by Torsten Berg)
18# version 0.8:   added Wilcoxon test and Spearman rank correlation
19# version 0.9:   added kernel density estimation
20# version 0.9.3: added histogram-alt, corrected test-normal
21# version 1.0:   added test-anova-F
22# version 1.0.1: correction in pdf-lognormal and cdf-lognormal
23# version 1.1:   added test-Tukey-range and test-Dunnett
24
25package require Tcl 8.5 ; # 8.5+ feature in test-anovo-F: **-operator
26package provide math::statistics 1.1.1
27package require math
28
29if {![llength [info commands ::lrepeat]]} {
30    # Forward portability, emulate lrepeat
31    proc ::lrepeat {n args} {
32	if {$n < 1} {
33	    return -code error "must have a count of at least 1"
34	}
35	set res {}
36	while {$n} {
37	    foreach x $args { lappend res $x }
38	    incr n -1
39	}
40	return $res
41    }
42}
43
44# ::math::statistics --
45#   Namespace holding the procedures and variables
46#
47
48namespace eval ::math::statistics {
49    #
50    # Safer: change to short procedures
51    #
52    namespace export mean min max number var stdev pvar pstdev basic-stats corr \
53	    histogram histogram-alt interval-mean-stdev t-test-mean quantiles \
54	    test-normal lillieforsFit \
55	    autocorr crosscorr filter map samplescount median \
56	    test-2x2 print-2x2 control-xbar test_xbar \
57	    control-Rchart test-Rchart \
58	    test-Kruskal-Wallis analyse-Kruskal-Wallis group-rank \
59	    test-Wilcoxon spearman-rank spearman-rank-extended \
60	    test-Duckworth test-anova-F test-Tukey-range test-Dunnett
61    #
62    # Error messages
63    #
64    variable NEGSTDEV   {Zero or negative standard deviation}
65    variable TOOFEWDATA {Too few or invalid data}
66    variable OUTOFRANGE {Argument out of range}
67
68    #
69    # Coefficients involved
70    #
71    variable factorNormalPdf
72    set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}]
73
74    # xbar/R-charts:
75    # Data from:
76    #    Peter W.M. John:
77    #    Statistical methods in engineering and quality assurance
78    #    Wiley and Sons, 1990
79    #
80    variable control_factors {
81        A2 {1.880 1.093 0.729 0.577 0.483 0.419 0.419}
82        D3 {0.0   0.0   0.0   0.0   0.0   0.076 0.076}
83        D4 {3.267 2.574 2.282 2.114 2.004 1.924 1.924}
84    }
85}
86
87# mean, min, max, number, var, stdev, pvar, pstdev --
88#    Return the mean (minimum, maximum) value of a list of numbers
89#    or number of non-missing values
90#
91# Arguments:
92#    type     Type of value to be returned
93#    values   List of values to be examined
94#
95# Results:
96#    Value that was required
97#
98#
99namespace eval ::math::statistics {
100    foreach type {mean min max number stdev var pstdev pvar} {
101	proc $type { values } "BasicStats $type \$values"
102    }
103    proc basic-stats { values } "BasicStats all \$values"
104}
105
106# BasicStats --
107#    Return the one or all of the basic statistical properties
108#
109# Arguments:
110#    type     Type of value to be returned
111#    values   List of values to be examined
112#
113# Results:
114#    Value that was required
115#
116proc ::math::statistics::BasicStats { type values } {
117    variable TOOFEWDATA
118
119    if { [lsearch {all mean min max number stdev var pstdev pvar} $type] < 0 } {
120	return -code error \
121		-errorcode ARG -errorinfo [list unknown type of statistic -- $type] \
122		[list unknown type of statistic -- $type]
123    }
124
125    set min    {}
126    set max    {}
127    set mean   {}
128    set stdev  {}
129    set var    {}
130
131    set sum    0.0
132    set sumsq  0.0
133    set number 0
134    set first  {}
135
136    foreach value $values {
137	if { $value == {} } {
138	    continue
139	}
140	set value [expr {double($value)}]
141
142	if { $first == {} } {
143	    set first $value
144	}
145
146	incr number
147	set  sum    [expr {$sum+$value}]
148	set  sumsq  [expr {$sumsq+($value-$first)*($value-$first)}]
149
150	if { $min == {} || $value < $min } {
151	    set min $value
152	}
153	if { $max == {} || $value > $max } {
154	    set max $value
155	}
156    }
157
158    if { $number > 0 } {
159	set mean [expr {$sum/$number}]
160    } else {
161	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
162    }
163
164    if { $number > 1 } {
165	set var    [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number-1)}]
166        #
167        # Take care of a rare situation: uniform data might
168        # cause a tiny negative difference
169        #
170        if { $var < 0.0 } {
171           set var 0.0
172        }
173	set stdev  [expr {sqrt($var)}]
174    }
175	set pvar [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number)}]
176        #
177        # Take care of a rare situation: uniform data might
178        # cause a tiny negative difference
179        #
180        if { $pvar < 0.0 } {
181           set pvar 0.0
182        }
183	set pstdev  [expr {sqrt($pvar)}]
184
185    set all [list $mean $min $max $number $stdev $var $pstdev $pvar]
186
187    #
188    # Return the appropriate value
189    #
190    set $type
191}
192
193# histogram --
194#    Return histogram information from a list of numbers
195#
196# Arguments:
197#    limits   Upper limits for the buckets (in increasing order)
198#    values   List of values to be examined
199#    weights  List of weights, one per value (optional)
200#
201# Results:
202#    List of number of values in each bucket (length is one more than
203#    the number of limits)
204#
205#
206proc ::math::statistics::histogram { limits values {weights {}} } {
207
208    if { [llength $limits] < 1 } {
209	return -code error -errorcode ARG -errorinfo {No limits given} {No limits given}
210    }
211    if { [llength $weights] > 0 && [llength $values] != [llength $weights] } {
212	return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length}
213    }
214
215    set limits [lsort -real -increasing $limits]
216
217    for { set index 0 } { $index <= [llength $limits] } { incr index } {
218	set buckets($index) 0
219    }
220
221    set last [llength $limits]
222
223    # Will do integer arithmetic if unset
224    if {$weights eq ""} {
225       set weights [lrepeat [llength $values] 1]
226    }
227
228    foreach value $values weight $weights {
229	if { $value == {} } {
230	    continue
231	}
232
233	set index 0
234	set found 0
235	foreach limit $limits {
236	    if { $value <= $limit } {
237		set found 1
238		set buckets($index) [expr $buckets($index)+$weight]
239		break
240	    }
241	    incr index
242	}
243
244	if { $found == 0 } {
245	    set buckets($last) [expr $buckets($last)+$weight]
246	}
247    }
248
249    set result {}
250    for { set index 0 } { $index <= $last } { incr index } {
251	lappend result $buckets($index)
252    }
253
254    return $result
255}
256
257# histogram-alt --
258#    Return histogram information from a list of numbers -
259#    intervals are open-ended at the lower bound instead of at the upper bound
260#
261# Arguments:
262#    limits   Upper limits for the buckets (in increasing order)
263#    values   List of values to be examined
264#    weights  List of weights, one per value (optional)
265#
266# Results:
267#    List of number of values in each bucket (length is one more than
268#    the number of limits)
269#
270#
271proc ::math::statistics::histogram-alt { limits values {weights {}} } {
272
273    if { [llength $limits] < 1 } {
274	return -code error -errorcode ARG -errorinfo {No limits given} {No limits given}
275    }
276    if { [llength $weights] > 0 && [llength $values] != [llength $weights] } {
277	return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length}
278    }
279
280    set limits [lsort -real -increasing $limits]
281
282    for { set index 0 } { $index <= [llength $limits] } { incr index } {
283	set buckets($index) 0
284    }
285
286    set last [llength $limits]
287
288    # Will do integer arithmetic if unset
289    if {$weights eq ""} {
290       set weights [lrepeat [llength $values] 1]
291    }
292
293    foreach value $values weight $weights {
294	if { $value == {} } {
295	    continue
296	}
297
298	set index 0
299	set found 0
300	foreach limit $limits {
301	    if { $value < $limit } {
302		set found 1
303		set buckets($index) [expr $buckets($index)+$weight]
304		break
305	    }
306	    incr index
307	}
308
309	if { $found == 0 } {
310	    set buckets($last) [expr $buckets($last)+$weight]
311	}
312    }
313
314    set result {}
315    for { set index 0 } { $index <= $last } { incr index } {
316	lappend result $buckets($index)
317    }
318
319    return $result
320}
321
322# corr --
323#    Return the correlation coefficient of two sets of data
324#
325# Arguments:
326#    data1    List with the first set of data
327#    data2    List with the second set of data
328#
329# Result:
330#    Correlation coefficient of the two
331#
332proc ::math::statistics::corr { data1 data2 } {
333    variable TOOFEWDATA
334
335    set number  0
336    set sum1    0.0
337    set sum2    0.0
338    set sumsq1  0.0
339    set sumsq2  0.0
340    set sumprod 0.0
341
342    foreach value1 $data1 value2 $data2 {
343	if { $value1 == {} || $value2 == {} } {
344	    continue
345	}
346	set  value1  [expr {double($value1)}]
347	set  value2  [expr {double($value2)}]
348
349	set  sum1    [expr {$sum1+$value1}]
350	set  sum2    [expr {$sum2+$value2}]
351	set  sumsq1  [expr {$sumsq1+$value1*$value1}]
352	set  sumsq2  [expr {$sumsq2+$value2*$value2}]
353	set  sumprod [expr {$sumprod+$value1*$value2}]
354	incr number
355    }
356    if { $number > 0 } {
357	set numerator   [expr {$number*$sumprod-$sum1*$sum2}]
358	set denom1      [expr {sqrt($number*$sumsq1-$sum1*$sum1)}]
359	set denom2      [expr {sqrt($number*$sumsq2-$sum2*$sum2)}]
360	if { $denom1 != 0.0 && $denom2 != 0.0 } {
361	    set corr_coeff  [expr {$numerator/$denom1/$denom2}]
362	} elseif { $denom1 != 0.0 || $denom2 != 0.0 } {
363	    set corr_coeff  0.0 ;# Uniform against non-uniform
364	} else {
365	    set corr_coeff  1.0 ;# Both uniform
366	}
367
368    } else {
369	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
370    }
371    return $corr_coeff
372}
373
374# lillieforsFit --
375#     Calculate the goodness of fit according to Lilliefors
376#     (goodness of fit to a normal distribution)
377#
378# Arguments:
379#     values          List of values to be tested for normality
380#
381# Result:
382#     Value of the statistic D
383#
384proc ::math::statistics::lillieforsFit {values} {
385    #
386    # calculate the goodness of fit according to Lilliefors
387    # (goodness of fit to a normal distribution)
388    #
389    # values -> list of values to be tested for normality
390    # (these values are sampled counts)
391    #
392
393    # calculate standard deviation and mean of the sample:
394    set n [llength $values]
395    if { $n < 5 } {
396        return -code error "Insufficient number of data (at least five required)"
397    }
398    set sd   [stdev $values]
399    set mean [mean $values]
400
401    # sort the sample for further processing:
402    set values [lsort -real $values]
403
404    # standardize the sample data (Z-scores):
405    foreach x $values {
406        lappend stdData [expr {($x - $mean)/double($sd)}]
407    }
408
409    # compute the value of the distribution function at every sampled point:
410    foreach x $stdData {
411        lappend expData [pnorm $x]
412    }
413
414    # compute D+:
415    set i 0
416    foreach x $expData {
417        incr i
418        lappend dplus [expr {$i/double($n)-$x}]
419    }
420    set dplus [lindex [lsort -real $dplus] end]
421
422    # compute D-:
423    set i 0
424    foreach x $expData {
425        incr i
426        lappend dminus [expr {$x-($i-1)/double($n)}]
427    }
428    set dminus [lindex [lsort -real $dminus] end]
429
430    # Calculate the test statistic D
431    # by finding the maximal vertical difference
432    # between the sample and the expectation:
433    #
434    set D [expr {$dplus > $dminus ? $dplus : $dminus}]
435
436    # We now use the modified statistic Z,
437    # because D is only reliable
438    # if the p-value is smaller than 0.1
439    return [expr {$D * (sqrt($n) - 0.01 + 0.831/sqrt($n))}]
440}
441
442# pnorm --
443#     Calculate the cumulative distribution function (cdf)
444#     for the standard normal distribution like in the statistical
445#     software 'R' (mean=0 and sd=1)
446#
447# Arguments:
448#     x               Value fro which the cdf should be calculated
449#
450# Result:
451#     Value of the statistic D
452#
453proc ::math::statistics::pnorm {x} {
454    #
455    # cumulative distribution function (cdf)
456    # for the standard normal distribution like in the statistical software 'R'
457    # (mean=0 and sd=1)
458    #
459    # x -> value for which the cdf should be calculated
460    #
461    set sum [expr {double($x)}]
462    set oldSum 0.0
463    set i 1
464    set denom 1.0
465    while {$sum != $oldSum} {
466            set oldSum $sum
467            incr i 2
468            set denom [expr {$denom*$i}]
469            #puts "$i - $denom"
470            set sum [expr {$oldSum + pow($x,$i)/$denom}]
471    }
472    return [expr {0.5 + $sum * exp(-0.5 * $x*$x - 0.91893853320467274178)}]
473}
474
475# pnorm_quicker --
476#     Calculate the cumulative distribution function (cdf)
477#     for the standard normal distribution - quicker alternative
478#     (less accurate)
479#
480# Arguments:
481#     x               Value for which the cdf should be calculated
482#
483# Result:
484#     Value of the statistic D
485#
486proc ::math::statistics::pnorm_quicker {x} {
487
488    set n [expr {abs($x)}]
489    set n [expr {1.0 + $n*(0.04986735 + $n*(0.02114101 + $n*(0.00327763 \
490            + $n*(0.0000380036 + $n*(0.0000488906 + $n*0.000005383)))))}]
491    set n [expr {1.0/pow($n,16)}]
492    #
493    if {$x >= 0} {
494        return [expr {1 - $n/2.0}]
495    } else {
496        return [expr {$n/2.0}]
497    }
498}
499
500# test-normal --
501#     Test for normality (using method Lilliefors)
502#
503# Arguments:
504#     data            Values that need to be tested
505#     significance    Level at which the discrepancy from normality is tested
506#
507# Result:
508#     1 if the Lilliefors statistic D is larger than the critical level
509#
510# Note:
511#     There was a mistake in the implementation before 0.9.3: confidence (wrong word)
512#     instead of significance. To keep compatibility with earlier versions, both
513#     significance and 1-significance are accepted.
514#
515proc ::math::statistics::test-normal {data significance} {
516    set D [lillieforsFit $data]
517
518    if { $significance > 0.5 } {
519        set significance [expr {1.0-$significance}] ;# Convert the erroneous levels pre 0.9.3
520    }
521
522    set Dcrit --
523    if { abs($significance-0.20) < 0.0001 } {
524        set Dcrit 0.741
525    }
526    if { abs($significance-0.15) < 0.0001 } {
527        set Dcrit 0.775
528    }
529    if { abs($significance-0.10) < 0.0001 } {
530        set Dcrit 0.819
531    }
532    if { abs($significance-0.05) < 0.0001 } {
533        set Dcrit 0.895
534    }
535    if { abs($significance-0.01) < 0.0001 } {
536        set Dcrit 1.035
537    }
538    if { $Dcrit != "--" } {
539        return [expr {$D > $Dcrit ? 1 : 0 }]
540    } else {
541        return -code error "Significancce level must be one of: 0.20, 0.15, 0.10, 0.05 or 0.01"
542    }
543}
544
545# t-test-mean --
546#    Test whether the mean value of a sample is in accordance with the
547#    estimated normal distribution with a certain probability
548#    (Student's t test)
549#
550# Arguments:
551#    data         List of raw data values (small sample)
552#    est_mean     Estimated mean of the distribution
553#    est_stdev    Estimated stdev of the distribution
554#    alpha        Probability level (0.95 or 0.99 for instance)
555#
556# Result:
557#    1 if the test is positive, 0 otherwise. If there are too few data,
558#    returns an empty string
559#
560proc ::math::statistics::t-test-mean { data est_mean est_stdev alpha } {
561    variable NEGSTDEV
562    variable TOOFEWDATA
563
564    if { $est_stdev <= 0.0 } {
565	return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
566    }
567
568    set allstats        [BasicStats all $data]
569
570    set alpha2          [expr {(1.0+$alpha)/2.0}]
571
572    set sample_mean     [lindex $allstats 0]
573    set sample_number   [lindex $allstats 3]
574
575    if { $sample_number > 1 } {
576	set tzero   [expr {abs($sample_mean-$est_mean)/$est_stdev * \
577		sqrt($sample_number-1)}]
578	set degrees [expr {$sample_number-1}]
579	set prob    [cdf-students-t $degrees $tzero]
580
581	return [expr {$prob<$alpha2}]
582
583    } else {
584	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
585    }
586}
587
588# interval-mean-stdev --
589#    Return the interval containing the mean value and one
590#    containing the standard deviation with a certain
591#    level of confidence (assuming a normal distribution)
592#
593# Arguments:
594#    data         List of raw data values
595#    confidence   Confidence level (0.95 or 0.99 for instance)
596#
597# Result:
598#    List having the following elements: lower and upper bounds of
599#    mean, lower and upper bounds of stdev
600#
601#
602proc ::math::statistics::interval-mean-stdev { data confidence } {
603    variable TOOFEWDATA
604
605    set allstats [BasicStats all $data]
606
607    set conf2    [expr {(1.0+$confidence)/2.0}]
608    set mean     [lindex $allstats 0]
609    set number   [lindex $allstats 3]
610    set stdev    [lindex $allstats 4]
611
612    if { $number > 1 } {
613	set degrees    [expr {$number-1}]
614	set student_t  [expr {sqrt([Inverse-cdf-toms322 1 $degrees $conf2])}]
615	set mean_lower [expr {$mean-$student_t*$stdev/sqrt($number)}]
616	set mean_upper [expr {$mean+$student_t*$stdev/sqrt($number)}]
617	set stdev_lower {}
618	set stdev_upper {}
619	return [list $mean_lower $mean_upper $stdev_lower $stdev_upper]
620    } else {
621	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
622    }
623}
624
625# quantiles --
626#    Return the quantiles for a given set of data or histogram
627#
628# Arguments:
629#    (two arguments)
630#    data         List of raw data values
631#    confidence   Confidence level (0.95 or 0.99 for instance)
632#    (three arguments)
633#    limits       List of upper limits from histogram
634#    counts       List of counts for for each interval in histogram
635#    confidence   Confidence level (0.95 or 0.99 for instance)
636#
637# Result:
638#    List of quantiles
639#
640proc ::math::statistics::quantiles { arg1 arg2 {arg3 {}} } {
641    variable TOOFEWDATA
642
643    if { [catch {
644	if { $arg3 == {} } {
645	    set result \
646		    [::math::statistics::QuantilesRawData $arg1 $arg2]
647	} else {
648	    set result \
649		    [::math::statistics::QuantilesHistogram $arg1 $arg2 $arg3]
650	}
651    } msg] } {
652	return -code error -errorcode $msg $msg
653    }
654    return $result
655}
656
657# QuantilesRawData --
658#    Return the quantiles based on raw data
659#
660# Arguments:
661#    data         List of raw data values
662#    confidence   Confidence level (0.95 or 0.99 for instance)
663#
664# Result:
665#    List of quantiles
666#
667proc ::math::statistics::QuantilesRawData { data confidence } {
668    variable TOOFEWDATA
669    variable OUTOFRANGE
670
671    if { [llength $confidence] <= 0 } {
672	return -code error -errorcode ARG "$TOOFEWDATA - quantiles"
673    }
674
675    if { [llength $data] <= 0 } {
676	return -code error -errorcode ARG "$TOOFEWDATA - raw data"
677    }
678
679    foreach cond $confidence {
680	if { $cond <= 0.0 || $cond >= 1.0 } {
681	    return -code error -errorcode ARG "$OUTOFRANGE - quantiles"
682	}
683    }
684
685    #
686    # Sort the data first
687    #
688    set sorted_data [lsort -real -increasing $data]
689
690    #
691    # Determine the list element lower or equal to the quantile
692    # and return the corresponding value
693    #
694    set result      {}
695    set number_data [llength $sorted_data]
696    foreach cond $confidence {
697	set elem [expr {round($number_data*$cond)-1}]
698	if { $elem < 0 } {
699	    set elem 0
700	}
701	lappend result [lindex $sorted_data $elem]
702    }
703
704    return $result
705}
706
707# QuantilesHistogram --
708#    Return the quantiles based on histogram information only
709#
710# Arguments:
711#    limits       Upper limits for histogram intervals
712#    counts       Counts for each interval
713#    confidence   Confidence level (0.95 or 0.99 for instance)
714#
715# Result:
716#    List of quantiles
717#
718proc ::math::statistics::QuantilesHistogram { limits counts confidence } {
719    variable TOOFEWDATA
720    variable OUTOFRANGE
721
722    if { [llength $confidence] <= 0 } {
723	return -code error -errorcode ARG "$TOOFEWDATA - quantiles"
724    }
725
726    if { [llength $confidence] <= 0 } {
727	return -code error -errorcode ARG "$TOOFEWDATA - histogram limits"
728    }
729
730    if { [llength $counts] <= [llength $limits] } {
731	return -code error -errorcode ARG "$TOOFEWDATA - histogram counts"
732    }
733
734    foreach cond $confidence {
735	if { $cond <= 0.0 || $cond >= 1.0 } {
736	    return -code error -errorcode ARG "$OUTOFRANGE - quantiles"
737	}
738    }
739
740    #
741    # Accumulate the histogram counts first
742    #
743    set sum 0
744    set accumulated_counts {}
745    foreach count $counts {
746	set sum [expr {$sum+$count}]
747	lappend accumulated_counts $sum
748    }
749    set total_counts $sum
750
751    #
752    # Determine the list element lower or equal to the quantile
753    # and return the corresponding value (use interpolation if
754    # possible)
755    #
756    set result      {}
757    foreach cond $confidence {
758	set found       0
759	set bound       [expr {round($total_counts*$cond)}]
760	set lower_limit {}
761	set lower_count 0
762	foreach acc_count $accumulated_counts limit $limits {
763	    if { $acc_count >= $bound } {
764		set found 1
765		break
766	    }
767	    set lower_limit $limit
768	    set lower_count $acc_count
769	}
770
771	if { $lower_limit == {} || $limit == {} || $found == 0 } {
772	    set quant $limit
773	    if { $limit == {} } {
774		set quant $lower_limit
775	    }
776	} else {
777	    set quant [expr {$limit+($lower_limit-$limit) *
778	    ($acc_count-$bound)/($acc_count-$lower_count)}]
779	}
780	lappend result $quant
781    }
782
783    return $result
784}
785
786# autocorr --
787#    Return the autocorrelation function (assuming equidistance between
788#    samples)
789#
790# Arguments:
791#    data         Raw data for which the autocorrelation must be determined
792#
793# Result:
794#    List of autocorrelation values (about 1/2 the number of raw data)
795#
796proc ::math::statistics::autocorr { data } {
797    variable TOOFEWDATA
798
799    if { [llength $data] <= 1 } {
800	return -code error -errorcode ARG "$TOOFEWDATA"
801    }
802
803    return [crosscorr $data $data]
804}
805
806# crosscorr --
807#    Return the cross-correlation function (assuming equidistance
808#    between samples)
809#
810# Arguments:
811#    data1        First set of raw data
812#    data2        Second set of raw data
813#
814# Result:
815#    List of cross-correlation values (about 1/2 the number of raw data)
816#
817# Note:
818#    The number of data pairs is not kept constant - because tests
819#    showed rather awkward results when it was kept constant.
820#
821proc ::math::statistics::crosscorr { data1 data2 } {
822    variable TOOFEWDATA
823
824    if { [llength $data1] <= 1 || [llength $data2] <= 1 } {
825	return -code error -errorcode ARG "$TOOFEWDATA"
826    }
827
828    #
829    # First determine the number of data pairs
830    #
831    set number1 [llength $data1]
832    set number2 [llength $data2]
833
834    set basic_stat1 [basic-stats $data1]
835    set basic_stat2 [basic-stats $data2]
836    set vmean1      [lindex $basic_stat1 0]
837    set vmean2      [lindex $basic_stat2 0]
838    set vvar1       [lindex $basic_stat1 end]
839    set vvar2       [lindex $basic_stat2 end]
840
841    set number_pairs $number1
842    if { $number1 > $number2 } {
843	set number_pairs $number2
844    }
845    set number_values $number_pairs
846    set number_delays [expr {$number_values/2.0}]
847
848    set scale [expr {sqrt($vvar1*$vvar2)}]
849
850    set result {}
851    for { set delay 0 } { $delay < $number_delays } { incr delay } {
852	set sumcross 0.0
853	set no_cross 0
854	for { set idx 0 } { $idx < $number_values } { incr idx } {
855	    set value1 [lindex $data1 $idx]
856	    set value2 [lindex $data2 [expr {$idx+$delay}]]
857	    if { $value1 != {} && $value2 != {} } {
858		set  sumcross \
859			[expr {$sumcross+($value1-$vmean1)*($value2-$vmean2)}]
860		incr no_cross
861	    }
862	}
863	lappend result [expr {$sumcross/($no_cross*$scale)}]
864
865	incr number_values -1
866    }
867
868    return $result
869}
870
871# mean-histogram-limits
872#    Determine reasonable limits based on mean and standard deviation
873#    for a histogram
874#
875# Arguments:
876#    mean         Mean of the data
877#    stdev        Standard deviation
878#    number       Number of limits to generate (defaults to 8)
879#
880# Result:
881#    List of limits
882#
883proc ::math::statistics::mean-histogram-limits { mean stdev {number 8} } {
884    variable NEGSTDEV
885
886    if { $stdev <= 0.0 } {
887	return -code error -errorcode ARG "$NEGSTDEV"
888    }
889    if { $number < 1 } {
890	return -code error -errorcode ARG "Number of limits must be positive"
891    }
892
893    #
894    # Always: between mean-3.0*stdev and mean+3.0*stdev
895    # number = 2: -0.25, 0.25
896    # number = 3: -0.25, 0, 0.25
897    # number = 4: -1, -0.25, 0.25, 1
898    # number = 5: -1, -0.25, 0, 0.25, 1
899    # number = 6: -2, -1, -0.25, 0.25, 1, 2
900    # number = 7: -2, -1, -0.25, 0, 0.25, 1, 2
901    # number = 8: -3, -2, -1, -0.25, 0.25, 1, 2, 3
902    #
903    switch -- $number {
904	"1" { set limits {0.0} }
905	"2" { set limits {-0.25 0.25} }
906	"3" { set limits {-0.25 0.0 0.25} }
907	"4" { set limits {-1.0 -0.25 0.25 1.0} }
908	"5" { set limits {-1.0 -0.25 0.0 0.25 1.0} }
909	"6" { set limits {-2.0 -1.0 -0.25 0.25 1.0 2.0} }
910	"7" { set limits {-2.0 -1.0 -0.25 0.0 0.25 1.0 2.0} }
911	"8" { set limits {-3.0 -2.0 -1.0 -0.25 0.25 1.0 2.0 3.0} }
912	"9" { set limits {-3.0 -2.0 -1.0 -0.25 0.0 0.25 1.0 2.0 3.0} }
913	default {
914	    set dlim [expr {6.0/double($number-1)}]
915	    for {set i 0} {$i <$number} {incr i} {
916		lappend limits [expr {$dlim*($i-($number-1)/2.0)}]
917	    }
918	}
919    }
920
921    set result {}
922    foreach limit $limits {
923	lappend result [expr {$mean+$limit*$stdev}]
924    }
925
926    return $result
927}
928
929# minmax-histogram-limits
930#    Determine reasonable limits based on minimum and maximum bounds
931#    for a histogram
932#
933# Arguments:
934#    min          Estimated minimum
935#    max          Estimated maximum
936#    number       Number of limits to generate (defaults to 8)
937#
938# Result:
939#    List of limits
940#
941proc ::math::statistics::minmax-histogram-limits { min max {number 8} } {
942    variable NEGSTDEV
943
944    if { $number < 1 } {
945	return -code error -errorcode ARG "Number of limits must be positive"
946    }
947    if { $min >= $max } {
948	return -code error -errorcode ARG "Minimum must be lower than maximum"
949    }
950
951    set result {}
952    set dlim [expr {($max-$min)/double($number-1)}]
953    for {set i 0} {$i <$number} {incr i} {
954	lappend result [expr {$min+$dlim*$i}]
955    }
956
957    return $result
958}
959
960# linear-model
961#    Determine the coefficients for a linear regression between
962#    two series of data (the model: Y = A + B*X)
963#
964# Arguments:
965#    xdata        Series of independent (X) data
966#    ydata        Series of dependent (Y) data
967#    intercept    Whether to use an intercept or not (optional)
968#
969# Result:
970#    List of the following items:
971#    - (Estimate of) Intercept A
972#    - (Estimate of) Slope B
973#    - Standard deviation of Y relative to fit
974#    - Correlation coefficient R2
975#    - Number of degrees of freedom df
976#    - Standard error of the intercept A
977#    - Significance level of A
978#    - Standard error of the slope B
979#    - Significance level of B
980#
981#
982proc ::math::statistics::linear-model { xdata ydata {intercept 1} } {
983   variable TOOFEWDATA
984
985   if { [llength $xdata] < 3 } {
986      return -code error -errorcode ARG "$TOOFEWDATA: not enough independent data"
987   }
988   if { [llength $ydata] < 3 } {
989      return -code error -errorcode ARG "$TOOFEWDATA: not enough dependent data"
990   }
991   if { [llength $xdata] != [llength $ydata] } {
992      return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data"
993   }
994
995   set sumx  0.0
996   set sumy  0.0
997   set sumx2 0.0
998   set sumy2 0.0
999   set sumxy 0.0
1000   set df    0
1001   foreach x $xdata y $ydata {
1002      if { $x != "" && $y != "" } {
1003         set sumx  [expr {$sumx+$x}]
1004         set sumy  [expr {$sumy+$y}]
1005         set sumx2 [expr {$sumx2+$x*$x}]
1006         set sumy2 [expr {$sumy2+$y*$y}]
1007         set sumxy [expr {$sumxy+$x*$y}]
1008         incr df
1009      }
1010   }
1011
1012   if { $df <= 2 } {
1013      return -code error -errorcode ARG "$TOOFEWDATA: too few valid data"
1014   }
1015   if { $sumx2 == 0.0 } {
1016      return -code error -errorcode ARG "$TOOFEWDATA: independent values are all the same"
1017   }
1018
1019   #
1020   # Calculate the intermediate quantities
1021   #
1022   set sx  [expr {$sumx2-$sumx*$sumx/$df}]
1023   set sy  [expr {$sumy2-$sumy*$sumy/$df}]
1024   set sxy [expr {$sumxy-$sumx*$sumy/$df}]
1025
1026   #
1027   # Calculate the coefficients
1028   #
1029   if { $intercept } {
1030      set B [expr {$sxy/$sx}]
1031      set A [expr {($sumy-$B*$sumx)/$df}]
1032   } else {
1033      set B [expr {$sumxy/$sumx2}]
1034      set A 0.0
1035   }
1036
1037   #
1038   # Calculate the error estimates
1039   #
1040   set stdevY 0.0
1041   set varY   0.0
1042
1043   if { $intercept } {
1044      set ve [expr {$sy-$B*$sxy}]
1045      if { $ve >= 0.0 } {
1046         set varY [expr {$ve/($df-2)}]
1047      }
1048   } else {
1049      set ve [expr {$sumy2-$B*$sumxy}]
1050      if { $ve >= 0.0 } {
1051         set varY [expr {$ve/($df-1)}]
1052      }
1053   }
1054   set seY [expr {sqrt($varY)}]
1055
1056   if { $intercept } {
1057      set R2    [expr {$sxy*$sxy/($sx*$sy)}]
1058      set seA   [expr {$seY*sqrt(1.0/$df+$sumx*$sumx/($sx*$df*$df))}]
1059      set seB   [expr {sqrt($varY/$sx)}]
1060      set tA    {}
1061      set tB    {}
1062      if { $seA != 0.0 } {
1063         set tA    [expr {$A/$seA*sqrt($df-2)}]
1064      }
1065      if { $seB != 0.0 } {
1066         set tB    [expr {$B/$seB*sqrt($df-2)}]
1067      }
1068   } else {
1069      set R2    [expr {$sumxy*$sumxy/($sumx2*$sumy2)}]
1070      set seA   {}
1071      set tA    {}
1072      set tB    {}
1073      set seB   [expr {sqrt($varY/$sumx2)}]
1074      if { $seB != 0.0 } {
1075         set tB    [expr {$B/$seB*sqrt($df-1)}]
1076      }
1077   }
1078
1079   #
1080   # Return the list of parameters
1081   #
1082   return [list $A $B $seY $R2 $df $seA $tA $seB $tB]
1083}
1084
1085# linear-residuals
1086#    Determine the difference between actual data and predicted from
1087#    the linear model
1088#
1089# Arguments:
1090#    xdata        Series of independent (X) data
1091#    ydata        Series of dependent (Y) data
1092#    intercept    Whether to use an intercept or not (optional)
1093#
1094# Result:
1095#    List of differences
1096#
1097proc ::math::statistics::linear-residuals { xdata ydata {intercept 1} } {
1098   variable TOOFEWDATA
1099
1100   if { [llength $xdata] < 3 } {
1101      return -code error -errorcode ARG "$TOOFEWDATA: no independent data"
1102   }
1103   if { [llength $ydata] < 3 } {
1104      return -code error -errorcode ARG "$TOOFEWDATA: no dependent data"
1105   }
1106   if { [llength $xdata] != [llength $ydata] } {
1107      return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data"
1108   }
1109
1110   foreach {A B} [linear-model $xdata $ydata $intercept] {break}
1111
1112   set result {}
1113   foreach x $xdata y $ydata {
1114      set residue [expr {$y-$A-$B*$x}]
1115      lappend result $residue
1116   }
1117   return $result
1118}
1119
1120# median
1121#    Determine the median from a list of data
1122#
1123# Arguments:
1124#    data         (Unsorted) list of data
1125#
1126# Result:
1127#    Median (either the middle value or the mean of two values in the
1128#    middle)
1129#
1130# Note:
1131#    Adapted from the Wiki page "Stats", code provided by JPS
1132#
1133proc ::math::statistics::median { data } {
1134    set org_data $data
1135    set data     {}
1136    foreach value $org_data {
1137        if { $value != {} } {
1138            lappend data $value
1139        }
1140    }
1141    set len [llength $data]
1142
1143    set data [lsort -real $data]
1144    if { $len % 2 } {
1145        lindex $data [expr {($len-1)/2}]
1146    } else {
1147        expr {([lindex $data [expr {($len / 2) - 1}]] \
1148		+ [lindex $data [expr {$len / 2}]]) / 2.0}
1149    }
1150}
1151
1152# test-2x2 --
1153#     Compute the chi-square statistic for a 2x2 table
1154#
1155# Arguments:
1156#     a           Element upper-left
1157#     b           Element upper-right
1158#     c           Element lower-left
1159#     d           Element lower-right
1160# Return value:
1161#     Chi-square
1162# Note:
1163#     There is only one degree of freedom - this is important
1164#     when comparing the value to the tabulated values
1165#     of chi-square
1166#
1167proc ::math::statistics::test-2x2 { a b c d } {
1168    set ab     [expr {$a+$b}]
1169    set ac     [expr {$a+$c}]
1170    set bd     [expr {$b+$d}]
1171    set cd     [expr {$c+$d}]
1172    set N      [expr {$a+$b+$c+$d}]
1173    set det    [expr {$a*$d-$b*$c}]
1174    set result [expr {double($N*$det*$det)/double($ab*$cd*$ac*$bd)}]
1175}
1176
1177# print-2x2 --
1178#     Print a 2x2 table
1179#
1180# Arguments:
1181#     a           Element upper-left
1182#     b           Element upper-right
1183#     c           Element lower-left
1184#     d           Element lower-right
1185# Return value:
1186#     Printed version with marginals
1187#
1188proc ::math::statistics::print-2x2 { a b c d } {
1189    set ab     [expr {$a+$b}]
1190    set ac     [expr {$a+$c}]
1191    set bd     [expr {$b+$d}]
1192    set cd     [expr {$c+$d}]
1193    set N      [expr {$a+$b+$c+$d}]
1194    set chisq  [test-2x2 $a $b $c $d]
1195
1196    set    line   [string repeat - 10]
1197    set    result [format "%10d%10d | %10d\n" $a $b $ab]
1198    append result [format "%10d%10d | %10d\n" $c $d $cd]
1199    append result [format "%10s%10s + %10s\n" $line $line $line]
1200    append result [format "%10d%10d | %10d\n" $ac $bd $N]
1201    append result "Chisquare = $chisq\n"
1202    append result "Difference is significant?\n"
1203    append result "   at 95%: [expr {$chisq<3.84146? "no":"yes"}]\n"
1204    append result "   at 99%: [expr {$chisq<6.63490? "no":"yes"}]"
1205}
1206
1207# control-xbar --
1208#     Determine the control lines for an x-bar chart
1209#
1210# Arguments:
1211#     data        List of observed values (at least 20*nsamples)
1212#     nsamples    Number of data per subsamples (default: 4)
1213# Return value:
1214#     List of: mean, lower limit, upper limit, number of data per
1215#     subsample. Can be used in the test-xbar procedure
1216#
1217proc ::math::statistics::control-xbar { data {nsamples 4} } {
1218    variable TOOFEWDATA
1219    variable control_factors
1220
1221    #
1222    # Check the number of data
1223    #
1224    if { $nsamples <= 1 } {
1225        return -code error -errorcode DATA -errorinfo $OUTOFRANGE \
1226            "Number of data per subsample must be at least 2"
1227    }
1228    if { [llength $data] < 20*$nsamples } {
1229        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1230    }
1231
1232    set nogroups [expr {[llength $data]/$nsamples}]
1233    set mrange   0.0
1234    set xmeans   0.0
1235    for { set i 0 } { $i < $nogroups } { incr i } {
1236        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1237
1238        set xmean 0.0
1239        set xmin  [lindex $subsample 0]
1240        set xmax  $xmin
1241        foreach d $subsample {
1242            set xmean [expr {$xmean+$d}]
1243            set xmin  [expr {$xmin<$d? $xmin : $d}]
1244            set xmax  [expr {$xmax>$d? $xmax : $d}]
1245        }
1246        set xmean [expr {$xmean/double($nsamples)}]
1247
1248        set xmeans [expr {$xmeans+$xmean}]
1249        set mrange [expr {$mrange+($xmax-$xmin)}]
1250    }
1251
1252    #
1253    # Determine the control lines
1254    #
1255    set xmeans [expr {$xmeans/double($nogroups)}]
1256    set mrange [expr {$mrange/double($nogroups)}]
1257    set A2     [lindex [lindex $control_factors 1] $nsamples]
1258    if { $A2 == "" } { set A2 [lindex [lindex $control_factors 1] end] }
1259
1260    return [list $xmeans [expr {$xmeans-$A2*$mrange}] \
1261                         [expr {$xmeans+$A2*$mrange}] $nsamples]
1262}
1263
1264# test-xbar --
1265#     Determine if any data points lie outside the x-bar control limits
1266#
1267# Arguments:
1268#     control     List returned by control-xbar with control data
1269#     data        List of observed values
1270# Return value:
1271#     Indices of any subsamples that violate the control limits
1272#
1273proc ::math::statistics::test-xbar { control data } {
1274    foreach {xmean xlower xupper nsamples} $control {break}
1275
1276    if { [llength $data] < 1 } {
1277        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1278    }
1279
1280    set nogroups [expr {[llength $data]/$nsamples}]
1281    if { $nogroups <= 0 } {
1282        set nogroup  1
1283        set nsamples [llength $data]
1284    }
1285
1286    set result {}
1287
1288    for { set i 0 } { $i < $nogroups } { incr i } {
1289        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1290
1291        set xmean 0.0
1292        foreach d $subsample {
1293            set xmean [expr {$xmean+$d}]
1294        }
1295        set xmean [expr {$xmean/double($nsamples)}]
1296
1297        if { $xmean < $xlower } { lappend result $i }
1298        if { $xmean > $xupper } { lappend result $i }
1299    }
1300
1301    return $result
1302}
1303
1304# control-Rchart --
1305#     Determine the control lines for an R chart
1306#
1307# Arguments:
1308#     data        List of observed values (at least 20*nsamples)
1309#     nsamples    Number of data per subsamples (default: 4)
1310# Return value:
1311#     List of: mean range, lower limit, upper limit, number of data per
1312#     subsample. Can be used in the test-Rchart procedure
1313#
1314proc ::math::statistics::control-Rchart { data {nsamples 4} } {
1315    variable TOOFEWDATA
1316    variable control_factors
1317
1318    #
1319    # Check the number of data
1320    #
1321    if { $nsamples <= 1 } {
1322        return -code error -errorcode DATA -errorinfo $OUTOFRANGE \
1323            "Number of data per subsample must be at least 2"
1324    }
1325    if { [llength $data] < 20*$nsamples } {
1326        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1327    }
1328
1329    set nogroups [expr {[llength $data]/$nsamples}]
1330    set mrange   0.0
1331    for { set i 0 } { $i < $nogroups } { incr i } {
1332        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1333
1334        set xmin  [lindex $subsample 0]
1335        set xmax  $xmin
1336        foreach d $subsample {
1337            set xmin  [expr {$xmin<$d? $xmin : $d}]
1338            set xmax  [expr {$xmax>$d? $xmax : $d}]
1339        }
1340        set mrange [expr {$mrange+($xmax-$xmin)}]
1341    }
1342
1343    #
1344    # Determine the control lines
1345    #
1346    set mrange [expr {$mrange/double($nogroups)}]
1347    set D3     [lindex [lindex $control_factors 3] $nsamples]
1348    set D4     [lindex [lindex $control_factors 5] $nsamples]
1349    if { $D3 == "" } { set D3 [lindex [lindex $control_factors 3] end] }
1350    if { $D4 == "" } { set D4 [lindex [lindex $control_factors 5] end] }
1351
1352    return [list $mrange [expr {$D3*$mrange}] \
1353                         [expr {$D4*$mrange}] $nsamples]
1354}
1355
1356# test-Rchart --
1357#     Determine if any data points lie outside the R-chart control limits
1358#
1359# Arguments:
1360#     control     List returned by control-xbar with control data
1361#     data        List of observed values
1362# Return value:
1363#     Indices of any subsamples that violate the control limits
1364#
1365proc ::math::statistics::test-Rchart { control data } {
1366    foreach {rmean rlower rupper nsamples} $control {break}
1367
1368    #
1369    # Check the number of data
1370    #
1371    if { [llength $data] < 1 } {
1372        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1373    }
1374
1375    set nogroups [expr {[llength $data]/$nsamples}]
1376
1377    set result {}
1378    for { set i 0 } { $i < $nogroups } { incr i } {
1379        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1380
1381        set xmin  [lindex $subsample 0]
1382        set xmax  $xmin
1383        foreach d $subsample {
1384            set xmin  [expr {$xmin<$d? $xmin : $d}]
1385            set xmax  [expr {$xmax>$d? $xmax : $d}]
1386        }
1387        set range [expr {$xmax-$xmin}]
1388
1389        if { $range < $rlower } { lappend result $i }
1390        if { $range > $rupper } { lappend result $i }
1391    }
1392
1393    return $result
1394}
1395
1396# test-Duckworth --
1397#     Determine if two data sets have the same median according to the Tukey-Duckworth test
1398#
1399# Arguments:
1400#     list1           Values in the first data set
1401#     list2           Values in the second data set
1402#     significance    Significance level (either 0.05, 0.01 or 0.001)
1403#
1404# Returns:
1405#     0 if the medians are unequal, 1 if they are equal, -1 if the test can not
1406#     be conducted (the smallest value must be in a different set than the greatest value)
1407#
1408proc ::math::statistics::test-Duckworth {list1 list2 significance} {
1409    set sorted1   [lsort -real $list1]
1410    set sorted2   [lsort -real -decreasing $list2]
1411
1412    set lowest1   [lindex $sorted1 0]
1413    set lowest2   [lindex $sorted2 end]
1414    set greatest1 [lindex $sorted1 end]
1415    set greatest2 [lindex $sorted2 0]
1416
1417    if { $lowest1 <= $lowest2 && $greatest1 >= $greatest2 } {
1418        return -1
1419    }
1420    if { $lowest1 >= $lowest2 && $greatest1 <= $greatest2 } {
1421        return -1
1422    }
1423
1424    #
1425    # Determine how many elements of set 1 are lower than the lowest of set 2
1426    # Ditto for the number of elements of set 2 greater than the greatest of set 1
1427    # (Or vice versa)
1428    #
1429    if { $lowest1 < $lowest2 } {
1430        set lowest   $lowest2
1431        set greatest $greatest1
1432    } else {
1433        set lowest   $lowest1
1434        set greatest $greatest2
1435        set sorted1   [lsort -real $list2]
1436        set sorted2   [lsort -real -decreasing $list1]
1437        #lassign [list $sorted1 $sorted2] sorted2 sorted1
1438    }
1439
1440    set count1 0
1441    set count2 0
1442    foreach v1 $sorted1 {
1443        if { $v1 >= $lowest } {
1444            break
1445        }
1446        incr count1
1447    }
1448    foreach v2 $sorted2 {
1449        if { $v2 <= $greatest } {
1450            break
1451        }
1452        incr count2
1453    }
1454
1455    #
1456    # Determine the statistic D, possibly with correction
1457    #
1458    set n1 [llength $list1]
1459    set n2 [llength $list2]
1460
1461    set correction 0
1462    if { 3 + 4*$n1/3 <= $n2 && $n2 <= 2*$n1 } {
1463        set correction -1
1464    }
1465    if { 3 + 4*$n2/3 <= $n1 && $n1 <= 2*$n2 } {
1466        set correction -1
1467    }
1468
1469    set D [expr {$count1 + $count2 + $correction}]
1470
1471    switch -- [string trim $significance 0] {
1472        ".05" {
1473             return [expr {$D >= 7? 0 : 1}]
1474        }
1475        ".01" {
1476             return [expr {$D >= 10? 0 : 1}]
1477        }
1478        ".001" {
1479             return [expr {$D >= 13? 0 : 1}]
1480        }
1481        default {
1482             return -code error "Significance level must be 0.05, 0.01 or 0.001"
1483        }
1484    }
1485}
1486
1487# test-anova-F --
1488#     Check if two or more groups with normally distributed data have the same
1489#     means
1490#
1491# Arguments:
1492#     alpha            Significance level
1493#     args             One or more lists containing the data for the
1494#                      other groups
1495#
1496# Returns:
1497#     Whether the mean for the groups is likely to be the same (1)
1498#     or not (0).
1499#
1500# Note:
1501#     args may be a nested list
1502#
1503#     Implementation based on Wikipedia page on the F-test, one-way ANOVA
1504#
1505#     Possibly of interest: the ratio itself.
1506#
1507proc ::math::statistics::test-anova-F {alpha args} {
1508    if { [llength $args] == 1 } {
1509        set args [lindex $args 0]
1510    }
1511
1512    if { [llength $args] < 2 } {
1513        return -code error -errorcode ARG -errorinfo "At least two groups are required" \
1514                                                     "At least two groups are required"
1515    }
1516
1517    #
1518    # Determine the variance within the groups and the variance between the groups
1519    #
1520    set meanPerGroup {}
1521    set varPerGroup  {}
1522    set allData   {}
1523    foreach group $args {
1524        lappend meanPerGroup [::math::statistics::mean $group]
1525        lappend varPerGroup  [::math::statistics::pvar $group]
1526        set allData          [concat $allData $group]
1527    }
1528    set meanOverall [::math::statistics::mean $allData]
1529
1530    set varBetween 0.0
1531    foreach group $args mean $meanPerGroup {
1532        set varBetween [expr {$varBetween + [llength $group] * ($mean - $meanOverall)**2}]
1533    }
1534    set varBetween [expr {$varBetween / ([llength $args] - 1)}]
1535
1536    set varWithin 0.0
1537    foreach group $args var $varPerGroup {
1538        set varWithin [expr {$varWithin + [llength $group] * $var}]
1539    }
1540    set varWithin [expr {$varWithin / ([llength $allData] - [llength $args])}]
1541
1542    #
1543    # Finally compare the ratio to the F-distribution
1544    #
1545    set ratio [expr {$varBetween / $varWithin}]
1546    set nf1   [expr {[llength $args]    - 1}]
1547    set nf2   [expr {[llength $allData] - [llength $args]}]
1548
1549    expr {[::math::statistics::cdf-F $nf1 $nf2 $ratio] <= 1.0 - $alpha}
1550}
1551
1552# test-Tukey-range --
1553#     Check if two or more groups with normally distributed data have different
1554#     means or not, using Tukey's range test
1555#
1556# Arguments:
1557#     alpha            Significance level
1558#     args             Two or more lists containing the data for the
1559#                      other groups
1560#
1561# Returns:
1562#     For each pair of groups a list of:
1563#     - group indices
1564#     - whether the means differ (1) or not (0)
1565#     - limits of the confidence interval (for closer investigation)
1566#
1567# Note:
1568#     args may be a nested list
1569#
1570#     Implementation based on Wikipedia page on Tukey's range test
1571#
1572proc ::math::statistics::test-Tukey-range {alpha args} {
1573    if { [llength $args] == 1 } {
1574        set args [lindex $args 0]
1575    }
1576
1577    if { [llength $args] < 2 } {
1578        return -code error -errorcode ARG -errorinfo "At least two groups are required" \
1579                                                     "At least two groups are required"
1580    }
1581
1582    if { $alpha != 0.05 && $alpha != 0.01 } {
1583        return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01"
1584    }
1585
1586    #
1587    # Determine the mean per group and the pooled variance of the data
1588    #
1589    set meanPerGroup {}
1590    set allData      {}
1591    set sumVar       0.0
1592    foreach group $args {
1593        lappend meanPerGroup  [mean $group]
1594        set sumVar            [expr {$sumVar + ([llength $group]-1) * [var $group]}]
1595        set allData           [concat $allData $group]
1596    }
1597
1598    set n          [llength $allData]
1599    set stdOverall [expr {sqrt($sumVar /($n - [llength $args]))}]
1600
1601    set qcrit      [Qcrit-Tukey $alpha $n [llength $args]]
1602
1603    set result {}
1604
1605    for {set g 0} {$g < [llength $args]} {incr g} {
1606        set ggroup [lindex $args $g]
1607        set gmean  [mean $ggroup]
1608        set ng     [llength $ggroup]
1609
1610        for {set h [expr {$g+1}]} {$h < [llength $args]} {incr h} {
1611            set hgroup    [lindex $args $h]
1612            set hmean     [mean $hgroup]
1613            set nh        [llength $hgroup]
1614
1615            set halfwidth [expr {$qcrit * $stdOverall / sqrt(2.0) * sqrt( 1.0/$ng + 1.0/$nh )}]
1616            set lower     [expr {$hmean - $gmean - $halfwidth}]
1617            set upper     [expr {$hmean - $gmean + $halfwidth}]
1618            set unequal   1
1619            if { $lower < 0.0 && $upper > 0.0 } {
1620                set unequal 0
1621            }
1622            lappend result [list $g $h $unequal $lower $upper]
1623        }
1624    }
1625
1626    return $result
1627}
1628
1629# Qcrit-Tukey --
1630#     Determine the critical value for the Tukey range test
1631#
1632# Arguments:
1633#     alpha            Significance level
1634#     numberData       Total number of data
1635#     numberGroups     Number of groups
1636#
1637# Returns:
1638#     Critical value
1639#
1640# Note:
1641#     If there are more than 10 groups, simply use 10 groups
1642#
1643proc ::math::statistics::Qcrit-Tukey {alpha numberData numberGroups} {
1644    variable tukey_table_05
1645    variable tukey_table_01
1646
1647    if { $alpha == 0.05 } {
1648        upvar 0 tukey_table_05 tukey_table
1649    } else {
1650        upvar 0 tukey_table_05 tukey_table
1651    }
1652
1653    set df [expr {$numberData - $numberGroups}]
1654
1655    if { $numberGroups > 10 } {
1656        set numberGroups 10
1657    }
1658    incr numberGroups -1 ;# Offset because of 0-based numbering
1659
1660    if { $df > 120 } {
1661        return [lindex $tukey_table end $numberGroups]
1662    }
1663
1664    foreach {dfe values} $tukey_table {
1665        if { $df <= $dfe } {
1666            return [lindex $values $numberGroups]
1667        }
1668    }
1669}
1670
1671# test-Dunnett --
1672#     Check if one or more groups with normally distributed data have different
1673#     means from the control group or not, using Dunnett's test
1674#
1675# Arguments:
1676#     alpha            Significance level
1677#     control          Control group
1678#     args             One or more lists containing the data for the
1679#                      other groups
1680#
1681# Returns:
1682#     For each group a list of:
1683#     - whether the mean differs (1) from the control or not (0)
1684#     - the confidence interval
1685#
1686# Note:
1687#     args may be a nested list
1688#
1689#     Implementation based on Wikipedia page on Dunnett's test
1690#     The test is two-sided.
1691#
1692proc ::math::statistics::test-Dunnett {alpha control args} {
1693    if { [llength $args] == 1 } {
1694        set args [lindex $args 0]
1695    }
1696
1697    if { [llength $args] < 1 } {
1698        return -code error -errorcode ARG -errorinfo "At least one additional group is required" \
1699                                                     "At least one additional group is required"
1700    }
1701
1702    if { $alpha != 0.05 && $alpha != 0.01 } {
1703        return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01"
1704    }
1705
1706    #
1707    # Determine the mean per group and the pooled variance
1708    #
1709    set allData      $control
1710    set sumVar       [expr {([llength $control]-1)*[var $control]}]
1711    foreach group $args {
1712        set sumVar            [expr {$sumVar + ([llength $group]-1) * [var $group]}]
1713        set allData           [concat $allData $group]
1714    }
1715
1716    set n          [llength $allData]
1717    set stdOverall [expr {sqrt($sumVar /($n - [llength $args] - 1))}]
1718
1719    set tcrit [Tcrit-Dunnett $alpha $n [llength $args]]
1720
1721    set result {}
1722
1723    set cmean  [mean $control]
1724    set nc     [llength $control]
1725
1726    for {set g 0} {$g < [llength $args]} {incr g} {
1727        set ggroup    [lindex $args $g]
1728        set gmean     [mean $ggroup]
1729        set ng        [llength $ggroup]
1730
1731        set halfwidth [expr {$tcrit * $stdOverall * sqrt( 1.0/$nc + 1.0/$ng )}]
1732        set lower     [expr {$gmean - $cmean - $halfwidth}]
1733        set upper     [expr {$gmean - $cmean + $halfwidth}]
1734        set unequal   1
1735        if { $lower < 0.0 && $upper > 0.0 } {
1736            set unequal 0
1737        }
1738        lappend result [list $unequal $lower $upper]
1739    }
1740
1741    return $result
1742}
1743
1744# Tcrit-Dunnett --
1745#     Determine the critical value for the Dunnett test
1746#
1747# Arguments:
1748#     alpha            Significance level
1749#     numberData       Total number of data
1750#     numberGroups     Number of groups to compare against the control
1751#
1752# Returns:
1753#     Critical value
1754#
1755# Note:
1756#     If there are more than 10 groups, simply use 10 groups
1757#
1758proc ::math::statistics::Tcrit-Dunnett {alpha numberData numberGroups} {
1759    variable dunnett_table_05
1760    variable dunnett_table_01
1761
1762    if { $alpha == 0.05 } {
1763        upvar 0 dunnett_table_05 dunnett_table
1764    } else {
1765        upvar 0 dunnett_table_05 dunnett_table
1766    }
1767
1768    set df [expr {$numberData - $numberGroups - 1}]
1769
1770    incr numberGroups 1 ;# Add the control group
1771    if { $numberGroups > 10 } {
1772        set numberGroups 10
1773    }
1774    incr numberGroups -2 ;# Offset because of 0-based numbering and start at 2
1775
1776    if { $df > 60 } {
1777        return [lindex $dunnett_table end $numberGroups]
1778    }
1779
1780    foreach {dfe values} $dunnett_table {
1781        if { $df <= $dfe } {
1782            return [lindex $values $numberGroups]
1783        }
1784    }
1785}
1786
1787#
1788# Load the auxiliary scripts
1789#
1790source [file join [file dirname [info script]] pdf_stat.tcl]
1791source [file join [file dirname [info script]] plotstat.tcl]
1792source [file join [file dirname [info script]] liststat.tcl]
1793source [file join [file dirname [info script]] mvlinreg.tcl]
1794source [file join [file dirname [info script]] kruskal.tcl]
1795source [file join [file dirname [info script]] wilcoxon.tcl]
1796source [file join [file dirname [info script]] stat_kernel.tcl]
1797
1798#
1799# Define the tables
1800#
1801namespace eval ::math::statistics {
1802    variable tukey_table_05
1803    variable tukey_table_01
1804    variable dunnett_table_05
1805    variable dunnett_table_01
1806
1807    #alpha = 0.05
1808    #k 2 3 4 5 6 7 8 9 10
1809    #df
1810    set tukey_table_05 {
1811        1  {18.0 27.0 32.8 37.1 40.4 43.1 45.4 47.4 49.1}
1812        2  {6.08 8.33 9.80 10.88 11.73 12.43 13.03 13.54 13.99}
1813        3  {4.50 5.91 6.82 7.50 8.04 8.48 8.85 9.18 9.46}
1814        4  {3.93 5.04 5.76 6.29 6.71 7.05 7.35 7.60 7.83}
1815        5  {3.64 4.60 5.22 5.67 6.03 6.33 6.58 6.80 6.99}
1816        6  {3.46 4.34 4.90 5.30 5.63 5.90 6.12 6.32 6.49}
1817        7  {3.34 4.16 4.68 5.06 5.36 5.61 5.82 6.00 6.16}
1818        8  {3.26 4.04 4.53 4.89 5.17 5.40 5.60 5.77 5.92}
1819        9  {3.20 3.95 4.41 4.76 5.02 5.24 5.43 5.59 5.74}
1820        10 {3.15 3.88 4.33 4.65 4.91 5.12 5.30 5.46 5.60}
1821        11 {3.11 3.82 4.26 4.57 4.82 5.03 5.20 5.35 5.49}
1822        12 {3.08 3.77 4.20 4.51 4.75 4.95 5.12 5.27 5.39}
1823        13 {3.06 3.73 4.15 4.45 4.69 4.88 5.05 5.19 5.32}
1824        14 {3.03 3.70 4.11 4.41 4.64 4.83 4.99 5.13 5.25}
1825        15 {3.01 3.67 4.08 4.37 4.59 4.78 4.94 5.08 5.20}
1826        16 {3.00 3.65 4.05 4.33 4.56 4.74 4.90 5.03 5.15}
1827        17 {2.98 3.63 4.02 4.30 4.52 4.70 4.86 4.99 5.11}
1828        18 {2.97 3.61 4.00 4.28 4.49 4.67 4.82 4.96 5.07}
1829        19 {2.96 3.59 3.98 4.25 4.47 4.65 4.79 4.92 5.04}
1830        20 {2.95 3.58 3.96 4.23 4.45 4.62 4.77 4.90 5.01}
1831        24 {2.92 3.53 3.90 4.17 4.37 4.54 4.68 4.81 4.92}
1832        30 {2.89 3.49 3.85 4.10 4.30 4.46 4.60 4.72 4.82}
1833        40 {2.86 3.44 3.79 4.04 4.23 4.39 4.52 4.63 4.73}
1834        60 {2.83 3.40 3.74 3.98 4.16 4.31 4.44 4.55 4.65}
1835        120 {2.80 3.36 3.68 3.92 4.10 4.24 4.36 4.47 4.56}
1836        inf {2.77 3.31 3.63 3.86 4.03 4.17 4.29 4.39 4.47}}
1837
1838    #alpha = 0.01
1839    #k 2 3 4 5 6 7 8 9 10
1840    #df
1841    set tukey_table_01 {
1842        1  {90.0 135 164 186 202 216 227 237 246}
1843        2  {13.90 19.02 22.56 25.37 27.76 29.86 31.73 33.41 34.93}
1844        3  {8.26 10.62 12.17 13.32 14.24 15.00 15.65 16.21 16.71}
1845        4  {6.51 8.12 9.17 9.96 10.58 11.10 11.54 11.92 12.26}
1846        5  {5.70 6.98 7.80 8.42 8.91 9.32 9.67 9.97 10.24}
1847        6  {5.24 6.33 7.03 7.56 7.97 8.32 8.61 8.87 9.10}
1848        7  {4.95 5.92 6.54 7.00 7.37 7.68 7.94 8.17 8.37}
1849        8  {4.75 5.64 6.20 6.62 6.96 7.24 7.47 7.68 7.86}
1850        9  {4.60 5.43 5.96 6.35 6.66 6.91 7.13 7.33 7.49}
1851        10 {4.48 5.27 5.77 6.14 6.43 6.67 6.87 7.05 7.21}
1852        11 {4.39 5.15 5.62 5.97 6.25 6.48 6.67 6.84 6.99}
1853        12 {4.32 5.05 5.50 5.84 6.10 6.32 6.51 6.67 6.81}
1854        13 {4.26 4.96 5.40 5.73 5.98 6.19 6.37 6.53 6.67}
1855        14 {4.21 4.89 5.32 5.63 5.88 6.08 6.26 6.41 6.54}
1856        15 {4.17 4.84 5.25 5.56 5.80 5.99 6.16 6.31 6.44}
1857        16 {4.13 4.79 5.19 5.49 5.72 5.92 6.08 6.22 6.35}
1858        17 {4.10 4.74 5.14 5.43 5.66 5.85 6.01 6.15 6.27}
1859        18 {4.07 4.70 5.09 5.38 5.60 5.79 5.94 6.08 6.20}
1860        19 {4.05 4.67 5.05 5.33 5.55 5.73 5.89 6.02 6.14}
1861        20 {4.02 4.64 5.02 5.29 5.51 5.69 5.84 5.97 6.09}
1862        24 {3.96 4.55 4.91 5.17 5.37 5.54 5.69 5.81 5.92}
1863        30 {3.89 4.45 4.80 5.05 5.24 5.40 5.54 5.65 5.76}
1864        40 {3.82 4.37 4.70 4.93 5.11 5.26 5.39 5.50 5.60}
1865        60 {3.76 4.28 4.59 4.82 4.99 5.13 5.25 5.36 5.45}
1866        120 {3.70 4.20 4.50 4.71 4.87 5.01 5.12 5.21 5.30}
1867        inf {3.64 4.12 4.40 4.60 4.76 4.88 4.99 5.08 5.16}}
1868
1869    #From: http://davidmlane.com/hyperstat/table_Dunnett.html
1870    #Dunnett Table
1871    #Number of Groups Including Control Group
1872    #dfe     alpha = 0.05
1873    #         2       3       4       5       6       7       8       9       10
1874    set dunnett_table_05 {
1875    5        {2.57    3.03    3.29    3.48    3.62    3.73    3.82    3.9     3.97}
1876    6        {2.45    2.86    3.1     3.26    3.39    3.49    3.57    3.64    3.71}
1877    7        {2.36    2.75    2.97    3.12    3.24    3.33    3.41    3.47    3.53}
1878    8        {2.31    2.67    2.88    3.02    3.13    3.22    3.29    3.35    3.41}
1879    9        {2.26    2.61    2.81    2.95    3.05    3.14    3.2     3.26    3.32}
1880    10       {2.23    2.57    2.76    2.89    2.99    3.07    3.14    3.19    3.24}
1881    11       {2.2     2.53    2.72    2.84    2.94    3.02    3.08    3.14    3.19}
1882    12       {2.18    2.5     2.68    2.81    2.9     2.98    3.04    3.09    3.14}
1883    13       {2.16    2.48    2.65    2.78    2.87    2.94    3       3.06    3.1}
1884    14       {2.14    2.46    2.63    2.75    2.84    2.91    2.97    3.02    3.07}
1885    15       {2.13    2.44    2.61    2.73    2.82    2.89    2.95    3       3.04}
1886    16       {2.12    2.42    2.59    2.71    2.8     2.87    2.92    2.97    3.02}
1887    17       {2.11    2.41    2.58    2.69    2.78    2.85    2.9     2.95    3}
1888    18       {2.1     2.4     2.56    2.68    2.76    2.83    2.89    2.94    2.98}
1889    19       {2.09    2.39    2.55    2.66    2.75    2.81    2.87    2.92    2.96}
1890    20       {2.09    2.38    2.54    2.65    2.73    2.8     2.86    2.9     2.95}
1891    24       {2.06    2.35    2.51    2.61    2.7     2.76    2.81    2.86    2.9}
1892    30       {2.04    2.32    2.47    2.58    2.66    2.72    2.77    2.82    2.86}
1893    40       {2.02    2.29    2.44    2.54    2.62    2.68    2.73    2.77    2.81}
1894    60       {2       2.27    2.41    2.51    2.58    2.64    2.69    2.73    2.77}}
1895
1896    set dunnett_table_01 {
1897    5        {4.03    4.63    4.98    5.22    5.41    5.56    5.69    5.8     5.89}
1898    6        {3.71    4.21    4.51    4.71    4.87    5       5.1     5.2     5.28}
1899    7        {3.5     3.95    4.21    4.39    4.53    4.64    4.74    4.82    4.89}
1900    8        {3.36    3.77    4       4.17    4.29    4.4     4.48    4.56    4.62}
1901    9        {3.25    3.63    3.85    4.01    4.12    4.22    4.3     4.37    4.43}
1902    10       {3.17    3.53    3.74    3.88    3.99    4.08    4.16    4.22    4.28}
1903    11       {3.11    3.45    3.65    3.79    3.89    3.98    4.05    4.11    4.16}
1904    12       {3.05    3.39    3.58    3.71    3.81    3.89    3.96    4.02    4.07}
1905    13       {3.01    3.33    3.52    3.65    3.74    3.82    3.89    3.94    3.99}
1906    14       {2.98    3.29    3.47    3.59    3.69    3.76    3.83    3.88    3.93}
1907    15       {2.95    3.25    3.43    3.55    3.64    3.71    3.78    3.83    3.88}
1908    16       {2.92    3.22    3.39    3.51    3.6     3.67    3.73    3.78    3.83}
1909    17       {2.9     3.19    3.36    3.47    3.56    3.63    3.69    3.74    3.79}
1910    18       {2.88    3.17    3.33    3.44    3.53    3.6     3.66    3.71    3.75}
1911    19       {2.86    3.15    3.31    3.42    3.5     3.57    3.63    3.68    3.72}
1912    20       {2.85    3.13    3.29    3.4     3.48    3.55    3.6     3.65    3.69}
1913    24       {2.8     3.07    3.22    3.32    3.4     3.47    3.52    3.57    3.61}
1914    30       {2.75    3.01    3.15    3.25    3.33    3.39    3.44    3.49    3.52}
1915    40       {2.7     2.95    3.09    3.19    3.26    3.32    3.37    3.41    3.44}
1916    60       {2.66    2.9     3.03    3.12    3.19    3.25    3.29    3.33    3.37}}
1917
1918}
1919
1920#
1921# Simple test code
1922#
1923if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } {
1924
1925    console show
1926    puts [interp aliases]
1927
1928    set values {1 1 1 1 {}}
1929    puts [::math::statistics::basic-stats $values]
1930    set values {1 2 3 4}
1931    puts [::math::statistics::basic-stats $values]
1932    set values {1 -1 1 -2}
1933    puts [::math::statistics::basic-stats $values]
1934    puts [::math::statistics::mean   $values]
1935    puts [::math::statistics::min    $values]
1936    puts [::math::statistics::max    $values]
1937    puts [::math::statistics::number $values]
1938    puts [::math::statistics::stdev  $values]
1939    puts [::math::statistics::var    $values]
1940
1941    set novals 100
1942    #set maxvals 100001
1943    set maxvals 1001
1944    while { $novals < $maxvals } {
1945	set values {}
1946	for { set i 0 } { $i < $novals } { incr i } {
1947	    lappend values [expr {rand()}]
1948	}
1949	puts [::math::statistics::basic-stats $values]
1950	puts [::math::statistics::histogram {0.0 0.2 0.4 0.6 0.8 1.0} $values]
1951	set novals [expr {$novals*10}]
1952    }
1953
1954    puts "Normal distribution:"
1955    puts "X=0:  [::math::statistics::pdf-normal 0.0 1.0 0.0]"
1956    puts "X=1:  [::math::statistics::pdf-normal 0.0 1.0 1.0]"
1957    puts "X=-1: [::math::statistics::pdf-normal 0.0 1.0 -1.0]"
1958
1959    set data1 {0.0 1.0 3.0 4.0 100.0 -23.0}
1960    set data2 {1.0 2.0 4.0 5.0 101.0 -22.0}
1961    set data3 {0.0 2.0 6.0 8.0 200.0 -46.0}
1962    set data4 {2.0 6.0 8.0 200.0 -46.0 1.0}
1963    set data5 {100.0 99.0 90.0 93.0 5.0 123.0}
1964    puts "Correlation data1 and data1: [::math::statistics::corr $data1 $data1]"
1965    puts "Correlation data1 and data2: [::math::statistics::corr $data1 $data2]"
1966    puts "Correlation data1 and data3: [::math::statistics::corr $data1 $data3]"
1967    puts "Correlation data1 and data4: [::math::statistics::corr $data1 $data4]"
1968    puts "Correlation data1 and data5: [::math::statistics::corr $data1 $data5]"
1969
1970    #   set data {1.0 2.0 2.3 4.0 3.4 1.2 0.6 5.6}
1971    #   puts [::math::statistics::basicStats $data]
1972    #   puts [::math::statistics::interval-mean-stdev $data 0.90]
1973    #   puts [::math::statistics::interval-mean-stdev $data 0.95]
1974    #   puts [::math::statistics::interval-mean-stdev $data 0.99]
1975
1976    #   puts "\nTest mean values:"
1977    #   puts [::math::statistics::test-mean $data 2.0 0.1 0.90]
1978    #   puts [::math::statistics::test-mean $data 2.0 0.5 0.90]
1979    #   puts [::math::statistics::test-mean $data 2.0 1.0 0.90]
1980    #   puts [::math::statistics::test-mean $data 2.0 2.0 0.90]
1981
1982    set rc [catch {
1983	set m [::math::statistics::mean {}]
1984    } msg ] ; # {}
1985    puts "Result: $rc $msg"
1986
1987    puts "\nTest quantiles:"
1988    set data      {1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0}
1989    set quantiles {0.11 0.21 0.51 0.91 0.99}
1990    set limits    {2.1 4.1 6.1 8.1}
1991    puts [::math::statistics::quantiles $data $quantiles]
1992
1993    set histogram [::math::statistics::histogram $limits $data]
1994    puts [::math::statistics::quantiles $limits $histogram $quantiles]
1995
1996    puts "\nTest autocorrelation:"
1997    set data      {1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0}
1998    puts [::math::statistics::autocorr $data]
1999    set data      {1.0 -1.1 2.0 -0.6 3.0 -4.0 0.5 0.9 -1.0}
2000    puts [::math::statistics::autocorr $data]
2001
2002    puts "\nTest histogram limits:"
2003    puts [::math::statistics::mean-histogram-limits   1.0 1.0]
2004    puts [::math::statistics::mean-histogram-limits   1.0 1.0 4]
2005    puts [::math::statistics::minmax-histogram-limits 1.0 10.0 10]
2006
2007}
2008
2009#
2010# Test xbar/R-chart procedures
2011#
2012if { 0 } {
2013    set data {}
2014    for { set i 0 } { $i < 500 } { incr i } {
2015        lappend data [expr {rand()}]
2016    }
2017    set limits [::math::statistics::control-xbar $data]
2018    puts $limits
2019
2020    puts "Outliers? [::math::statistics::test-xbar $limits $data]"
2021
2022    set newdata {1.0 1.0 1.0 1.0 0.5 0.5 0.5 0.5 10.0 10.0 10.0 10.0}
2023    puts "Outliers? [::math::statistics::test-xbar $limits $newdata] -- 0 2"
2024
2025    set limits [::math::statistics::control-Rchart $data]
2026    puts $limits
2027
2028    puts "Outliers? [::math::statistics::test-Rchart $limits $data]"
2029
2030    set newdata {0.0 1.0 2.0 1.0 0.4 0.5 0.6 0.5 10.0  0.0 10.0 10.0}
2031    puts "Outliers? [::math::statistics::test-Rchart $limits $newdata] -- 0 2"
2032}
2033
2034