1# statistics.tcl --
2#
3#    Package for basic statistical analysis
4#
5# version 0.1:   initial implementation, january 2003
6# version 0.1.1: added linear regres
7# version 0.1.2: border case in stdev taken care of
8# version 0.1.3: moved initialisation of CDF to first call, november 2004
9# version 0.3:   added test for normality (as implemented by Torsten Reincke), march 2006
10#                (also fixed an error in the export list)
11# version 0.4:   added the multivariate linear regression procedures by
12#                Eric Kemp-Benedict, february 2007
13# version 0.5:   added the population standard deviation and variance,
14#                as suggested by Dimitrios Zachariadis
15# version 0.6:   added pdf and cdf procedures for various distributions
16#                (provided by Eric Kemp-Benedict)
17# version 0.7:   added Kruskal-Wallis test (by Torsten Berg)
18# version 0.8:   added Wilcoxon test and Spearman rank correlation
19# version 0.9:   added kernel density estimation
20# version 0.9.3: added histogram-alt, corrected test-normal
21# version 1.0:   added test-anova-F
22# version 1.0.1: correction in pdf-lognormal and cdf-lognormal
23# version 1.1:   added test-Tukey-range and test-Dunnett
24# version 1.3:   added wasserstein-distance, kl-divergence and logit regression
25
26package require Tcl 8.5 ; # 8.5+ feature in test-anova-F and others: **-operator
27package provide math::statistics 1.5.0
28package require math
29
30if {![llength [info commands ::lrepeat]]} {
31    # Forward portability, emulate lrepeat
32    proc ::lrepeat {n args} {
33	if {$n < 1} {
34	    return -code error "must have a count of at least 1"
35	}
36	set res {}
37	while {$n} {
38	    foreach x $args { lappend res $x }
39	    incr n -1
40	}
41	return $res
42    }
43}
44
45# ::math::statistics --
46#   Namespace holding the procedures and variables
47#
48
49namespace eval ::math::statistics {
50    #
51    # Safer: change to short procedures
52    #
53    namespace export mean min max number var stdev pvar pstdev basic-stats corr \
54	    histogram histogram-alt interval-mean-stdev t-test-mean quantiles \
55	    test-normal lillieforsFit \
56	    autocorr crosscorr filter map samplescount median \
57	    test-2x2 print-2x2 control-xbar test_xbar \
58	    control-Rchart test-Rchart \
59	    test-Kruskal-Wallis analyse-Kruskal-Wallis group-rank \
60	    test-Wilcoxon spearman-rank spearman-rank-extended \
61	    test-Duckworth test-anova-F test-Tukey-range test-Dunnett
62    #
63    # Error messages
64    #
65    variable NEGSTDEV   {Zero or negative standard deviation}
66    variable TOOFEWDATA {Too few or invalid data}
67    variable OUTOFRANGE {Argument out of range}
68    variable INVALID    {Argument invalid}
69
70    #
71    # Coefficients involved
72    #
73    variable factorNormalPdf
74    set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}]
75
76    # xbar/R-charts:
77    # Data from:
78    #    Peter W.M. John:
79    #    Statistical methods in engineering and quality assurance
80    #    Wiley and Sons, 1990
81    #
82    variable control_factors {
83        A2 {1.880 1.093 0.729 0.577 0.483 0.419 0.419}
84        D3 {0.0   0.0   0.0   0.0   0.0   0.076 0.076}
85        D4 {3.267 2.574 2.282 2.114 2.004 1.924 1.924}
86    }
87}
88
89# mean, min, max, number, var, stdev, pvar, pstdev --
90#    Return the mean (minimum, maximum) value of a list of numbers
91#    or number of non-missing values
92#
93# Arguments:
94#    type     Type of value to be returned
95#    values   List of values to be examined
96#
97# Results:
98#    Value that was required
99#
100#
101namespace eval ::math::statistics {
102    foreach type {mean min max number stdev var pstdev pvar} {
103	proc $type { values } "BasicStats $type \$values"
104    }
105    proc basic-stats { values } "BasicStats all \$values"
106}
107
108# BasicStats --
109#    Return the one or all of the basic statistical properties
110#
111# Arguments:
112#    type     Type of value to be returned
113#    values   List of values to be examined
114#
115# Results:
116#    Value that was required
117#
118proc ::math::statistics::BasicStats { type values } {
119    variable TOOFEWDATA
120
121    if { [lsearch {all mean min max number stdev var pstdev pvar} $type] < 0 } {
122	return -code error \
123		-errorcode ARG -errorinfo [list unknown type of statistic -- $type] \
124		[list unknown type of statistic -- $type]
125    }
126
127    set min    {}
128    set max    {}
129    set mean   {}
130    set stdev  {}
131    set var    {}
132
133    set sum    0.0
134    set sumsq  0.0
135    set number 0
136    set first  {}
137
138    foreach value $values {
139	if { $value == {} } {
140	    continue
141	}
142	set value [expr {double($value)}]
143
144	if { $first == {} } {
145	    set first $value
146	}
147
148	incr number
149	set  sum    [expr {$sum+$value}]
150	set  sumsq  [expr {$sumsq+($value-$first)*($value-$first)}]
151
152	if { $min == {} || $value < $min } {
153	    set min $value
154	}
155	if { $max == {} || $value > $max } {
156	    set max $value
157	}
158    }
159
160    if { $number > 0 } {
161	set mean [expr {$sum/$number}]
162    } else {
163	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
164    }
165
166    if { $number > 1 } {
167	set var    [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number-1)}]
168        #
169        # Take care of a rare situation: uniform data might
170        # cause a tiny negative difference
171        #
172        if { $var < 0.0 } {
173           set var 0.0
174        }
175	set stdev  [expr {sqrt($var)}]
176    }
177	set pvar [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number)}]
178        #
179        # Take care of a rare situation: uniform data might
180        # cause a tiny negative difference
181        #
182        if { $pvar < 0.0 } {
183           set pvar 0.0
184        }
185	set pstdev  [expr {sqrt($pvar)}]
186
187    set all [list $mean $min $max $number $stdev $var $pstdev $pvar]
188
189    #
190    # Return the appropriate value
191    #
192    set $type
193}
194
195# histogram --
196#    Return histogram information from a list of numbers
197#
198# Arguments:
199#    limits   Upper limits for the buckets (in increasing order)
200#    values   List of values to be examined
201#    weights  List of weights, one per value (optional)
202#
203# Results:
204#    List of number of values in each bucket (length is one more than
205#    the number of limits)
206#
207#
208proc ::math::statistics::histogram { limits values {weights {}} } {
209
210    if { [llength $limits] < 1 } {
211	return -code error -errorcode ARG -errorinfo {No limits given} {No limits given}
212    }
213    if { [llength $weights] > 0 && [llength $values] != [llength $weights] } {
214	return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length}
215    }
216
217    set limits [lsort -real -increasing $limits]
218
219    for { set index 0 } { $index <= [llength $limits] } { incr index } {
220	set buckets($index) 0
221    }
222
223    set last [llength $limits]
224
225    # Will do integer arithmetic if unset
226    if {$weights eq ""} {
227       set weights [lrepeat [llength $values] 1]
228    }
229
230    foreach value $values weight $weights {
231	if { $value == {} } {
232	    continue
233	}
234
235	set index 0
236	set found 0
237	foreach limit $limits {
238	    if { $value <= $limit } {
239		set found 1
240		set buckets($index) [expr $buckets($index)+$weight]
241		break
242	    }
243	    incr index
244	}
245
246	if { $found == 0 } {
247	    set buckets($last) [expr $buckets($last)+$weight]
248	}
249    }
250
251    set result {}
252    for { set index 0 } { $index <= $last } { incr index } {
253	lappend result $buckets($index)
254    }
255
256    return $result
257}
258
259# histogram-alt --
260#    Return histogram information from a list of numbers -
261#    intervals are open-ended at the lower bound instead of at the upper bound
262#
263# Arguments:
264#    limits   Upper limits for the buckets (in increasing order)
265#    values   List of values to be examined
266#    weights  List of weights, one per value (optional)
267#
268# Results:
269#    List of number of values in each bucket (length is one more than
270#    the number of limits)
271#
272#
273proc ::math::statistics::histogram-alt { limits values {weights {}} } {
274
275    if { [llength $limits] < 1 } {
276	return -code error -errorcode ARG -errorinfo {No limits given} {No limits given}
277    }
278    if { [llength $weights] > 0 && [llength $values] != [llength $weights] } {
279	return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length}
280    }
281
282    set limits [lsort -real -increasing $limits]
283
284    for { set index 0 } { $index <= [llength $limits] } { incr index } {
285	set buckets($index) 0
286    }
287
288    set last [llength $limits]
289
290    # Will do integer arithmetic if unset
291    if {$weights eq ""} {
292       set weights [lrepeat [llength $values] 1]
293    }
294
295    foreach value $values weight $weights {
296	if { $value == {} } {
297	    continue
298	}
299
300	set index 0
301	set found 0
302	foreach limit $limits {
303	    if { $value < $limit } {
304		set found 1
305		set buckets($index) [expr $buckets($index)+$weight]
306		break
307	    }
308	    incr index
309	}
310
311	if { $found == 0 } {
312	    set buckets($last) [expr $buckets($last)+$weight]
313	}
314    }
315
316    set result {}
317    for { set index 0 } { $index <= $last } { incr index } {
318	lappend result $buckets($index)
319    }
320
321    return $result
322}
323
324# corr --
325#    Return the correlation coefficient of two sets of data
326#
327# Arguments:
328#    data1    List with the first set of data
329#    data2    List with the second set of data
330#
331# Result:
332#    Correlation coefficient of the two
333#
334proc ::math::statistics::corr { data1 data2 } {
335    variable TOOFEWDATA
336
337    set number  0
338    set sum1    0.0
339    set sum2    0.0
340    set sumsq1  0.0
341    set sumsq2  0.0
342    set sumprod 0.0
343
344    foreach value1 $data1 value2 $data2 {
345	if { $value1 == {} || $value2 == {} } {
346	    continue
347	}
348	set  value1  [expr {double($value1)}]
349	set  value2  [expr {double($value2)}]
350
351	set  sum1    [expr {$sum1+$value1}]
352	set  sum2    [expr {$sum2+$value2}]
353	set  sumsq1  [expr {$sumsq1+$value1*$value1}]
354	set  sumsq2  [expr {$sumsq2+$value2*$value2}]
355	set  sumprod [expr {$sumprod+$value1*$value2}]
356	incr number
357    }
358    if { $number > 0 } {
359	set numerator   [expr {$number*$sumprod-$sum1*$sum2}]
360	set denom1      [expr {sqrt($number*$sumsq1-$sum1*$sum1)}]
361	set denom2      [expr {sqrt($number*$sumsq2-$sum2*$sum2)}]
362	if { $denom1 != 0.0 && $denom2 != 0.0 } {
363	    set corr_coeff  [expr {$numerator/$denom1/$denom2}]
364	} elseif { $denom1 != 0.0 || $denom2 != 0.0 } {
365	    set corr_coeff  0.0 ;# Uniform against non-uniform
366	} else {
367	    set corr_coeff  1.0 ;# Both uniform
368	}
369
370    } else {
371	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
372    }
373    return $corr_coeff
374}
375
376# lillieforsFit --
377#     Calculate the goodness of fit according to Lilliefors
378#     (goodness of fit to a normal distribution)
379#
380# Arguments:
381#     values          List of values to be tested for normality
382#
383# Result:
384#     Value of the statistic D
385#
386proc ::math::statistics::lillieforsFit {values} {
387    #
388    # calculate the goodness of fit according to Lilliefors
389    # (goodness of fit to a normal distribution)
390    #
391    # values -> list of values to be tested for normality
392    # (these values are sampled counts)
393    #
394
395    # calculate standard deviation and mean of the sample:
396    set n [llength $values]
397    if { $n < 5 } {
398        return -code error "Insufficient number of data (at least five required)"
399    }
400    set sd   [stdev $values]
401    set mean [mean $values]
402
403    # sort the sample for further processing:
404    set values [lsort -real $values]
405
406    # standardize the sample data (Z-scores):
407    foreach x $values {
408        lappend stdData [expr {($x - $mean)/double($sd)}]
409    }
410
411    # compute the value of the distribution function at every sampled point:
412    foreach x $stdData {
413        lappend expData [pnorm $x]
414    }
415
416    # compute D+:
417    set i 0
418    foreach x $expData {
419        incr i
420        lappend dplus [expr {$i/double($n)-$x}]
421    }
422    set dplus [lindex [lsort -real $dplus] end]
423
424    # compute D-:
425    set i 0
426    foreach x $expData {
427        incr i
428        lappend dminus [expr {$x-($i-1)/double($n)}]
429    }
430    set dminus [lindex [lsort -real $dminus] end]
431
432    # Calculate the test statistic D
433    # by finding the maximal vertical difference
434    # between the sample and the expectation:
435    #
436    set D [expr {$dplus > $dminus ? $dplus : $dminus}]
437
438    # We now use the modified statistic Z,
439    # because D is only reliable
440    # if the p-value is smaller than 0.1
441    return [expr {$D * (sqrt($n) - 0.01 + 0.831/sqrt($n))}]
442}
443
444# pnorm --
445#     Calculate the cumulative distribution function (cdf)
446#     for the standard normal distribution like in the statistical
447#     software 'R' (mean=0 and sd=1)
448#
449# Arguments:
450#     x               Value fro which the cdf should be calculated
451#
452# Result:
453#     Value of the statistic D
454#
455proc ::math::statistics::pnorm {x} {
456    #
457    # cumulative distribution function (cdf)
458    # for the standard normal distribution like in the statistical software 'R'
459    # (mean=0 and sd=1)
460    #
461    # x -> value for which the cdf should be calculated
462    #
463    set sum [expr {double($x)}]
464    set oldSum 0.0
465    set i 1
466    set denom 1.0
467    while {$sum != $oldSum} {
468            set oldSum $sum
469            incr i 2
470            set denom [expr {$denom*$i}]
471            #puts "$i - $denom"
472            set sum [expr {$oldSum + pow($x,$i)/$denom}]
473    }
474    return [expr {0.5 + $sum * exp(-0.5 * $x*$x - 0.91893853320467274178)}]
475}
476
477# pnorm_quicker --
478#     Calculate the cumulative distribution function (cdf)
479#     for the standard normal distribution - quicker alternative
480#     (less accurate)
481#
482# Arguments:
483#     x               Value for which the cdf should be calculated
484#
485# Result:
486#     Value of the statistic D
487#
488proc ::math::statistics::pnorm_quicker {x} {
489
490    set n [expr {abs($x)}]
491    set n [expr {1.0 + $n*(0.04986735 + $n*(0.02114101 + $n*(0.00327763 \
492            + $n*(0.0000380036 + $n*(0.0000488906 + $n*0.000005383)))))}]
493    set n [expr {1.0/pow($n,16)}]
494    #
495    if {$x >= 0} {
496        return [expr {1 - $n/2.0}]
497    } else {
498        return [expr {$n/2.0}]
499    }
500}
501
502# test-normal --
503#     Test for normality (using method Lilliefors)
504#
505# Arguments:
506#     data            Values that need to be tested
507#     significance    Level at which the discrepancy from normality is tested
508#
509# Result:
510#     1 if the Lilliefors statistic D is larger than the critical level
511#
512# Note:
513#     There was a mistake in the implementation before 0.9.3: confidence (wrong word)
514#     instead of significance. To keep compatibility with earlier versions, both
515#     significance and 1-significance are accepted.
516#
517proc ::math::statistics::test-normal {data significance} {
518    set D [lillieforsFit $data]
519
520    if { $significance > 0.5 } {
521        set significance [expr {1.0-$significance}] ;# Convert the erroneous levels pre 0.9.3
522    }
523
524    set Dcrit --
525    if { abs($significance-0.20) < 0.0001 } {
526        set Dcrit 0.741
527    }
528    if { abs($significance-0.15) < 0.0001 } {
529        set Dcrit 0.775
530    }
531    if { abs($significance-0.10) < 0.0001 } {
532        set Dcrit 0.819
533    }
534    if { abs($significance-0.05) < 0.0001 } {
535        set Dcrit 0.895
536    }
537    if { abs($significance-0.01) < 0.0001 } {
538        set Dcrit 1.035
539    }
540    if { $Dcrit != "--" } {
541        return [expr {$D > $Dcrit ? 1 : 0 }]
542    } else {
543        return -code error "Significancce level must be one of: 0.20, 0.15, 0.10, 0.05 or 0.01"
544    }
545}
546
547# t-test-mean --
548#    Test whether the mean value of a sample is in accordance with the
549#    estimated normal distribution with a certain probability
550#    (Student's t test)
551#
552# Arguments:
553#    data         List of raw data values (small sample)
554#    est_mean     Estimated mean of the distribution
555#    est_stdev    Estimated stdev of the distribution
556#    alpha        Probability level (0.95 or 0.99 for instance)
557#
558# Result:
559#    1 if the test is positive, 0 otherwise. If there are too few data,
560#    returns an empty string
561#
562proc ::math::statistics::t-test-mean { data est_mean est_stdev alpha } {
563    variable NEGSTDEV
564    variable TOOFEWDATA
565
566    if { $est_stdev <= 0.0 } {
567	return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV
568    }
569
570    set allstats        [BasicStats all $data]
571
572    set alpha2          [expr {(1.0+$alpha)/2.0}]
573
574    set sample_mean     [lindex $allstats 0]
575    set sample_number   [lindex $allstats 3]
576
577    if { $sample_number > 1 } {
578	set tzero   [expr {abs($sample_mean-$est_mean)/$est_stdev * \
579		sqrt($sample_number-1)}]
580	set degrees [expr {$sample_number-1}]
581	set prob    [cdf-students-t $degrees $tzero]
582
583	return [expr {$prob<$alpha2}]
584
585    } else {
586	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
587    }
588}
589
590# interval-mean-stdev --
591#    Return the interval containing the mean value and one
592#    containing the standard deviation with a certain
593#    level of confidence (assuming a normal distribution)
594#
595# Arguments:
596#    data         List of raw data values
597#    confidence   Confidence level (0.95 or 0.99 for instance)
598#
599# Result:
600#    List having the following elements: lower and upper bounds of
601#    mean, lower and upper bounds of stdev
602#
603#
604proc ::math::statistics::interval-mean-stdev { data confidence } {
605    variable TOOFEWDATA
606
607    set allstats [BasicStats all $data]
608
609    set conf2    [expr {(1.0+$confidence)/2.0}]
610    set mean     [lindex $allstats 0]
611    set number   [lindex $allstats 3]
612    set stdev    [lindex $allstats 4]
613
614    if { $number > 1 } {
615	set degrees    [expr {$number-1}]
616	set student_t  [expr {sqrt([Inverse-cdf-toms322 1 $degrees $conf2])}]
617	set mean_lower [expr {$mean-$student_t*$stdev/sqrt($number)}]
618	set mean_upper [expr {$mean+$student_t*$stdev/sqrt($number)}]
619	set stdev_lower {}
620	set stdev_upper {}
621	return [list $mean_lower $mean_upper $stdev_lower $stdev_upper]
622    } else {
623	return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
624    }
625}
626
627# quantiles --
628#    Return the quantiles for a given set of data or histogram
629#
630# Arguments:
631#    (two arguments)
632#    data         List of raw data values
633#    confidence   Confidence level (0.95 or 0.99 for instance)
634#    (three arguments)
635#    limits       List of upper limits from histogram
636#    counts       List of counts for for each interval in histogram
637#    confidence   Confidence level (0.95 or 0.99 for instance)
638#
639# Result:
640#    List of quantiles
641#
642proc ::math::statistics::quantiles { arg1 arg2 {arg3 {}} } {
643    variable TOOFEWDATA
644
645    if { [catch {
646	if { $arg3 == {} } {
647	    set result \
648		    [::math::statistics::QuantilesRawData $arg1 $arg2]
649	} else {
650	    set result \
651		    [::math::statistics::QuantilesHistogram $arg1 $arg2 $arg3]
652	}
653    } msg] } {
654	return -code error -errorcode $msg $msg
655    }
656    return $result
657}
658
659# QuantilesRawData --
660#    Return the quantiles based on raw data
661#
662# Arguments:
663#    data         List of raw data values
664#    confidence   Confidence level (0.95 or 0.99 for instance)
665#
666# Result:
667#    List of quantiles
668#
669proc ::math::statistics::QuantilesRawData { data confidence } {
670    variable TOOFEWDATA
671    variable OUTOFRANGE
672
673    if { [llength $confidence] <= 0 } {
674	return -code error -errorcode ARG "$TOOFEWDATA - quantiles"
675    }
676
677    if { [llength $data] <= 0 } {
678	return -code error -errorcode ARG "$TOOFEWDATA - raw data"
679    }
680
681    foreach cond $confidence {
682	if { $cond <= 0.0 || $cond >= 1.0 } {
683	    return -code error -errorcode ARG "$OUTOFRANGE - quantiles"
684	}
685    }
686
687    #
688    # Sort the data first
689    #
690    set sorted_data [lsort -real -increasing $data]
691
692    #
693    # Determine the list element lower or equal to the quantile
694    # and return the corresponding value
695    #
696    set result      {}
697    set number_data [llength $sorted_data]
698    foreach cond $confidence {
699	set elem [expr {round($number_data*$cond)-1}]
700	if { $elem < 0 } {
701	    set elem 0
702	}
703	lappend result [lindex $sorted_data $elem]
704    }
705
706    return $result
707}
708
709# QuantilesHistogram --
710#    Return the quantiles based on histogram information only
711#
712# Arguments:
713#    limits       Upper limits for histogram intervals
714#    counts       Counts for each interval
715#    confidence   Confidence level (0.95 or 0.99 for instance)
716#
717# Result:
718#    List of quantiles
719#
720proc ::math::statistics::QuantilesHistogram { limits counts confidence } {
721    variable TOOFEWDATA
722    variable OUTOFRANGE
723
724    if { [llength $confidence] <= 0 } {
725	return -code error -errorcode ARG "$TOOFEWDATA - quantiles"
726    }
727
728    if { [llength $confidence] <= 0 } {
729	return -code error -errorcode ARG "$TOOFEWDATA - histogram limits"
730    }
731
732    if { [llength $counts] <= [llength $limits] } {
733	return -code error -errorcode ARG "$TOOFEWDATA - histogram counts"
734    }
735
736    foreach cond $confidence {
737	if { $cond <= 0.0 || $cond >= 1.0 } {
738	    return -code error -errorcode ARG "$OUTOFRANGE - quantiles"
739	}
740    }
741
742    #
743    # Accumulate the histogram counts first
744    #
745    set sum 0
746    set accumulated_counts {}
747    foreach count $counts {
748	set sum [expr {$sum+$count}]
749	lappend accumulated_counts $sum
750    }
751    set total_counts $sum
752
753    #
754    # Determine the list element lower or equal to the quantile
755    # and return the corresponding value (use interpolation if
756    # possible)
757    #
758    set result      {}
759    foreach cond $confidence {
760	set found       0
761	set bound       [expr {round($total_counts*$cond)}]
762	set lower_limit {}
763	set lower_count 0
764	foreach acc_count $accumulated_counts limit $limits {
765	    if { $acc_count >= $bound } {
766		set found 1
767		break
768	    }
769	    set lower_limit $limit
770	    set lower_count $acc_count
771	}
772
773	if { $lower_limit == {} || $limit == {} || $found == 0 } {
774	    set quant $limit
775	    if { $limit == {} } {
776		set quant $lower_limit
777	    }
778	} else {
779	    set quant [expr {$limit+($lower_limit-$limit) *
780	    ($acc_count-$bound)/($acc_count-$lower_count)}]
781	}
782	lappend result $quant
783    }
784
785    return $result
786}
787
788# autocorr --
789#    Return the autocorrelation function (assuming equidistance between
790#    samples)
791#
792# Arguments:
793#    data         Raw data for which the autocorrelation must be determined
794#
795# Result:
796#    List of autocorrelation values (about 1/2 the number of raw data)
797#
798proc ::math::statistics::autocorr { data } {
799    variable TOOFEWDATA
800
801    if { [llength $data] <= 1 } {
802	return -code error -errorcode ARG "$TOOFEWDATA"
803    }
804
805    return [crosscorr $data $data]
806}
807
808# crosscorr --
809#    Return the cross-correlation function (assuming equidistance
810#    between samples)
811#
812# Arguments:
813#    data1        First set of raw data
814#    data2        Second set of raw data
815#
816# Result:
817#    List of cross-correlation values (about 1/2 the number of raw data)
818#
819# Note:
820#    The number of data pairs is not kept constant - because tests
821#    showed rather awkward results when it was kept constant.
822#
823proc ::math::statistics::crosscorr { data1 data2 } {
824    variable TOOFEWDATA
825
826    if { [llength $data1] <= 1 || [llength $data2] <= 1 } {
827	return -code error -errorcode ARG "$TOOFEWDATA"
828    }
829
830    #
831    # First determine the number of data pairs
832    #
833    set number1 [llength $data1]
834    set number2 [llength $data2]
835
836    set basic_stat1 [basic-stats $data1]
837    set basic_stat2 [basic-stats $data2]
838    set vmean1      [lindex $basic_stat1 0]
839    set vmean2      [lindex $basic_stat2 0]
840    set vvar1       [lindex $basic_stat1 end]
841    set vvar2       [lindex $basic_stat2 end]
842
843    set number_pairs $number1
844    if { $number1 > $number2 } {
845	set number_pairs $number2
846    }
847    set number_values $number_pairs
848    set number_delays [expr {$number_values/2.0}]
849
850    set scale [expr {sqrt($vvar1*$vvar2)}]
851
852    set result {}
853    for { set delay 0 } { $delay < $number_delays } { incr delay } {
854	set sumcross 0.0
855	set no_cross 0
856	for { set idx 0 } { $idx < $number_values } { incr idx } {
857	    set value1 [lindex $data1 $idx]
858	    set value2 [lindex $data2 [expr {$idx+$delay}]]
859	    if { $value1 != {} && $value2 != {} } {
860		set  sumcross \
861			[expr {$sumcross+($value1-$vmean1)*($value2-$vmean2)}]
862		incr no_cross
863	    }
864	}
865	lappend result [expr {$sumcross/($no_cross*$scale)}]
866
867	incr number_values -1
868    }
869
870    return $result
871}
872
873# mean-histogram-limits
874#    Determine reasonable limits based on mean and standard deviation
875#    for a histogram
876#
877# Arguments:
878#    mean         Mean of the data
879#    stdev        Standard deviation
880#    number       Number of limits to generate (defaults to 8)
881#
882# Result:
883#    List of limits
884#
885proc ::math::statistics::mean-histogram-limits { mean stdev {number 8} } {
886    variable NEGSTDEV
887
888    if { $stdev <= 0.0 } {
889	return -code error -errorcode ARG "$NEGSTDEV"
890    }
891    if { $number < 1 } {
892	return -code error -errorcode ARG "Number of limits must be positive"
893    }
894
895    #
896    # Always: between mean-3.0*stdev and mean+3.0*stdev
897    # number = 2: -0.25, 0.25
898    # number = 3: -0.25, 0, 0.25
899    # number = 4: -1, -0.25, 0.25, 1
900    # number = 5: -1, -0.25, 0, 0.25, 1
901    # number = 6: -2, -1, -0.25, 0.25, 1, 2
902    # number = 7: -2, -1, -0.25, 0, 0.25, 1, 2
903    # number = 8: -3, -2, -1, -0.25, 0.25, 1, 2, 3
904    #
905    switch -- $number {
906	"1" { set limits {0.0} }
907	"2" { set limits {-0.25 0.25} }
908	"3" { set limits {-0.25 0.0 0.25} }
909	"4" { set limits {-1.0 -0.25 0.25 1.0} }
910	"5" { set limits {-1.0 -0.25 0.0 0.25 1.0} }
911	"6" { set limits {-2.0 -1.0 -0.25 0.25 1.0 2.0} }
912	"7" { set limits {-2.0 -1.0 -0.25 0.0 0.25 1.0 2.0} }
913	"8" { set limits {-3.0 -2.0 -1.0 -0.25 0.25 1.0 2.0 3.0} }
914	"9" { set limits {-3.0 -2.0 -1.0 -0.25 0.0 0.25 1.0 2.0 3.0} }
915	default {
916	    set dlim [expr {6.0/double($number-1)}]
917	    for {set i 0} {$i <$number} {incr i} {
918		lappend limits [expr {$dlim*($i-($number-1)/2.0)}]
919	    }
920	}
921    }
922
923    set result {}
924    foreach limit $limits {
925	lappend result [expr {$mean+$limit*$stdev}]
926    }
927
928    return $result
929}
930
931# minmax-histogram-limits
932#    Determine reasonable limits based on minimum and maximum bounds
933#    for a histogram
934#
935# Arguments:
936#    min          Estimated minimum
937#    max          Estimated maximum
938#    number       Number of limits to generate (defaults to 8)
939#
940# Result:
941#    List of limits
942#
943proc ::math::statistics::minmax-histogram-limits { min max {number 8} } {
944    variable NEGSTDEV
945
946    if { $number < 1 } {
947	return -code error -errorcode ARG "Number of limits must be positive"
948    }
949    if { $min >= $max } {
950	return -code error -errorcode ARG "Minimum must be lower than maximum"
951    }
952
953    set result {}
954    set dlim [expr {($max-$min)/double($number-1)}]
955    for {set i 0} {$i <$number} {incr i} {
956	lappend result [expr {$min+$dlim*$i}]
957    }
958
959    return $result
960}
961
962# linear-model
963#    Determine the coefficients for a linear regression between
964#    two series of data (the model: Y = A + B*X)
965#
966# Arguments:
967#    xdata        Series of independent (X) data
968#    ydata        Series of dependent (Y) data
969#    intercept    Whether to use an intercept or not (optional)
970#
971# Result:
972#    List of the following items:
973#    - (Estimate of) Intercept A
974#    - (Estimate of) Slope B
975#    - Standard deviation of Y relative to fit
976#    - Correlation coefficient R2
977#    - Number of degrees of freedom df
978#    - Standard error of the intercept A
979#    - Significance level of A
980#    - Standard error of the slope B
981#    - Significance level of B
982#
983#
984proc ::math::statistics::linear-model { xdata ydata {intercept 1} } {
985   variable TOOFEWDATA
986
987   if { [llength $xdata] < 3 } {
988      return -code error -errorcode ARG "$TOOFEWDATA: not enough independent data"
989   }
990   if { [llength $ydata] < 3 } {
991      return -code error -errorcode ARG "$TOOFEWDATA: not enough dependent data"
992   }
993   if { [llength $xdata] != [llength $ydata] } {
994      return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data"
995   }
996
997   set sumx  0.0
998   set sumy  0.0
999   set sumx2 0.0
1000   set sumy2 0.0
1001   set sumxy 0.0
1002   set df    0
1003   foreach x $xdata y $ydata {
1004      if { $x != "" && $y != "" } {
1005         set sumx  [expr {$sumx+$x}]
1006         set sumy  [expr {$sumy+$y}]
1007         set sumx2 [expr {$sumx2+$x*$x}]
1008         set sumy2 [expr {$sumy2+$y*$y}]
1009         set sumxy [expr {$sumxy+$x*$y}]
1010         incr df
1011      }
1012   }
1013
1014   if { $df <= 2 } {
1015      return -code error -errorcode ARG "$TOOFEWDATA: too few valid data"
1016   }
1017   if { $sumx2 == 0.0 } {
1018      return -code error -errorcode ARG "$TOOFEWDATA: independent values are all the same"
1019   }
1020
1021   #
1022   # Calculate the intermediate quantities
1023   #
1024   set sx  [expr {$sumx2-$sumx*$sumx/$df}]
1025   set sy  [expr {$sumy2-$sumy*$sumy/$df}]
1026   set sxy [expr {$sumxy-$sumx*$sumy/$df}]
1027
1028   #
1029   # Calculate the coefficients
1030   #
1031   if { $intercept } {
1032      set B [expr {$sxy/$sx}]
1033      set A [expr {($sumy-$B*$sumx)/$df}]
1034   } else {
1035      set B [expr {$sumxy/$sumx2}]
1036      set A 0.0
1037   }
1038
1039   #
1040   # Calculate the error estimates
1041   #
1042   set stdevY 0.0
1043   set varY   0.0
1044
1045   if { $intercept } {
1046      set ve [expr {$sy-$B*$sxy}]
1047      if { $ve >= 0.0 } {
1048         set varY [expr {$ve/($df-2)}]
1049      }
1050   } else {
1051      set ve [expr {$sumy2-$B*$sumxy}]
1052      if { $ve >= 0.0 } {
1053         set varY [expr {$ve/($df-1)}]
1054      }
1055   }
1056   set seY [expr {sqrt($varY)}]
1057
1058   if { $intercept } {
1059      set R2    [expr {$sxy*$sxy/($sx*$sy)}]
1060      set seA   [expr {$seY*sqrt(1.0/$df+$sumx*$sumx/($sx*$df*$df))}]
1061      set seB   [expr {sqrt($varY/$sx)}]
1062      set tA    {}
1063      set tB    {}
1064      if { $seA != 0.0 } {
1065         set tA    [expr {$A/$seA*sqrt($df-2)}]
1066      }
1067      if { $seB != 0.0 } {
1068         set tB    [expr {$B/$seB*sqrt($df-2)}]
1069      }
1070   } else {
1071      set R2    [expr {$sumxy*$sumxy/($sumx2*$sumy2)}]
1072      set seA   {}
1073      set tA    {}
1074      set tB    {}
1075      set seB   [expr {sqrt($varY/$sumx2)}]
1076      if { $seB != 0.0 } {
1077         set tB    [expr {$B/$seB*sqrt($df-1)}]
1078      }
1079   }
1080
1081   #
1082   # Return the list of parameters
1083   #
1084   return [list $A $B $seY $R2 $df $seA $tA $seB $tB]
1085}
1086
1087# linear-residuals
1088#    Determine the difference between actual data and predicted from
1089#    the linear model
1090#
1091# Arguments:
1092#    xdata        Series of independent (X) data
1093#    ydata        Series of dependent (Y) data
1094#    intercept    Whether to use an intercept or not (optional)
1095#
1096# Result:
1097#    List of differences
1098#
1099proc ::math::statistics::linear-residuals { xdata ydata {intercept 1} } {
1100   variable TOOFEWDATA
1101
1102   if { [llength $xdata] < 3 } {
1103      return -code error -errorcode ARG "$TOOFEWDATA: no independent data"
1104   }
1105   if { [llength $ydata] < 3 } {
1106      return -code error -errorcode ARG "$TOOFEWDATA: no dependent data"
1107   }
1108   if { [llength $xdata] != [llength $ydata] } {
1109      return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data"
1110   }
1111
1112   foreach {A B} [linear-model $xdata $ydata $intercept] {break}
1113
1114   set result {}
1115   foreach x $xdata y $ydata {
1116      set residue [expr {$y-$A-$B*$x}]
1117      lappend result $residue
1118   }
1119   return $result
1120}
1121
1122# median
1123#    Determine the median from a list of data
1124#
1125# Arguments:
1126#    data         (Unsorted) list of data
1127#
1128# Result:
1129#    Median (either the middle value or the mean of two values in the
1130#    middle)
1131#
1132# Note:
1133#    Adapted from the Wiki page "Stats", code provided by JPS
1134#
1135proc ::math::statistics::median { data } {
1136    set org_data $data
1137    set data     {}
1138    foreach value $org_data {
1139        if { $value != {} } {
1140            lappend data $value
1141        }
1142    }
1143    set len [llength $data]
1144
1145    set data [lsort -real $data]
1146    if { $len % 2 } {
1147        lindex $data [expr {($len-1)/2}]
1148    } else {
1149        expr {([lindex $data [expr {($len / 2) - 1}]] \
1150		+ [lindex $data [expr {$len / 2}]]) / 2.0}
1151    }
1152}
1153
1154# test-2x2 --
1155#     Compute the chi-square statistic for a 2x2 table
1156#
1157# Arguments:
1158#     a           Element upper-left
1159#     b           Element upper-right
1160#     c           Element lower-left
1161#     d           Element lower-right
1162# Return value:
1163#     Chi-square
1164# Note:
1165#     There is only one degree of freedom - this is important
1166#     when comparing the value to the tabulated values
1167#     of chi-square
1168#
1169proc ::math::statistics::test-2x2 { a b c d } {
1170    set ab     [expr {$a+$b}]
1171    set ac     [expr {$a+$c}]
1172    set bd     [expr {$b+$d}]
1173    set cd     [expr {$c+$d}]
1174    set N      [expr {$a+$b+$c+$d}]
1175    set det    [expr {$a*$d-$b*$c}]
1176    set result [expr {double($N*$det*$det)/double($ab*$cd*$ac*$bd)}]
1177}
1178
1179# print-2x2 --
1180#     Print a 2x2 table
1181#
1182# Arguments:
1183#     a           Element upper-left
1184#     b           Element upper-right
1185#     c           Element lower-left
1186#     d           Element lower-right
1187# Return value:
1188#     Printed version with marginals
1189#
1190proc ::math::statistics::print-2x2 { a b c d } {
1191    set ab     [expr {$a+$b}]
1192    set ac     [expr {$a+$c}]
1193    set bd     [expr {$b+$d}]
1194    set cd     [expr {$c+$d}]
1195    set N      [expr {$a+$b+$c+$d}]
1196    set chisq  [test-2x2 $a $b $c $d]
1197
1198    set    line   [string repeat - 10]
1199    set    result [format "%10d%10d | %10d\n" $a $b $ab]
1200    append result [format "%10d%10d | %10d\n" $c $d $cd]
1201    append result [format "%10s%10s + %10s\n" $line $line $line]
1202    append result [format "%10d%10d | %10d\n" $ac $bd $N]
1203    append result "Chisquare = $chisq\n"
1204    append result "Difference is significant?\n"
1205    append result "   at 95%: [expr {$chisq<3.84146? "no":"yes"}]\n"
1206    append result "   at 99%: [expr {$chisq<6.63490? "no":"yes"}]"
1207}
1208
1209# control-xbar --
1210#     Determine the control lines for an x-bar chart
1211#
1212# Arguments:
1213#     data        List of observed values (at least 20*nsamples)
1214#     nsamples    Number of data per subsamples (default: 4)
1215# Return value:
1216#     List of: mean, lower limit, upper limit, number of data per
1217#     subsample. Can be used in the test-xbar procedure
1218#
1219proc ::math::statistics::control-xbar { data {nsamples 4} } {
1220    variable TOOFEWDATA
1221    variable control_factors
1222
1223    #
1224    # Check the number of data
1225    #
1226    if { $nsamples <= 1 } {
1227        return -code error -errorcode DATA -errorinfo $OUTOFRANGE \
1228            "Number of data per subsample must be at least 2"
1229    }
1230    if { [llength $data] < 20*$nsamples } {
1231        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1232    }
1233
1234    set nogroups [expr {[llength $data]/$nsamples}]
1235    set mrange   0.0
1236    set xmeans   0.0
1237    for { set i 0 } { $i < $nogroups } { incr i } {
1238        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1239
1240        set xmean 0.0
1241        set xmin  [lindex $subsample 0]
1242        set xmax  $xmin
1243        foreach d $subsample {
1244            set xmean [expr {$xmean+$d}]
1245            set xmin  [expr {$xmin<$d? $xmin : $d}]
1246            set xmax  [expr {$xmax>$d? $xmax : $d}]
1247        }
1248        set xmean [expr {$xmean/double($nsamples)}]
1249
1250        set xmeans [expr {$xmeans+$xmean}]
1251        set mrange [expr {$mrange+($xmax-$xmin)}]
1252    }
1253
1254    #
1255    # Determine the control lines
1256    #
1257    set xmeans [expr {$xmeans/double($nogroups)}]
1258    set mrange [expr {$mrange/double($nogroups)}]
1259    set A2     [lindex [lindex $control_factors 1] $nsamples]
1260    if { $A2 == "" } { set A2 [lindex [lindex $control_factors 1] end] }
1261
1262    return [list $xmeans [expr {$xmeans-$A2*$mrange}] \
1263                         [expr {$xmeans+$A2*$mrange}] $nsamples]
1264}
1265
1266# test-xbar --
1267#     Determine if any data points lie outside the x-bar control limits
1268#
1269# Arguments:
1270#     control     List returned by control-xbar with control data
1271#     data        List of observed values
1272# Return value:
1273#     Indices of any subsamples that violate the control limits
1274#
1275proc ::math::statistics::test-xbar { control data } {
1276    foreach {xmean xlower xupper nsamples} $control {break}
1277
1278    if { [llength $data] < 1 } {
1279        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1280    }
1281
1282    set nogroups [expr {[llength $data]/$nsamples}]
1283    if { $nogroups <= 0 } {
1284        set nogroup  1
1285        set nsamples [llength $data]
1286    }
1287
1288    set result {}
1289
1290    for { set i 0 } { $i < $nogroups } { incr i } {
1291        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1292
1293        set xmean 0.0
1294        foreach d $subsample {
1295            set xmean [expr {$xmean+$d}]
1296        }
1297        set xmean [expr {$xmean/double($nsamples)}]
1298
1299        if { $xmean < $xlower } { lappend result $i }
1300        if { $xmean > $xupper } { lappend result $i }
1301    }
1302
1303    return $result
1304}
1305
1306# control-Rchart --
1307#     Determine the control lines for an R chart
1308#
1309# Arguments:
1310#     data        List of observed values (at least 20*nsamples)
1311#     nsamples    Number of data per subsamples (default: 4)
1312# Return value:
1313#     List of: mean range, lower limit, upper limit, number of data per
1314#     subsample. Can be used in the test-Rchart procedure
1315#
1316proc ::math::statistics::control-Rchart { data {nsamples 4} } {
1317    variable TOOFEWDATA
1318    variable control_factors
1319
1320    #
1321    # Check the number of data
1322    #
1323    if { $nsamples <= 1 } {
1324        return -code error -errorcode DATA -errorinfo $OUTOFRANGE \
1325            "Number of data per subsample must be at least 2"
1326    }
1327    if { [llength $data] < 20*$nsamples } {
1328        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1329    }
1330
1331    set nogroups [expr {[llength $data]/$nsamples}]
1332    set mrange   0.0
1333    for { set i 0 } { $i < $nogroups } { incr i } {
1334        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1335
1336        set xmin  [lindex $subsample 0]
1337        set xmax  $xmin
1338        foreach d $subsample {
1339            set xmin  [expr {$xmin<$d? $xmin : $d}]
1340            set xmax  [expr {$xmax>$d? $xmax : $d}]
1341        }
1342        set mrange [expr {$mrange+($xmax-$xmin)}]
1343    }
1344
1345    #
1346    # Determine the control lines
1347    #
1348    set mrange [expr {$mrange/double($nogroups)}]
1349    set D3     [lindex [lindex $control_factors 3] $nsamples]
1350    set D4     [lindex [lindex $control_factors 5] $nsamples]
1351    if { $D3 == "" } { set D3 [lindex [lindex $control_factors 3] end] }
1352    if { $D4 == "" } { set D4 [lindex [lindex $control_factors 5] end] }
1353
1354    return [list $mrange [expr {$D3*$mrange}] \
1355                         [expr {$D4*$mrange}] $nsamples]
1356}
1357
1358# test-Rchart --
1359#     Determine if any data points lie outside the R-chart control limits
1360#
1361# Arguments:
1362#     control     List returned by control-xbar with control data
1363#     data        List of observed values
1364# Return value:
1365#     Indices of any subsamples that violate the control limits
1366#
1367proc ::math::statistics::test-Rchart { control data } {
1368    foreach {rmean rlower rupper nsamples} $control {break}
1369
1370    #
1371    # Check the number of data
1372    #
1373    if { [llength $data] < 1 } {
1374        return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA
1375    }
1376
1377    set nogroups [expr {[llength $data]/$nsamples}]
1378
1379    set result {}
1380    for { set i 0 } { $i < $nogroups } { incr i } {
1381        set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]]
1382
1383        set xmin  [lindex $subsample 0]
1384        set xmax  $xmin
1385        foreach d $subsample {
1386            set xmin  [expr {$xmin<$d? $xmin : $d}]
1387            set xmax  [expr {$xmax>$d? $xmax : $d}]
1388        }
1389        set range [expr {$xmax-$xmin}]
1390
1391        if { $range < $rlower } { lappend result $i }
1392        if { $range > $rupper } { lappend result $i }
1393    }
1394
1395    return $result
1396}
1397
1398# test-Duckworth --
1399#     Determine if two data sets have the same median according to the Tukey-Duckworth test
1400#
1401# Arguments:
1402#     list1           Values in the first data set
1403#     list2           Values in the second data set
1404#     significance    Significance level (either 0.05, 0.01 or 0.001)
1405#
1406# Returns:
1407#     0 if the medians are unequal, 1 if they are equal, -1 if the test can not
1408#     be conducted (the smallest value must be in a different set than the greatest value)
1409#
1410proc ::math::statistics::test-Duckworth {list1 list2 significance} {
1411    set sorted1   [lsort -real $list1]
1412    set sorted2   [lsort -real -decreasing $list2]
1413
1414    set lowest1   [lindex $sorted1 0]
1415    set lowest2   [lindex $sorted2 end]
1416    set greatest1 [lindex $sorted1 end]
1417    set greatest2 [lindex $sorted2 0]
1418
1419    if { $lowest1 <= $lowest2 && $greatest1 >= $greatest2 } {
1420        return -1
1421    }
1422    if { $lowest1 >= $lowest2 && $greatest1 <= $greatest2 } {
1423        return -1
1424    }
1425
1426    #
1427    # Determine how many elements of set 1 are lower than the lowest of set 2
1428    # Ditto for the number of elements of set 2 greater than the greatest of set 1
1429    # (Or vice versa)
1430    #
1431    if { $lowest1 < $lowest2 } {
1432        set lowest   $lowest2
1433        set greatest $greatest1
1434    } else {
1435        set lowest   $lowest1
1436        set greatest $greatest2
1437        set sorted1   [lsort -real $list2]
1438        set sorted2   [lsort -real -decreasing $list1]
1439        #lassign [list $sorted1 $sorted2] sorted2 sorted1
1440    }
1441
1442    set count1 0
1443    set count2 0
1444    foreach v1 $sorted1 {
1445        if { $v1 >= $lowest } {
1446            break
1447        }
1448        incr count1
1449    }
1450    foreach v2 $sorted2 {
1451        if { $v2 <= $greatest } {
1452            break
1453        }
1454        incr count2
1455    }
1456
1457    #
1458    # Determine the statistic D, possibly with correction
1459    #
1460    set n1 [llength $list1]
1461    set n2 [llength $list2]
1462
1463    set correction 0
1464    if { 3 + 4*$n1/3 <= $n2 && $n2 <= 2*$n1 } {
1465        set correction -1
1466    }
1467    if { 3 + 4*$n2/3 <= $n1 && $n1 <= 2*$n2 } {
1468        set correction -1
1469    }
1470
1471    set D [expr {$count1 + $count2 + $correction}]
1472
1473    switch -- [string trim $significance 0] {
1474        ".05" {
1475             return [expr {$D >= 7? 0 : 1}]
1476        }
1477        ".01" {
1478             return [expr {$D >= 10? 0 : 1}]
1479        }
1480        ".001" {
1481             return [expr {$D >= 13? 0 : 1}]
1482        }
1483        default {
1484             return -code error "Significance level must be 0.05, 0.01 or 0.001"
1485        }
1486    }
1487}
1488
1489# test-anova-F --
1490#     Check if two or more groups with normally distributed data have the same
1491#     means
1492#
1493# Arguments:
1494#     alpha            Significance level
1495#     args             One or more lists containing the data for the
1496#                      other groups
1497#
1498# Returns:
1499#     Whether the mean for the groups is likely to be the same (1)
1500#     or not (0).
1501#
1502# Note:
1503#     args may be a nested list
1504#
1505#     Implementation based on Wikipedia page on the F-test, one-way ANOVA
1506#
1507#     Possibly of interest: the ratio itself.
1508#
1509proc ::math::statistics::test-anova-F {alpha args} {
1510    if { [llength $args] == 1 } {
1511        set args [lindex $args 0]
1512    }
1513
1514    if { [llength $args] < 2 } {
1515        return -code error -errorcode ARG -errorinfo "At least two groups are required" \
1516                                                     "At least two groups are required"
1517    }
1518
1519    #
1520    # Determine the variance within the groups and the variance between the groups
1521    #
1522    set meanPerGroup {}
1523    set varPerGroup  {}
1524    set allData   {}
1525    foreach group $args {
1526        lappend meanPerGroup [::math::statistics::mean $group]
1527        lappend varPerGroup  [::math::statistics::pvar $group]
1528        set allData          [concat $allData $group]
1529    }
1530    set meanOverall [::math::statistics::mean $allData]
1531
1532    set varBetween 0.0
1533    foreach group $args mean $meanPerGroup {
1534        set varBetween [expr {$varBetween + [llength $group] * ($mean - $meanOverall)**2}]
1535    }
1536    set varBetween [expr {$varBetween / ([llength $args] - 1)}]
1537
1538    set varWithin 0.0
1539    foreach group $args var $varPerGroup {
1540        set varWithin [expr {$varWithin + [llength $group] * $var}]
1541    }
1542    set varWithin [expr {$varWithin / ([llength $allData] - [llength $args])}]
1543
1544    #
1545    # Finally compare the ratio to the F-distribution
1546    #
1547    set ratio [expr {$varBetween / $varWithin}]
1548    set nf1   [expr {[llength $args]    - 1}]
1549    set nf2   [expr {[llength $allData] - [llength $args]}]
1550
1551    expr {[::math::statistics::cdf-F $nf1 $nf2 $ratio] <= 1.0 - $alpha}
1552}
1553
1554# test-Tukey-range --
1555#     Check if two or more groups with normally distributed data have different
1556#     means or not, using Tukey's range test
1557#
1558# Arguments:
1559#     alpha            Significance level
1560#     args             Two or more lists containing the data for the
1561#                      other groups
1562#
1563# Returns:
1564#     For each pair of groups a list of:
1565#     - group indices
1566#     - whether the means differ (1) or not (0)
1567#     - limits of the confidence interval (for closer investigation)
1568#
1569# Note:
1570#     args may be a nested list
1571#
1572#     Implementation based on Wikipedia page on Tukey's range test
1573#
1574proc ::math::statistics::test-Tukey-range {alpha args} {
1575    if { [llength $args] == 1 } {
1576        set args [lindex $args 0]
1577    }
1578
1579    if { [llength $args] < 2 } {
1580        return -code error -errorcode ARG -errorinfo "At least two groups are required" \
1581                                                     "At least two groups are required"
1582    }
1583
1584    if { $alpha != 0.05 && $alpha != 0.01 } {
1585        return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01"
1586    }
1587
1588    #
1589    # Determine the mean per group and the pooled variance of the data
1590    #
1591    set meanPerGroup {}
1592    set allData      {}
1593    set sumVar       0.0
1594    foreach group $args {
1595        lappend meanPerGroup  [mean $group]
1596        set sumVar            [expr {$sumVar + ([llength $group]-1) * [var $group]}]
1597        set allData           [concat $allData $group]
1598    }
1599
1600    set n          [llength $allData]
1601    set stdOverall [expr {sqrt($sumVar /($n - [llength $args]))}]
1602
1603    set qcrit      [Qcrit-Tukey $alpha $n [llength $args]]
1604
1605    set result {}
1606
1607    for {set g 0} {$g < [llength $args]} {incr g} {
1608        set ggroup [lindex $args $g]
1609        set gmean  [mean $ggroup]
1610        set ng     [llength $ggroup]
1611
1612        for {set h [expr {$g+1}]} {$h < [llength $args]} {incr h} {
1613            set hgroup    [lindex $args $h]
1614            set hmean     [mean $hgroup]
1615            set nh        [llength $hgroup]
1616
1617            set halfwidth [expr {$qcrit * $stdOverall / sqrt(2.0) * sqrt( 1.0/$ng + 1.0/$nh )}]
1618            set lower     [expr {$hmean - $gmean - $halfwidth}]
1619            set upper     [expr {$hmean - $gmean + $halfwidth}]
1620            set unequal   1
1621            if { $lower < 0.0 && $upper > 0.0 } {
1622                set unequal 0
1623            }
1624            lappend result [list $g $h $unequal $lower $upper]
1625        }
1626    }
1627
1628    return $result
1629}
1630
1631# Qcrit-Tukey --
1632#     Determine the critical value for the Tukey range test
1633#
1634# Arguments:
1635#     alpha            Significance level
1636#     numberData       Total number of data
1637#     numberGroups     Number of groups
1638#
1639# Returns:
1640#     Critical value
1641#
1642# Note:
1643#     If there are more than 10 groups, simply use 10 groups
1644#
1645proc ::math::statistics::Qcrit-Tukey {alpha numberData numberGroups} {
1646    variable tukey_table_05
1647    variable tukey_table_01
1648
1649    if { $alpha == 0.05 } {
1650        upvar 0 tukey_table_05 tukey_table
1651    } else {
1652        upvar 0 tukey_table_05 tukey_table
1653    }
1654
1655    set df [expr {$numberData - $numberGroups}]
1656
1657    if { $numberGroups > 10 } {
1658        set numberGroups 10
1659    }
1660    incr numberGroups -1 ;# Offset because of 0-based numbering
1661
1662    if { $df > 120 } {
1663        return [lindex $tukey_table end $numberGroups]
1664    }
1665
1666    foreach {dfe values} $tukey_table {
1667        if { $df <= $dfe } {
1668            return [lindex $values $numberGroups]
1669        }
1670    }
1671}
1672
1673# test-Dunnett --
1674#     Check if one or more groups with normally distributed data have different
1675#     means from the control group or not, using Dunnett's test
1676#
1677# Arguments:
1678#     alpha            Significance level
1679#     control          Control group
1680#     args             One or more lists containing the data for the
1681#                      other groups
1682#
1683# Returns:
1684#     For each group a list of:
1685#     - whether the mean differs (1) from the control or not (0)
1686#     - the confidence interval
1687#
1688# Note:
1689#     args may be a nested list
1690#
1691#     Implementation based on Wikipedia page on Dunnett's test
1692#     The test is two-sided.
1693#
1694proc ::math::statistics::test-Dunnett {alpha control args} {
1695    if { [llength $args] == 1 } {
1696        set args [lindex $args 0]
1697    }
1698
1699    if { [llength $args] < 1 } {
1700        return -code error -errorcode ARG -errorinfo "At least one additional group is required" \
1701                                                     "At least one additional group is required"
1702    }
1703
1704    if { $alpha != 0.05 && $alpha != 0.01 } {
1705        return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01"
1706    }
1707
1708    #
1709    # Determine the mean per group and the pooled variance
1710    #
1711    set allData      $control
1712    set sumVar       [expr {([llength $control]-1)*[var $control]}]
1713    foreach group $args {
1714        set sumVar            [expr {$sumVar + ([llength $group]-1) * [var $group]}]
1715        set allData           [concat $allData $group]
1716    }
1717
1718    set n          [llength $allData]
1719    set stdOverall [expr {sqrt($sumVar /($n - [llength $args] - 1))}]
1720
1721    set tcrit [Tcrit-Dunnett $alpha $n [llength $args]]
1722
1723    set result {}
1724
1725    set cmean  [mean $control]
1726    set nc     [llength $control]
1727
1728    for {set g 0} {$g < [llength $args]} {incr g} {
1729        set ggroup    [lindex $args $g]
1730        set gmean     [mean $ggroup]
1731        set ng        [llength $ggroup]
1732
1733        set halfwidth [expr {$tcrit * $stdOverall * sqrt( 1.0/$nc + 1.0/$ng )}]
1734        set lower     [expr {$gmean - $cmean - $halfwidth}]
1735        set upper     [expr {$gmean - $cmean + $halfwidth}]
1736        set unequal   1
1737        if { $lower < 0.0 && $upper > 0.0 } {
1738            set unequal 0
1739        }
1740        lappend result [list $unequal $lower $upper]
1741    }
1742
1743    return $result
1744}
1745
1746# Tcrit-Dunnett --
1747#     Determine the critical value for the Dunnett test
1748#
1749# Arguments:
1750#     alpha            Significance level
1751#     numberData       Total number of data
1752#     numberGroups     Number of groups to compare against the control
1753#
1754# Returns:
1755#     Critical value
1756#
1757# Note:
1758#     If there are more than 10 groups, simply use 10 groups
1759#
1760proc ::math::statistics::Tcrit-Dunnett {alpha numberData numberGroups} {
1761    variable dunnett_table_05
1762    variable dunnett_table_01
1763
1764    if { $alpha == 0.05 } {
1765        upvar 0 dunnett_table_05 dunnett_table
1766    } else {
1767        upvar 0 dunnett_table_05 dunnett_table
1768    }
1769
1770    set df [expr {$numberData - $numberGroups - 1}]
1771
1772    incr numberGroups 1 ;# Add the control group
1773    if { $numberGroups > 10 } {
1774        set numberGroups 10
1775    }
1776    incr numberGroups -2 ;# Offset because of 0-based numbering and start at 2
1777
1778    if { $df > 60 } {
1779        return [lindex $dunnett_table end $numberGroups]
1780    }
1781
1782    foreach {dfe values} $dunnett_table {
1783        if { $df <= $dfe } {
1784            return [lindex $values $numberGroups]
1785        }
1786    }
1787}
1788
1789#
1790# Load the auxiliary scripts
1791#
1792source [file join [file dirname [info script]] pdf_stat.tcl]
1793source [file join [file dirname [info script]] plotstat.tcl]
1794source [file join [file dirname [info script]] liststat.tcl]
1795source [file join [file dirname [info script]] mvlinreg.tcl]
1796source [file join [file dirname [info script]] kruskal.tcl]
1797source [file join [file dirname [info script]] wilcoxon.tcl]
1798source [file join [file dirname [info script]] stat_kernel.tcl]
1799source [file join [file dirname [info script]] stat_wasserstein.tcl]
1800source [file join [file dirname [info script]] stat_logit.tcl]
1801
1802#
1803# Define the tables
1804#
1805namespace eval ::math::statistics {
1806    variable tukey_table_05
1807    variable tukey_table_01
1808    variable dunnett_table_05
1809    variable dunnett_table_01
1810
1811    #alpha = 0.05
1812    #k 2 3 4 5 6 7 8 9 10
1813    #df
1814    set tukey_table_05 {
1815        1  {18.0 27.0 32.8 37.1 40.4 43.1 45.4 47.4 49.1}
1816        2  {6.08 8.33 9.80 10.88 11.73 12.43 13.03 13.54 13.99}
1817        3  {4.50 5.91 6.82 7.50 8.04 8.48 8.85 9.18 9.46}
1818        4  {3.93 5.04 5.76 6.29 6.71 7.05 7.35 7.60 7.83}
1819        5  {3.64 4.60 5.22 5.67 6.03 6.33 6.58 6.80 6.99}
1820        6  {3.46 4.34 4.90 5.30 5.63 5.90 6.12 6.32 6.49}
1821        7  {3.34 4.16 4.68 5.06 5.36 5.61 5.82 6.00 6.16}
1822        8  {3.26 4.04 4.53 4.89 5.17 5.40 5.60 5.77 5.92}
1823        9  {3.20 3.95 4.41 4.76 5.02 5.24 5.43 5.59 5.74}
1824        10 {3.15 3.88 4.33 4.65 4.91 5.12 5.30 5.46 5.60}
1825        11 {3.11 3.82 4.26 4.57 4.82 5.03 5.20 5.35 5.49}
1826        12 {3.08 3.77 4.20 4.51 4.75 4.95 5.12 5.27 5.39}
1827        13 {3.06 3.73 4.15 4.45 4.69 4.88 5.05 5.19 5.32}
1828        14 {3.03 3.70 4.11 4.41 4.64 4.83 4.99 5.13 5.25}
1829        15 {3.01 3.67 4.08 4.37 4.59 4.78 4.94 5.08 5.20}
1830        16 {3.00 3.65 4.05 4.33 4.56 4.74 4.90 5.03 5.15}
1831        17 {2.98 3.63 4.02 4.30 4.52 4.70 4.86 4.99 5.11}
1832        18 {2.97 3.61 4.00 4.28 4.49 4.67 4.82 4.96 5.07}
1833        19 {2.96 3.59 3.98 4.25 4.47 4.65 4.79 4.92 5.04}
1834        20 {2.95 3.58 3.96 4.23 4.45 4.62 4.77 4.90 5.01}
1835        24 {2.92 3.53 3.90 4.17 4.37 4.54 4.68 4.81 4.92}
1836        30 {2.89 3.49 3.85 4.10 4.30 4.46 4.60 4.72 4.82}
1837        40 {2.86 3.44 3.79 4.04 4.23 4.39 4.52 4.63 4.73}
1838        60 {2.83 3.40 3.74 3.98 4.16 4.31 4.44 4.55 4.65}
1839        120 {2.80 3.36 3.68 3.92 4.10 4.24 4.36 4.47 4.56}
1840        inf {2.77 3.31 3.63 3.86 4.03 4.17 4.29 4.39 4.47}}
1841
1842    #alpha = 0.01
1843    #k 2 3 4 5 6 7 8 9 10
1844    #df
1845    set tukey_table_01 {
1846        1  {90.0 135 164 186 202 216 227 237 246}
1847        2  {13.90 19.02 22.56 25.37 27.76 29.86 31.73 33.41 34.93}
1848        3  {8.26 10.62 12.17 13.32 14.24 15.00 15.65 16.21 16.71}
1849        4  {6.51 8.12 9.17 9.96 10.58 11.10 11.54 11.92 12.26}
1850        5  {5.70 6.98 7.80 8.42 8.91 9.32 9.67 9.97 10.24}
1851        6  {5.24 6.33 7.03 7.56 7.97 8.32 8.61 8.87 9.10}
1852        7  {4.95 5.92 6.54 7.00 7.37 7.68 7.94 8.17 8.37}
1853        8  {4.75 5.64 6.20 6.62 6.96 7.24 7.47 7.68 7.86}
1854        9  {4.60 5.43 5.96 6.35 6.66 6.91 7.13 7.33 7.49}
1855        10 {4.48 5.27 5.77 6.14 6.43 6.67 6.87 7.05 7.21}
1856        11 {4.39 5.15 5.62 5.97 6.25 6.48 6.67 6.84 6.99}
1857        12 {4.32 5.05 5.50 5.84 6.10 6.32 6.51 6.67 6.81}
1858        13 {4.26 4.96 5.40 5.73 5.98 6.19 6.37 6.53 6.67}
1859        14 {4.21 4.89 5.32 5.63 5.88 6.08 6.26 6.41 6.54}
1860        15 {4.17 4.84 5.25 5.56 5.80 5.99 6.16 6.31 6.44}
1861        16 {4.13 4.79 5.19 5.49 5.72 5.92 6.08 6.22 6.35}
1862        17 {4.10 4.74 5.14 5.43 5.66 5.85 6.01 6.15 6.27}
1863        18 {4.07 4.70 5.09 5.38 5.60 5.79 5.94 6.08 6.20}
1864        19 {4.05 4.67 5.05 5.33 5.55 5.73 5.89 6.02 6.14}
1865        20 {4.02 4.64 5.02 5.29 5.51 5.69 5.84 5.97 6.09}
1866        24 {3.96 4.55 4.91 5.17 5.37 5.54 5.69 5.81 5.92}
1867        30 {3.89 4.45 4.80 5.05 5.24 5.40 5.54 5.65 5.76}
1868        40 {3.82 4.37 4.70 4.93 5.11 5.26 5.39 5.50 5.60}
1869        60 {3.76 4.28 4.59 4.82 4.99 5.13 5.25 5.36 5.45}
1870        120 {3.70 4.20 4.50 4.71 4.87 5.01 5.12 5.21 5.30}
1871        inf {3.64 4.12 4.40 4.60 4.76 4.88 4.99 5.08 5.16}}
1872
1873    #From: http://davidmlane.com/hyperstat/table_Dunnett.html
1874    #Dunnett Table
1875    #Number of Groups Including Control Group
1876    #dfe     alpha = 0.05
1877    #         2       3       4       5       6       7       8       9       10
1878    set dunnett_table_05 {
1879    5        {2.57    3.03    3.29    3.48    3.62    3.73    3.82    3.9     3.97}
1880    6        {2.45    2.86    3.1     3.26    3.39    3.49    3.57    3.64    3.71}
1881    7        {2.36    2.75    2.97    3.12    3.24    3.33    3.41    3.47    3.53}
1882    8        {2.31    2.67    2.88    3.02    3.13    3.22    3.29    3.35    3.41}
1883    9        {2.26    2.61    2.81    2.95    3.05    3.14    3.2     3.26    3.32}
1884    10       {2.23    2.57    2.76    2.89    2.99    3.07    3.14    3.19    3.24}
1885    11       {2.2     2.53    2.72    2.84    2.94    3.02    3.08    3.14    3.19}
1886    12       {2.18    2.5     2.68    2.81    2.9     2.98    3.04    3.09    3.14}
1887    13       {2.16    2.48    2.65    2.78    2.87    2.94    3       3.06    3.1}
1888    14       {2.14    2.46    2.63    2.75    2.84    2.91    2.97    3.02    3.07}
1889    15       {2.13    2.44    2.61    2.73    2.82    2.89    2.95    3       3.04}
1890    16       {2.12    2.42    2.59    2.71    2.8     2.87    2.92    2.97    3.02}
1891    17       {2.11    2.41    2.58    2.69    2.78    2.85    2.9     2.95    3}
1892    18       {2.1     2.4     2.56    2.68    2.76    2.83    2.89    2.94    2.98}
1893    19       {2.09    2.39    2.55    2.66    2.75    2.81    2.87    2.92    2.96}
1894    20       {2.09    2.38    2.54    2.65    2.73    2.8     2.86    2.9     2.95}
1895    24       {2.06    2.35    2.51    2.61    2.7     2.76    2.81    2.86    2.9}
1896    30       {2.04    2.32    2.47    2.58    2.66    2.72    2.77    2.82    2.86}
1897    40       {2.02    2.29    2.44    2.54    2.62    2.68    2.73    2.77    2.81}
1898    60       {2       2.27    2.41    2.51    2.58    2.64    2.69    2.73    2.77}}
1899
1900    set dunnett_table_01 {
1901    5        {4.03    4.63    4.98    5.22    5.41    5.56    5.69    5.8     5.89}
1902    6        {3.71    4.21    4.51    4.71    4.87    5       5.1     5.2     5.28}
1903    7        {3.5     3.95    4.21    4.39    4.53    4.64    4.74    4.82    4.89}
1904    8        {3.36    3.77    4       4.17    4.29    4.4     4.48    4.56    4.62}
1905    9        {3.25    3.63    3.85    4.01    4.12    4.22    4.3     4.37    4.43}
1906    10       {3.17    3.53    3.74    3.88    3.99    4.08    4.16    4.22    4.28}
1907    11       {3.11    3.45    3.65    3.79    3.89    3.98    4.05    4.11    4.16}
1908    12       {3.05    3.39    3.58    3.71    3.81    3.89    3.96    4.02    4.07}
1909    13       {3.01    3.33    3.52    3.65    3.74    3.82    3.89    3.94    3.99}
1910    14       {2.98    3.29    3.47    3.59    3.69    3.76    3.83    3.88    3.93}
1911    15       {2.95    3.25    3.43    3.55    3.64    3.71    3.78    3.83    3.88}
1912    16       {2.92    3.22    3.39    3.51    3.6     3.67    3.73    3.78    3.83}
1913    17       {2.9     3.19    3.36    3.47    3.56    3.63    3.69    3.74    3.79}
1914    18       {2.88    3.17    3.33    3.44    3.53    3.6     3.66    3.71    3.75}
1915    19       {2.86    3.15    3.31    3.42    3.5     3.57    3.63    3.68    3.72}
1916    20       {2.85    3.13    3.29    3.4     3.48    3.55    3.6     3.65    3.69}
1917    24       {2.8     3.07    3.22    3.32    3.4     3.47    3.52    3.57    3.61}
1918    30       {2.75    3.01    3.15    3.25    3.33    3.39    3.44    3.49    3.52}
1919    40       {2.7     2.95    3.09    3.19    3.26    3.32    3.37    3.41    3.44}
1920    60       {2.66    2.9     3.03    3.12    3.19    3.25    3.29    3.33    3.37}}
1921
1922}
1923
1924#
1925# Simple test code
1926#
1927if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } {
1928
1929    console show
1930    puts [interp aliases]
1931
1932    set values {1 1 1 1 {}}
1933    puts [::math::statistics::basic-stats $values]
1934    set values {1 2 3 4}
1935    puts [::math::statistics::basic-stats $values]
1936    set values {1 -1 1 -2}
1937    puts [::math::statistics::basic-stats $values]
1938    puts [::math::statistics::mean   $values]
1939    puts [::math::statistics::min    $values]
1940    puts [::math::statistics::max    $values]
1941    puts [::math::statistics::number $values]
1942    puts [::math::statistics::stdev  $values]
1943    puts [::math::statistics::var    $values]
1944
1945    set novals 100
1946    #set maxvals 100001
1947    set maxvals 1001
1948    while { $novals < $maxvals } {
1949	set values {}
1950	for { set i 0 } { $i < $novals } { incr i } {
1951	    lappend values [expr {rand()}]
1952	}
1953	puts [::math::statistics::basic-stats $values]
1954	puts [::math::statistics::histogram {0.0 0.2 0.4 0.6 0.8 1.0} $values]
1955	set novals [expr {$novals*10}]
1956    }
1957
1958    puts "Normal distribution:"
1959    puts "X=0:  [::math::statistics::pdf-normal 0.0 1.0 0.0]"
1960    puts "X=1:  [::math::statistics::pdf-normal 0.0 1.0 1.0]"
1961    puts "X=-1: [::math::statistics::pdf-normal 0.0 1.0 -1.0]"
1962
1963    set data1 {0.0 1.0 3.0 4.0 100.0 -23.0}
1964    set data2 {1.0 2.0 4.0 5.0 101.0 -22.0}
1965    set data3 {0.0 2.0 6.0 8.0 200.0 -46.0}
1966    set data4 {2.0 6.0 8.0 200.0 -46.0 1.0}
1967    set data5 {100.0 99.0 90.0 93.0 5.0 123.0}
1968    puts "Correlation data1 and data1: [::math::statistics::corr $data1 $data1]"
1969    puts "Correlation data1 and data2: [::math::statistics::corr $data1 $data2]"
1970    puts "Correlation data1 and data3: [::math::statistics::corr $data1 $data3]"
1971    puts "Correlation data1 and data4: [::math::statistics::corr $data1 $data4]"
1972    puts "Correlation data1 and data5: [::math::statistics::corr $data1 $data5]"
1973
1974    #   set data {1.0 2.0 2.3 4.0 3.4 1.2 0.6 5.6}
1975    #   puts [::math::statistics::basicStats $data]
1976    #   puts [::math::statistics::interval-mean-stdev $data 0.90]
1977    #   puts [::math::statistics::interval-mean-stdev $data 0.95]
1978    #   puts [::math::statistics::interval-mean-stdev $data 0.99]
1979
1980    #   puts "\nTest mean values:"
1981    #   puts [::math::statistics::test-mean $data 2.0 0.1 0.90]
1982    #   puts [::math::statistics::test-mean $data 2.0 0.5 0.90]
1983    #   puts [::math::statistics::test-mean $data 2.0 1.0 0.90]
1984    #   puts [::math::statistics::test-mean $data 2.0 2.0 0.90]
1985
1986    set rc [catch {
1987	set m [::math::statistics::mean {}]
1988    } msg ] ; # {}
1989    puts "Result: $rc $msg"
1990
1991    puts "\nTest quantiles:"
1992    set data      {1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0}
1993    set quantiles {0.11 0.21 0.51 0.91 0.99}
1994    set limits    {2.1 4.1 6.1 8.1}
1995    puts [::math::statistics::quantiles $data $quantiles]
1996
1997    set histogram [::math::statistics::histogram $limits $data]
1998    puts [::math::statistics::quantiles $limits $histogram $quantiles]
1999
2000    puts "\nTest autocorrelation:"
2001    set data      {1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0}
2002    puts [::math::statistics::autocorr $data]
2003    set data      {1.0 -1.1 2.0 -0.6 3.0 -4.0 0.5 0.9 -1.0}
2004    puts [::math::statistics::autocorr $data]
2005
2006    puts "\nTest histogram limits:"
2007    puts [::math::statistics::mean-histogram-limits   1.0 1.0]
2008    puts [::math::statistics::mean-histogram-limits   1.0 1.0 4]
2009    puts [::math::statistics::minmax-histogram-limits 1.0 10.0 10]
2010
2011}
2012
2013#
2014# Test xbar/R-chart procedures
2015#
2016if { 0 } {
2017    set data {}
2018    for { set i 0 } { $i < 500 } { incr i } {
2019        lappend data [expr {rand()}]
2020    }
2021    set limits [::math::statistics::control-xbar $data]
2022    puts $limits
2023
2024    puts "Outliers? [::math::statistics::test-xbar $limits $data]"
2025
2026    set newdata {1.0 1.0 1.0 1.0 0.5 0.5 0.5 0.5 10.0 10.0 10.0 10.0}
2027    puts "Outliers? [::math::statistics::test-xbar $limits $newdata] -- 0 2"
2028
2029    set limits [::math::statistics::control-Rchart $data]
2030    puts $limits
2031
2032    puts "Outliers? [::math::statistics::test-Rchart $limits $data]"
2033
2034    set newdata {0.0 1.0 2.0 1.0 0.4 0.5 0.6 0.5 10.0  0.0 10.0 10.0}
2035    puts "Outliers? [::math::statistics::test-Rchart $limits $newdata] -- 0 2"
2036}
2037
2038