1# statistics.tcl -- 2# 3# Package for basic statistical analysis 4# 5# version 0.1: initial implementation, january 2003 6# version 0.1.1: added linear regres 7# version 0.1.2: border case in stdev taken care of 8# version 0.1.3: moved initialisation of CDF to first call, november 2004 9# version 0.3: added test for normality (as implemented by Torsten Reincke), march 2006 10# (also fixed an error in the export list) 11# version 0.4: added the multivariate linear regression procedures by 12# Eric Kemp-Benedict, february 2007 13# version 0.5: added the population standard deviation and variance, 14# as suggested by Dimitrios Zachariadis 15# version 0.6: added pdf and cdf procedures for various distributions 16# (provided by Eric Kemp-Benedict) 17# version 0.7: added Kruskal-Wallis test (by Torsten Berg) 18# version 0.8: added Wilcoxon test and Spearman rank correlation 19# version 0.9: added kernel density estimation 20# version 0.9.3: added histogram-alt, corrected test-normal 21# version 1.0: added test-anova-F 22# version 1.0.1: correction in pdf-lognormal and cdf-lognormal 23# version 1.1: added test-Tukey-range and test-Dunnett 24# version 1.3: added wasserstein-distance, kl-divergence and logit regression 25 26package require Tcl 8.5 ; # 8.5+ feature in test-anova-F and others: **-operator 27package provide math::statistics 1.5.0 28package require math 29 30if {![llength [info commands ::lrepeat]]} { 31 # Forward portability, emulate lrepeat 32 proc ::lrepeat {n args} { 33 if {$n < 1} { 34 return -code error "must have a count of at least 1" 35 } 36 set res {} 37 while {$n} { 38 foreach x $args { lappend res $x } 39 incr n -1 40 } 41 return $res 42 } 43} 44 45# ::math::statistics -- 46# Namespace holding the procedures and variables 47# 48 49namespace eval ::math::statistics { 50 # 51 # Safer: change to short procedures 52 # 53 namespace export mean min max number var stdev pvar pstdev basic-stats corr \ 54 histogram histogram-alt interval-mean-stdev t-test-mean quantiles \ 55 test-normal lillieforsFit \ 56 autocorr crosscorr filter map samplescount median \ 57 test-2x2 print-2x2 control-xbar test_xbar \ 58 control-Rchart test-Rchart \ 59 test-Kruskal-Wallis analyse-Kruskal-Wallis group-rank \ 60 test-Wilcoxon spearman-rank spearman-rank-extended \ 61 test-Duckworth test-anova-F test-Tukey-range test-Dunnett 62 # 63 # Error messages 64 # 65 variable NEGSTDEV {Zero or negative standard deviation} 66 variable TOOFEWDATA {Too few or invalid data} 67 variable OUTOFRANGE {Argument out of range} 68 variable INVALID {Argument invalid} 69 70 # 71 # Coefficients involved 72 # 73 variable factorNormalPdf 74 set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}] 75 76 # xbar/R-charts: 77 # Data from: 78 # Peter W.M. John: 79 # Statistical methods in engineering and quality assurance 80 # Wiley and Sons, 1990 81 # 82 variable control_factors { 83 A2 {1.880 1.093 0.729 0.577 0.483 0.419 0.419} 84 D3 {0.0 0.0 0.0 0.0 0.0 0.076 0.076} 85 D4 {3.267 2.574 2.282 2.114 2.004 1.924 1.924} 86 } 87} 88 89# mean, min, max, number, var, stdev, pvar, pstdev -- 90# Return the mean (minimum, maximum) value of a list of numbers 91# or number of non-missing values 92# 93# Arguments: 94# type Type of value to be returned 95# values List of values to be examined 96# 97# Results: 98# Value that was required 99# 100# 101namespace eval ::math::statistics { 102 foreach type {mean min max number stdev var pstdev pvar} { 103 proc $type { values } "BasicStats $type \$values" 104 } 105 proc basic-stats { values } "BasicStats all \$values" 106} 107 108# BasicStats -- 109# Return the one or all of the basic statistical properties 110# 111# Arguments: 112# type Type of value to be returned 113# values List of values to be examined 114# 115# Results: 116# Value that was required 117# 118proc ::math::statistics::BasicStats { type values } { 119 variable TOOFEWDATA 120 121 if { [lsearch {all mean min max number stdev var pstdev pvar} $type] < 0 } { 122 return -code error \ 123 -errorcode ARG -errorinfo [list unknown type of statistic -- $type] \ 124 [list unknown type of statistic -- $type] 125 } 126 127 set min {} 128 set max {} 129 set mean {} 130 set stdev {} 131 set var {} 132 133 set sum 0.0 134 set sumsq 0.0 135 set number 0 136 set first {} 137 138 foreach value $values { 139 if { $value == {} } { 140 continue 141 } 142 set value [expr {double($value)}] 143 144 if { $first == {} } { 145 set first $value 146 } 147 148 incr number 149 set sum [expr {$sum+$value}] 150 set sumsq [expr {$sumsq+($value-$first)*($value-$first)}] 151 152 if { $min == {} || $value < $min } { 153 set min $value 154 } 155 if { $max == {} || $value > $max } { 156 set max $value 157 } 158 } 159 160 if { $number > 0 } { 161 set mean [expr {$sum/$number}] 162 } else { 163 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 164 } 165 166 if { $number > 1 } { 167 set var [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number-1)}] 168 # 169 # Take care of a rare situation: uniform data might 170 # cause a tiny negative difference 171 # 172 if { $var < 0.0 } { 173 set var 0.0 174 } 175 set stdev [expr {sqrt($var)}] 176 } 177 set pvar [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number)}] 178 # 179 # Take care of a rare situation: uniform data might 180 # cause a tiny negative difference 181 # 182 if { $pvar < 0.0 } { 183 set pvar 0.0 184 } 185 set pstdev [expr {sqrt($pvar)}] 186 187 set all [list $mean $min $max $number $stdev $var $pstdev $pvar] 188 189 # 190 # Return the appropriate value 191 # 192 set $type 193} 194 195# histogram -- 196# Return histogram information from a list of numbers 197# 198# Arguments: 199# limits Upper limits for the buckets (in increasing order) 200# values List of values to be examined 201# weights List of weights, one per value (optional) 202# 203# Results: 204# List of number of values in each bucket (length is one more than 205# the number of limits) 206# 207# 208proc ::math::statistics::histogram { limits values {weights {}} } { 209 210 if { [llength $limits] < 1 } { 211 return -code error -errorcode ARG -errorinfo {No limits given} {No limits given} 212 } 213 if { [llength $weights] > 0 && [llength $values] != [llength $weights] } { 214 return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length} 215 } 216 217 set limits [lsort -real -increasing $limits] 218 219 for { set index 0 } { $index <= [llength $limits] } { incr index } { 220 set buckets($index) 0 221 } 222 223 set last [llength $limits] 224 225 # Will do integer arithmetic if unset 226 if {$weights eq ""} { 227 set weights [lrepeat [llength $values] 1] 228 } 229 230 foreach value $values weight $weights { 231 if { $value == {} } { 232 continue 233 } 234 235 set index 0 236 set found 0 237 foreach limit $limits { 238 if { $value <= $limit } { 239 set found 1 240 set buckets($index) [expr $buckets($index)+$weight] 241 break 242 } 243 incr index 244 } 245 246 if { $found == 0 } { 247 set buckets($last) [expr $buckets($last)+$weight] 248 } 249 } 250 251 set result {} 252 for { set index 0 } { $index <= $last } { incr index } { 253 lappend result $buckets($index) 254 } 255 256 return $result 257} 258 259# histogram-alt -- 260# Return histogram information from a list of numbers - 261# intervals are open-ended at the lower bound instead of at the upper bound 262# 263# Arguments: 264# limits Upper limits for the buckets (in increasing order) 265# values List of values to be examined 266# weights List of weights, one per value (optional) 267# 268# Results: 269# List of number of values in each bucket (length is one more than 270# the number of limits) 271# 272# 273proc ::math::statistics::histogram-alt { limits values {weights {}} } { 274 275 if { [llength $limits] < 1 } { 276 return -code error -errorcode ARG -errorinfo {No limits given} {No limits given} 277 } 278 if { [llength $weights] > 0 && [llength $values] != [llength $weights] } { 279 return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length} 280 } 281 282 set limits [lsort -real -increasing $limits] 283 284 for { set index 0 } { $index <= [llength $limits] } { incr index } { 285 set buckets($index) 0 286 } 287 288 set last [llength $limits] 289 290 # Will do integer arithmetic if unset 291 if {$weights eq ""} { 292 set weights [lrepeat [llength $values] 1] 293 } 294 295 foreach value $values weight $weights { 296 if { $value == {} } { 297 continue 298 } 299 300 set index 0 301 set found 0 302 foreach limit $limits { 303 if { $value < $limit } { 304 set found 1 305 set buckets($index) [expr $buckets($index)+$weight] 306 break 307 } 308 incr index 309 } 310 311 if { $found == 0 } { 312 set buckets($last) [expr $buckets($last)+$weight] 313 } 314 } 315 316 set result {} 317 for { set index 0 } { $index <= $last } { incr index } { 318 lappend result $buckets($index) 319 } 320 321 return $result 322} 323 324# corr -- 325# Return the correlation coefficient of two sets of data 326# 327# Arguments: 328# data1 List with the first set of data 329# data2 List with the second set of data 330# 331# Result: 332# Correlation coefficient of the two 333# 334proc ::math::statistics::corr { data1 data2 } { 335 variable TOOFEWDATA 336 337 set number 0 338 set sum1 0.0 339 set sum2 0.0 340 set sumsq1 0.0 341 set sumsq2 0.0 342 set sumprod 0.0 343 344 foreach value1 $data1 value2 $data2 { 345 if { $value1 == {} || $value2 == {} } { 346 continue 347 } 348 set value1 [expr {double($value1)}] 349 set value2 [expr {double($value2)}] 350 351 set sum1 [expr {$sum1+$value1}] 352 set sum2 [expr {$sum2+$value2}] 353 set sumsq1 [expr {$sumsq1+$value1*$value1}] 354 set sumsq2 [expr {$sumsq2+$value2*$value2}] 355 set sumprod [expr {$sumprod+$value1*$value2}] 356 incr number 357 } 358 if { $number > 0 } { 359 set numerator [expr {$number*$sumprod-$sum1*$sum2}] 360 set denom1 [expr {sqrt($number*$sumsq1-$sum1*$sum1)}] 361 set denom2 [expr {sqrt($number*$sumsq2-$sum2*$sum2)}] 362 if { $denom1 != 0.0 && $denom2 != 0.0 } { 363 set corr_coeff [expr {$numerator/$denom1/$denom2}] 364 } elseif { $denom1 != 0.0 || $denom2 != 0.0 } { 365 set corr_coeff 0.0 ;# Uniform against non-uniform 366 } else { 367 set corr_coeff 1.0 ;# Both uniform 368 } 369 370 } else { 371 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 372 } 373 return $corr_coeff 374} 375 376# lillieforsFit -- 377# Calculate the goodness of fit according to Lilliefors 378# (goodness of fit to a normal distribution) 379# 380# Arguments: 381# values List of values to be tested for normality 382# 383# Result: 384# Value of the statistic D 385# 386proc ::math::statistics::lillieforsFit {values} { 387 # 388 # calculate the goodness of fit according to Lilliefors 389 # (goodness of fit to a normal distribution) 390 # 391 # values -> list of values to be tested for normality 392 # (these values are sampled counts) 393 # 394 395 # calculate standard deviation and mean of the sample: 396 set n [llength $values] 397 if { $n < 5 } { 398 return -code error "Insufficient number of data (at least five required)" 399 } 400 set sd [stdev $values] 401 set mean [mean $values] 402 403 # sort the sample for further processing: 404 set values [lsort -real $values] 405 406 # standardize the sample data (Z-scores): 407 foreach x $values { 408 lappend stdData [expr {($x - $mean)/double($sd)}] 409 } 410 411 # compute the value of the distribution function at every sampled point: 412 foreach x $stdData { 413 lappend expData [pnorm $x] 414 } 415 416 # compute D+: 417 set i 0 418 foreach x $expData { 419 incr i 420 lappend dplus [expr {$i/double($n)-$x}] 421 } 422 set dplus [lindex [lsort -real $dplus] end] 423 424 # compute D-: 425 set i 0 426 foreach x $expData { 427 incr i 428 lappend dminus [expr {$x-($i-1)/double($n)}] 429 } 430 set dminus [lindex [lsort -real $dminus] end] 431 432 # Calculate the test statistic D 433 # by finding the maximal vertical difference 434 # between the sample and the expectation: 435 # 436 set D [expr {$dplus > $dminus ? $dplus : $dminus}] 437 438 # We now use the modified statistic Z, 439 # because D is only reliable 440 # if the p-value is smaller than 0.1 441 return [expr {$D * (sqrt($n) - 0.01 + 0.831/sqrt($n))}] 442} 443 444# pnorm -- 445# Calculate the cumulative distribution function (cdf) 446# for the standard normal distribution like in the statistical 447# software 'R' (mean=0 and sd=1) 448# 449# Arguments: 450# x Value fro which the cdf should be calculated 451# 452# Result: 453# Value of the statistic D 454# 455proc ::math::statistics::pnorm {x} { 456 # 457 # cumulative distribution function (cdf) 458 # for the standard normal distribution like in the statistical software 'R' 459 # (mean=0 and sd=1) 460 # 461 # x -> value for which the cdf should be calculated 462 # 463 set sum [expr {double($x)}] 464 set oldSum 0.0 465 set i 1 466 set denom 1.0 467 while {$sum != $oldSum} { 468 set oldSum $sum 469 incr i 2 470 set denom [expr {$denom*$i}] 471 #puts "$i - $denom" 472 set sum [expr {$oldSum + pow($x,$i)/$denom}] 473 } 474 return [expr {0.5 + $sum * exp(-0.5 * $x*$x - 0.91893853320467274178)}] 475} 476 477# pnorm_quicker -- 478# Calculate the cumulative distribution function (cdf) 479# for the standard normal distribution - quicker alternative 480# (less accurate) 481# 482# Arguments: 483# x Value for which the cdf should be calculated 484# 485# Result: 486# Value of the statistic D 487# 488proc ::math::statistics::pnorm_quicker {x} { 489 490 set n [expr {abs($x)}] 491 set n [expr {1.0 + $n*(0.04986735 + $n*(0.02114101 + $n*(0.00327763 \ 492 + $n*(0.0000380036 + $n*(0.0000488906 + $n*0.000005383)))))}] 493 set n [expr {1.0/pow($n,16)}] 494 # 495 if {$x >= 0} { 496 return [expr {1 - $n/2.0}] 497 } else { 498 return [expr {$n/2.0}] 499 } 500} 501 502# test-normal -- 503# Test for normality (using method Lilliefors) 504# 505# Arguments: 506# data Values that need to be tested 507# significance Level at which the discrepancy from normality is tested 508# 509# Result: 510# 1 if the Lilliefors statistic D is larger than the critical level 511# 512# Note: 513# There was a mistake in the implementation before 0.9.3: confidence (wrong word) 514# instead of significance. To keep compatibility with earlier versions, both 515# significance and 1-significance are accepted. 516# 517proc ::math::statistics::test-normal {data significance} { 518 set D [lillieforsFit $data] 519 520 if { $significance > 0.5 } { 521 set significance [expr {1.0-$significance}] ;# Convert the erroneous levels pre 0.9.3 522 } 523 524 set Dcrit -- 525 if { abs($significance-0.20) < 0.0001 } { 526 set Dcrit 0.741 527 } 528 if { abs($significance-0.15) < 0.0001 } { 529 set Dcrit 0.775 530 } 531 if { abs($significance-0.10) < 0.0001 } { 532 set Dcrit 0.819 533 } 534 if { abs($significance-0.05) < 0.0001 } { 535 set Dcrit 0.895 536 } 537 if { abs($significance-0.01) < 0.0001 } { 538 set Dcrit 1.035 539 } 540 if { $Dcrit != "--" } { 541 return [expr {$D > $Dcrit ? 1 : 0 }] 542 } else { 543 return -code error "Significancce level must be one of: 0.20, 0.15, 0.10, 0.05 or 0.01" 544 } 545} 546 547# t-test-mean -- 548# Test whether the mean value of a sample is in accordance with the 549# estimated normal distribution with a certain probability 550# (Student's t test) 551# 552# Arguments: 553# data List of raw data values (small sample) 554# est_mean Estimated mean of the distribution 555# est_stdev Estimated stdev of the distribution 556# alpha Probability level (0.95 or 0.99 for instance) 557# 558# Result: 559# 1 if the test is positive, 0 otherwise. If there are too few data, 560# returns an empty string 561# 562proc ::math::statistics::t-test-mean { data est_mean est_stdev alpha } { 563 variable NEGSTDEV 564 variable TOOFEWDATA 565 566 if { $est_stdev <= 0.0 } { 567 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV 568 } 569 570 set allstats [BasicStats all $data] 571 572 set alpha2 [expr {(1.0+$alpha)/2.0}] 573 574 set sample_mean [lindex $allstats 0] 575 set sample_number [lindex $allstats 3] 576 577 if { $sample_number > 1 } { 578 set tzero [expr {abs($sample_mean-$est_mean)/$est_stdev * \ 579 sqrt($sample_number-1)}] 580 set degrees [expr {$sample_number-1}] 581 set prob [cdf-students-t $degrees $tzero] 582 583 return [expr {$prob<$alpha2}] 584 585 } else { 586 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 587 } 588} 589 590# interval-mean-stdev -- 591# Return the interval containing the mean value and one 592# containing the standard deviation with a certain 593# level of confidence (assuming a normal distribution) 594# 595# Arguments: 596# data List of raw data values 597# confidence Confidence level (0.95 or 0.99 for instance) 598# 599# Result: 600# List having the following elements: lower and upper bounds of 601# mean, lower and upper bounds of stdev 602# 603# 604proc ::math::statistics::interval-mean-stdev { data confidence } { 605 variable TOOFEWDATA 606 607 set allstats [BasicStats all $data] 608 609 set conf2 [expr {(1.0+$confidence)/2.0}] 610 set mean [lindex $allstats 0] 611 set number [lindex $allstats 3] 612 set stdev [lindex $allstats 4] 613 614 if { $number > 1 } { 615 set degrees [expr {$number-1}] 616 set student_t [expr {sqrt([Inverse-cdf-toms322 1 $degrees $conf2])}] 617 set mean_lower [expr {$mean-$student_t*$stdev/sqrt($number)}] 618 set mean_upper [expr {$mean+$student_t*$stdev/sqrt($number)}] 619 set stdev_lower {} 620 set stdev_upper {} 621 return [list $mean_lower $mean_upper $stdev_lower $stdev_upper] 622 } else { 623 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 624 } 625} 626 627# quantiles -- 628# Return the quantiles for a given set of data or histogram 629# 630# Arguments: 631# (two arguments) 632# data List of raw data values 633# confidence Confidence level (0.95 or 0.99 for instance) 634# (three arguments) 635# limits List of upper limits from histogram 636# counts List of counts for for each interval in histogram 637# confidence Confidence level (0.95 or 0.99 for instance) 638# 639# Result: 640# List of quantiles 641# 642proc ::math::statistics::quantiles { arg1 arg2 {arg3 {}} } { 643 variable TOOFEWDATA 644 645 if { [catch { 646 if { $arg3 == {} } { 647 set result \ 648 [::math::statistics::QuantilesRawData $arg1 $arg2] 649 } else { 650 set result \ 651 [::math::statistics::QuantilesHistogram $arg1 $arg2 $arg3] 652 } 653 } msg] } { 654 return -code error -errorcode $msg $msg 655 } 656 return $result 657} 658 659# QuantilesRawData -- 660# Return the quantiles based on raw data 661# 662# Arguments: 663# data List of raw data values 664# confidence Confidence level (0.95 or 0.99 for instance) 665# 666# Result: 667# List of quantiles 668# 669proc ::math::statistics::QuantilesRawData { data confidence } { 670 variable TOOFEWDATA 671 variable OUTOFRANGE 672 673 if { [llength $confidence] <= 0 } { 674 return -code error -errorcode ARG "$TOOFEWDATA - quantiles" 675 } 676 677 if { [llength $data] <= 0 } { 678 return -code error -errorcode ARG "$TOOFEWDATA - raw data" 679 } 680 681 foreach cond $confidence { 682 if { $cond <= 0.0 || $cond >= 1.0 } { 683 return -code error -errorcode ARG "$OUTOFRANGE - quantiles" 684 } 685 } 686 687 # 688 # Sort the data first 689 # 690 set sorted_data [lsort -real -increasing $data] 691 692 # 693 # Determine the list element lower or equal to the quantile 694 # and return the corresponding value 695 # 696 set result {} 697 set number_data [llength $sorted_data] 698 foreach cond $confidence { 699 set elem [expr {round($number_data*$cond)-1}] 700 if { $elem < 0 } { 701 set elem 0 702 } 703 lappend result [lindex $sorted_data $elem] 704 } 705 706 return $result 707} 708 709# QuantilesHistogram -- 710# Return the quantiles based on histogram information only 711# 712# Arguments: 713# limits Upper limits for histogram intervals 714# counts Counts for each interval 715# confidence Confidence level (0.95 or 0.99 for instance) 716# 717# Result: 718# List of quantiles 719# 720proc ::math::statistics::QuantilesHistogram { limits counts confidence } { 721 variable TOOFEWDATA 722 variable OUTOFRANGE 723 724 if { [llength $confidence] <= 0 } { 725 return -code error -errorcode ARG "$TOOFEWDATA - quantiles" 726 } 727 728 if { [llength $confidence] <= 0 } { 729 return -code error -errorcode ARG "$TOOFEWDATA - histogram limits" 730 } 731 732 if { [llength $counts] <= [llength $limits] } { 733 return -code error -errorcode ARG "$TOOFEWDATA - histogram counts" 734 } 735 736 foreach cond $confidence { 737 if { $cond <= 0.0 || $cond >= 1.0 } { 738 return -code error -errorcode ARG "$OUTOFRANGE - quantiles" 739 } 740 } 741 742 # 743 # Accumulate the histogram counts first 744 # 745 set sum 0 746 set accumulated_counts {} 747 foreach count $counts { 748 set sum [expr {$sum+$count}] 749 lappend accumulated_counts $sum 750 } 751 set total_counts $sum 752 753 # 754 # Determine the list element lower or equal to the quantile 755 # and return the corresponding value (use interpolation if 756 # possible) 757 # 758 set result {} 759 foreach cond $confidence { 760 set found 0 761 set bound [expr {round($total_counts*$cond)}] 762 set lower_limit {} 763 set lower_count 0 764 foreach acc_count $accumulated_counts limit $limits { 765 if { $acc_count >= $bound } { 766 set found 1 767 break 768 } 769 set lower_limit $limit 770 set lower_count $acc_count 771 } 772 773 if { $lower_limit == {} || $limit == {} || $found == 0 } { 774 set quant $limit 775 if { $limit == {} } { 776 set quant $lower_limit 777 } 778 } else { 779 set quant [expr {$limit+($lower_limit-$limit) * 780 ($acc_count-$bound)/($acc_count-$lower_count)}] 781 } 782 lappend result $quant 783 } 784 785 return $result 786} 787 788# autocorr -- 789# Return the autocorrelation function (assuming equidistance between 790# samples) 791# 792# Arguments: 793# data Raw data for which the autocorrelation must be determined 794# 795# Result: 796# List of autocorrelation values (about 1/2 the number of raw data) 797# 798proc ::math::statistics::autocorr { data } { 799 variable TOOFEWDATA 800 801 if { [llength $data] <= 1 } { 802 return -code error -errorcode ARG "$TOOFEWDATA" 803 } 804 805 return [crosscorr $data $data] 806} 807 808# crosscorr -- 809# Return the cross-correlation function (assuming equidistance 810# between samples) 811# 812# Arguments: 813# data1 First set of raw data 814# data2 Second set of raw data 815# 816# Result: 817# List of cross-correlation values (about 1/2 the number of raw data) 818# 819# Note: 820# The number of data pairs is not kept constant - because tests 821# showed rather awkward results when it was kept constant. 822# 823proc ::math::statistics::crosscorr { data1 data2 } { 824 variable TOOFEWDATA 825 826 if { [llength $data1] <= 1 || [llength $data2] <= 1 } { 827 return -code error -errorcode ARG "$TOOFEWDATA" 828 } 829 830 # 831 # First determine the number of data pairs 832 # 833 set number1 [llength $data1] 834 set number2 [llength $data2] 835 836 set basic_stat1 [basic-stats $data1] 837 set basic_stat2 [basic-stats $data2] 838 set vmean1 [lindex $basic_stat1 0] 839 set vmean2 [lindex $basic_stat2 0] 840 set vvar1 [lindex $basic_stat1 end] 841 set vvar2 [lindex $basic_stat2 end] 842 843 set number_pairs $number1 844 if { $number1 > $number2 } { 845 set number_pairs $number2 846 } 847 set number_values $number_pairs 848 set number_delays [expr {$number_values/2.0}] 849 850 set scale [expr {sqrt($vvar1*$vvar2)}] 851 852 set result {} 853 for { set delay 0 } { $delay < $number_delays } { incr delay } { 854 set sumcross 0.0 855 set no_cross 0 856 for { set idx 0 } { $idx < $number_values } { incr idx } { 857 set value1 [lindex $data1 $idx] 858 set value2 [lindex $data2 [expr {$idx+$delay}]] 859 if { $value1 != {} && $value2 != {} } { 860 set sumcross \ 861 [expr {$sumcross+($value1-$vmean1)*($value2-$vmean2)}] 862 incr no_cross 863 } 864 } 865 lappend result [expr {$sumcross/($no_cross*$scale)}] 866 867 incr number_values -1 868 } 869 870 return $result 871} 872 873# mean-histogram-limits 874# Determine reasonable limits based on mean and standard deviation 875# for a histogram 876# 877# Arguments: 878# mean Mean of the data 879# stdev Standard deviation 880# number Number of limits to generate (defaults to 8) 881# 882# Result: 883# List of limits 884# 885proc ::math::statistics::mean-histogram-limits { mean stdev {number 8} } { 886 variable NEGSTDEV 887 888 if { $stdev <= 0.0 } { 889 return -code error -errorcode ARG "$NEGSTDEV" 890 } 891 if { $number < 1 } { 892 return -code error -errorcode ARG "Number of limits must be positive" 893 } 894 895 # 896 # Always: between mean-3.0*stdev and mean+3.0*stdev 897 # number = 2: -0.25, 0.25 898 # number = 3: -0.25, 0, 0.25 899 # number = 4: -1, -0.25, 0.25, 1 900 # number = 5: -1, -0.25, 0, 0.25, 1 901 # number = 6: -2, -1, -0.25, 0.25, 1, 2 902 # number = 7: -2, -1, -0.25, 0, 0.25, 1, 2 903 # number = 8: -3, -2, -1, -0.25, 0.25, 1, 2, 3 904 # 905 switch -- $number { 906 "1" { set limits {0.0} } 907 "2" { set limits {-0.25 0.25} } 908 "3" { set limits {-0.25 0.0 0.25} } 909 "4" { set limits {-1.0 -0.25 0.25 1.0} } 910 "5" { set limits {-1.0 -0.25 0.0 0.25 1.0} } 911 "6" { set limits {-2.0 -1.0 -0.25 0.25 1.0 2.0} } 912 "7" { set limits {-2.0 -1.0 -0.25 0.0 0.25 1.0 2.0} } 913 "8" { set limits {-3.0 -2.0 -1.0 -0.25 0.25 1.0 2.0 3.0} } 914 "9" { set limits {-3.0 -2.0 -1.0 -0.25 0.0 0.25 1.0 2.0 3.0} } 915 default { 916 set dlim [expr {6.0/double($number-1)}] 917 for {set i 0} {$i <$number} {incr i} { 918 lappend limits [expr {$dlim*($i-($number-1)/2.0)}] 919 } 920 } 921 } 922 923 set result {} 924 foreach limit $limits { 925 lappend result [expr {$mean+$limit*$stdev}] 926 } 927 928 return $result 929} 930 931# minmax-histogram-limits 932# Determine reasonable limits based on minimum and maximum bounds 933# for a histogram 934# 935# Arguments: 936# min Estimated minimum 937# max Estimated maximum 938# number Number of limits to generate (defaults to 8) 939# 940# Result: 941# List of limits 942# 943proc ::math::statistics::minmax-histogram-limits { min max {number 8} } { 944 variable NEGSTDEV 945 946 if { $number < 1 } { 947 return -code error -errorcode ARG "Number of limits must be positive" 948 } 949 if { $min >= $max } { 950 return -code error -errorcode ARG "Minimum must be lower than maximum" 951 } 952 953 set result {} 954 set dlim [expr {($max-$min)/double($number-1)}] 955 for {set i 0} {$i <$number} {incr i} { 956 lappend result [expr {$min+$dlim*$i}] 957 } 958 959 return $result 960} 961 962# linear-model 963# Determine the coefficients for a linear regression between 964# two series of data (the model: Y = A + B*X) 965# 966# Arguments: 967# xdata Series of independent (X) data 968# ydata Series of dependent (Y) data 969# intercept Whether to use an intercept or not (optional) 970# 971# Result: 972# List of the following items: 973# - (Estimate of) Intercept A 974# - (Estimate of) Slope B 975# - Standard deviation of Y relative to fit 976# - Correlation coefficient R2 977# - Number of degrees of freedom df 978# - Standard error of the intercept A 979# - Significance level of A 980# - Standard error of the slope B 981# - Significance level of B 982# 983# 984proc ::math::statistics::linear-model { xdata ydata {intercept 1} } { 985 variable TOOFEWDATA 986 987 if { [llength $xdata] < 3 } { 988 return -code error -errorcode ARG "$TOOFEWDATA: not enough independent data" 989 } 990 if { [llength $ydata] < 3 } { 991 return -code error -errorcode ARG "$TOOFEWDATA: not enough dependent data" 992 } 993 if { [llength $xdata] != [llength $ydata] } { 994 return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data" 995 } 996 997 set sumx 0.0 998 set sumy 0.0 999 set sumx2 0.0 1000 set sumy2 0.0 1001 set sumxy 0.0 1002 set df 0 1003 foreach x $xdata y $ydata { 1004 if { $x != "" && $y != "" } { 1005 set sumx [expr {$sumx+$x}] 1006 set sumy [expr {$sumy+$y}] 1007 set sumx2 [expr {$sumx2+$x*$x}] 1008 set sumy2 [expr {$sumy2+$y*$y}] 1009 set sumxy [expr {$sumxy+$x*$y}] 1010 incr df 1011 } 1012 } 1013 1014 if { $df <= 2 } { 1015 return -code error -errorcode ARG "$TOOFEWDATA: too few valid data" 1016 } 1017 if { $sumx2 == 0.0 } { 1018 return -code error -errorcode ARG "$TOOFEWDATA: independent values are all the same" 1019 } 1020 1021 # 1022 # Calculate the intermediate quantities 1023 # 1024 set sx [expr {$sumx2-$sumx*$sumx/$df}] 1025 set sy [expr {$sumy2-$sumy*$sumy/$df}] 1026 set sxy [expr {$sumxy-$sumx*$sumy/$df}] 1027 1028 # 1029 # Calculate the coefficients 1030 # 1031 if { $intercept } { 1032 set B [expr {$sxy/$sx}] 1033 set A [expr {($sumy-$B*$sumx)/$df}] 1034 } else { 1035 set B [expr {$sumxy/$sumx2}] 1036 set A 0.0 1037 } 1038 1039 # 1040 # Calculate the error estimates 1041 # 1042 set stdevY 0.0 1043 set varY 0.0 1044 1045 if { $intercept } { 1046 set ve [expr {$sy-$B*$sxy}] 1047 if { $ve >= 0.0 } { 1048 set varY [expr {$ve/($df-2)}] 1049 } 1050 } else { 1051 set ve [expr {$sumy2-$B*$sumxy}] 1052 if { $ve >= 0.0 } { 1053 set varY [expr {$ve/($df-1)}] 1054 } 1055 } 1056 set seY [expr {sqrt($varY)}] 1057 1058 if { $intercept } { 1059 set R2 [expr {$sxy*$sxy/($sx*$sy)}] 1060 set seA [expr {$seY*sqrt(1.0/$df+$sumx*$sumx/($sx*$df*$df))}] 1061 set seB [expr {sqrt($varY/$sx)}] 1062 set tA {} 1063 set tB {} 1064 if { $seA != 0.0 } { 1065 set tA [expr {$A/$seA*sqrt($df-2)}] 1066 } 1067 if { $seB != 0.0 } { 1068 set tB [expr {$B/$seB*sqrt($df-2)}] 1069 } 1070 } else { 1071 set R2 [expr {$sumxy*$sumxy/($sumx2*$sumy2)}] 1072 set seA {} 1073 set tA {} 1074 set tB {} 1075 set seB [expr {sqrt($varY/$sumx2)}] 1076 if { $seB != 0.0 } { 1077 set tB [expr {$B/$seB*sqrt($df-1)}] 1078 } 1079 } 1080 1081 # 1082 # Return the list of parameters 1083 # 1084 return [list $A $B $seY $R2 $df $seA $tA $seB $tB] 1085} 1086 1087# linear-residuals 1088# Determine the difference between actual data and predicted from 1089# the linear model 1090# 1091# Arguments: 1092# xdata Series of independent (X) data 1093# ydata Series of dependent (Y) data 1094# intercept Whether to use an intercept or not (optional) 1095# 1096# Result: 1097# List of differences 1098# 1099proc ::math::statistics::linear-residuals { xdata ydata {intercept 1} } { 1100 variable TOOFEWDATA 1101 1102 if { [llength $xdata] < 3 } { 1103 return -code error -errorcode ARG "$TOOFEWDATA: no independent data" 1104 } 1105 if { [llength $ydata] < 3 } { 1106 return -code error -errorcode ARG "$TOOFEWDATA: no dependent data" 1107 } 1108 if { [llength $xdata] != [llength $ydata] } { 1109 return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data" 1110 } 1111 1112 foreach {A B} [linear-model $xdata $ydata $intercept] {break} 1113 1114 set result {} 1115 foreach x $xdata y $ydata { 1116 set residue [expr {$y-$A-$B*$x}] 1117 lappend result $residue 1118 } 1119 return $result 1120} 1121 1122# median 1123# Determine the median from a list of data 1124# 1125# Arguments: 1126# data (Unsorted) list of data 1127# 1128# Result: 1129# Median (either the middle value or the mean of two values in the 1130# middle) 1131# 1132# Note: 1133# Adapted from the Wiki page "Stats", code provided by JPS 1134# 1135proc ::math::statistics::median { data } { 1136 set org_data $data 1137 set data {} 1138 foreach value $org_data { 1139 if { $value != {} } { 1140 lappend data $value 1141 } 1142 } 1143 set len [llength $data] 1144 1145 set data [lsort -real $data] 1146 if { $len % 2 } { 1147 lindex $data [expr {($len-1)/2}] 1148 } else { 1149 expr {([lindex $data [expr {($len / 2) - 1}]] \ 1150 + [lindex $data [expr {$len / 2}]]) / 2.0} 1151 } 1152} 1153 1154# test-2x2 -- 1155# Compute the chi-square statistic for a 2x2 table 1156# 1157# Arguments: 1158# a Element upper-left 1159# b Element upper-right 1160# c Element lower-left 1161# d Element lower-right 1162# Return value: 1163# Chi-square 1164# Note: 1165# There is only one degree of freedom - this is important 1166# when comparing the value to the tabulated values 1167# of chi-square 1168# 1169proc ::math::statistics::test-2x2 { a b c d } { 1170 set ab [expr {$a+$b}] 1171 set ac [expr {$a+$c}] 1172 set bd [expr {$b+$d}] 1173 set cd [expr {$c+$d}] 1174 set N [expr {$a+$b+$c+$d}] 1175 set det [expr {$a*$d-$b*$c}] 1176 set result [expr {double($N*$det*$det)/double($ab*$cd*$ac*$bd)}] 1177} 1178 1179# print-2x2 -- 1180# Print a 2x2 table 1181# 1182# Arguments: 1183# a Element upper-left 1184# b Element upper-right 1185# c Element lower-left 1186# d Element lower-right 1187# Return value: 1188# Printed version with marginals 1189# 1190proc ::math::statistics::print-2x2 { a b c d } { 1191 set ab [expr {$a+$b}] 1192 set ac [expr {$a+$c}] 1193 set bd [expr {$b+$d}] 1194 set cd [expr {$c+$d}] 1195 set N [expr {$a+$b+$c+$d}] 1196 set chisq [test-2x2 $a $b $c $d] 1197 1198 set line [string repeat - 10] 1199 set result [format "%10d%10d | %10d\n" $a $b $ab] 1200 append result [format "%10d%10d | %10d\n" $c $d $cd] 1201 append result [format "%10s%10s + %10s\n" $line $line $line] 1202 append result [format "%10d%10d | %10d\n" $ac $bd $N] 1203 append result "Chisquare = $chisq\n" 1204 append result "Difference is significant?\n" 1205 append result " at 95%: [expr {$chisq<3.84146? "no":"yes"}]\n" 1206 append result " at 99%: [expr {$chisq<6.63490? "no":"yes"}]" 1207} 1208 1209# control-xbar -- 1210# Determine the control lines for an x-bar chart 1211# 1212# Arguments: 1213# data List of observed values (at least 20*nsamples) 1214# nsamples Number of data per subsamples (default: 4) 1215# Return value: 1216# List of: mean, lower limit, upper limit, number of data per 1217# subsample. Can be used in the test-xbar procedure 1218# 1219proc ::math::statistics::control-xbar { data {nsamples 4} } { 1220 variable TOOFEWDATA 1221 variable control_factors 1222 1223 # 1224 # Check the number of data 1225 # 1226 if { $nsamples <= 1 } { 1227 return -code error -errorcode DATA -errorinfo $OUTOFRANGE \ 1228 "Number of data per subsample must be at least 2" 1229 } 1230 if { [llength $data] < 20*$nsamples } { 1231 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1232 } 1233 1234 set nogroups [expr {[llength $data]/$nsamples}] 1235 set mrange 0.0 1236 set xmeans 0.0 1237 for { set i 0 } { $i < $nogroups } { incr i } { 1238 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1239 1240 set xmean 0.0 1241 set xmin [lindex $subsample 0] 1242 set xmax $xmin 1243 foreach d $subsample { 1244 set xmean [expr {$xmean+$d}] 1245 set xmin [expr {$xmin<$d? $xmin : $d}] 1246 set xmax [expr {$xmax>$d? $xmax : $d}] 1247 } 1248 set xmean [expr {$xmean/double($nsamples)}] 1249 1250 set xmeans [expr {$xmeans+$xmean}] 1251 set mrange [expr {$mrange+($xmax-$xmin)}] 1252 } 1253 1254 # 1255 # Determine the control lines 1256 # 1257 set xmeans [expr {$xmeans/double($nogroups)}] 1258 set mrange [expr {$mrange/double($nogroups)}] 1259 set A2 [lindex [lindex $control_factors 1] $nsamples] 1260 if { $A2 == "" } { set A2 [lindex [lindex $control_factors 1] end] } 1261 1262 return [list $xmeans [expr {$xmeans-$A2*$mrange}] \ 1263 [expr {$xmeans+$A2*$mrange}] $nsamples] 1264} 1265 1266# test-xbar -- 1267# Determine if any data points lie outside the x-bar control limits 1268# 1269# Arguments: 1270# control List returned by control-xbar with control data 1271# data List of observed values 1272# Return value: 1273# Indices of any subsamples that violate the control limits 1274# 1275proc ::math::statistics::test-xbar { control data } { 1276 foreach {xmean xlower xupper nsamples} $control {break} 1277 1278 if { [llength $data] < 1 } { 1279 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1280 } 1281 1282 set nogroups [expr {[llength $data]/$nsamples}] 1283 if { $nogroups <= 0 } { 1284 set nogroup 1 1285 set nsamples [llength $data] 1286 } 1287 1288 set result {} 1289 1290 for { set i 0 } { $i < $nogroups } { incr i } { 1291 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1292 1293 set xmean 0.0 1294 foreach d $subsample { 1295 set xmean [expr {$xmean+$d}] 1296 } 1297 set xmean [expr {$xmean/double($nsamples)}] 1298 1299 if { $xmean < $xlower } { lappend result $i } 1300 if { $xmean > $xupper } { lappend result $i } 1301 } 1302 1303 return $result 1304} 1305 1306# control-Rchart -- 1307# Determine the control lines for an R chart 1308# 1309# Arguments: 1310# data List of observed values (at least 20*nsamples) 1311# nsamples Number of data per subsamples (default: 4) 1312# Return value: 1313# List of: mean range, lower limit, upper limit, number of data per 1314# subsample. Can be used in the test-Rchart procedure 1315# 1316proc ::math::statistics::control-Rchart { data {nsamples 4} } { 1317 variable TOOFEWDATA 1318 variable control_factors 1319 1320 # 1321 # Check the number of data 1322 # 1323 if { $nsamples <= 1 } { 1324 return -code error -errorcode DATA -errorinfo $OUTOFRANGE \ 1325 "Number of data per subsample must be at least 2" 1326 } 1327 if { [llength $data] < 20*$nsamples } { 1328 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1329 } 1330 1331 set nogroups [expr {[llength $data]/$nsamples}] 1332 set mrange 0.0 1333 for { set i 0 } { $i < $nogroups } { incr i } { 1334 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1335 1336 set xmin [lindex $subsample 0] 1337 set xmax $xmin 1338 foreach d $subsample { 1339 set xmin [expr {$xmin<$d? $xmin : $d}] 1340 set xmax [expr {$xmax>$d? $xmax : $d}] 1341 } 1342 set mrange [expr {$mrange+($xmax-$xmin)}] 1343 } 1344 1345 # 1346 # Determine the control lines 1347 # 1348 set mrange [expr {$mrange/double($nogroups)}] 1349 set D3 [lindex [lindex $control_factors 3] $nsamples] 1350 set D4 [lindex [lindex $control_factors 5] $nsamples] 1351 if { $D3 == "" } { set D3 [lindex [lindex $control_factors 3] end] } 1352 if { $D4 == "" } { set D4 [lindex [lindex $control_factors 5] end] } 1353 1354 return [list $mrange [expr {$D3*$mrange}] \ 1355 [expr {$D4*$mrange}] $nsamples] 1356} 1357 1358# test-Rchart -- 1359# Determine if any data points lie outside the R-chart control limits 1360# 1361# Arguments: 1362# control List returned by control-xbar with control data 1363# data List of observed values 1364# Return value: 1365# Indices of any subsamples that violate the control limits 1366# 1367proc ::math::statistics::test-Rchart { control data } { 1368 foreach {rmean rlower rupper nsamples} $control {break} 1369 1370 # 1371 # Check the number of data 1372 # 1373 if { [llength $data] < 1 } { 1374 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1375 } 1376 1377 set nogroups [expr {[llength $data]/$nsamples}] 1378 1379 set result {} 1380 for { set i 0 } { $i < $nogroups } { incr i } { 1381 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1382 1383 set xmin [lindex $subsample 0] 1384 set xmax $xmin 1385 foreach d $subsample { 1386 set xmin [expr {$xmin<$d? $xmin : $d}] 1387 set xmax [expr {$xmax>$d? $xmax : $d}] 1388 } 1389 set range [expr {$xmax-$xmin}] 1390 1391 if { $range < $rlower } { lappend result $i } 1392 if { $range > $rupper } { lappend result $i } 1393 } 1394 1395 return $result 1396} 1397 1398# test-Duckworth -- 1399# Determine if two data sets have the same median according to the Tukey-Duckworth test 1400# 1401# Arguments: 1402# list1 Values in the first data set 1403# list2 Values in the second data set 1404# significance Significance level (either 0.05, 0.01 or 0.001) 1405# 1406# Returns: 1407# 0 if the medians are unequal, 1 if they are equal, -1 if the test can not 1408# be conducted (the smallest value must be in a different set than the greatest value) 1409# 1410proc ::math::statistics::test-Duckworth {list1 list2 significance} { 1411 set sorted1 [lsort -real $list1] 1412 set sorted2 [lsort -real -decreasing $list2] 1413 1414 set lowest1 [lindex $sorted1 0] 1415 set lowest2 [lindex $sorted2 end] 1416 set greatest1 [lindex $sorted1 end] 1417 set greatest2 [lindex $sorted2 0] 1418 1419 if { $lowest1 <= $lowest2 && $greatest1 >= $greatest2 } { 1420 return -1 1421 } 1422 if { $lowest1 >= $lowest2 && $greatest1 <= $greatest2 } { 1423 return -1 1424 } 1425 1426 # 1427 # Determine how many elements of set 1 are lower than the lowest of set 2 1428 # Ditto for the number of elements of set 2 greater than the greatest of set 1 1429 # (Or vice versa) 1430 # 1431 if { $lowest1 < $lowest2 } { 1432 set lowest $lowest2 1433 set greatest $greatest1 1434 } else { 1435 set lowest $lowest1 1436 set greatest $greatest2 1437 set sorted1 [lsort -real $list2] 1438 set sorted2 [lsort -real -decreasing $list1] 1439 #lassign [list $sorted1 $sorted2] sorted2 sorted1 1440 } 1441 1442 set count1 0 1443 set count2 0 1444 foreach v1 $sorted1 { 1445 if { $v1 >= $lowest } { 1446 break 1447 } 1448 incr count1 1449 } 1450 foreach v2 $sorted2 { 1451 if { $v2 <= $greatest } { 1452 break 1453 } 1454 incr count2 1455 } 1456 1457 # 1458 # Determine the statistic D, possibly with correction 1459 # 1460 set n1 [llength $list1] 1461 set n2 [llength $list2] 1462 1463 set correction 0 1464 if { 3 + 4*$n1/3 <= $n2 && $n2 <= 2*$n1 } { 1465 set correction -1 1466 } 1467 if { 3 + 4*$n2/3 <= $n1 && $n1 <= 2*$n2 } { 1468 set correction -1 1469 } 1470 1471 set D [expr {$count1 + $count2 + $correction}] 1472 1473 switch -- [string trim $significance 0] { 1474 ".05" { 1475 return [expr {$D >= 7? 0 : 1}] 1476 } 1477 ".01" { 1478 return [expr {$D >= 10? 0 : 1}] 1479 } 1480 ".001" { 1481 return [expr {$D >= 13? 0 : 1}] 1482 } 1483 default { 1484 return -code error "Significance level must be 0.05, 0.01 or 0.001" 1485 } 1486 } 1487} 1488 1489# test-anova-F -- 1490# Check if two or more groups with normally distributed data have the same 1491# means 1492# 1493# Arguments: 1494# alpha Significance level 1495# args One or more lists containing the data for the 1496# other groups 1497# 1498# Returns: 1499# Whether the mean for the groups is likely to be the same (1) 1500# or not (0). 1501# 1502# Note: 1503# args may be a nested list 1504# 1505# Implementation based on Wikipedia page on the F-test, one-way ANOVA 1506# 1507# Possibly of interest: the ratio itself. 1508# 1509proc ::math::statistics::test-anova-F {alpha args} { 1510 if { [llength $args] == 1 } { 1511 set args [lindex $args 0] 1512 } 1513 1514 if { [llength $args] < 2 } { 1515 return -code error -errorcode ARG -errorinfo "At least two groups are required" \ 1516 "At least two groups are required" 1517 } 1518 1519 # 1520 # Determine the variance within the groups and the variance between the groups 1521 # 1522 set meanPerGroup {} 1523 set varPerGroup {} 1524 set allData {} 1525 foreach group $args { 1526 lappend meanPerGroup [::math::statistics::mean $group] 1527 lappend varPerGroup [::math::statistics::pvar $group] 1528 set allData [concat $allData $group] 1529 } 1530 set meanOverall [::math::statistics::mean $allData] 1531 1532 set varBetween 0.0 1533 foreach group $args mean $meanPerGroup { 1534 set varBetween [expr {$varBetween + [llength $group] * ($mean - $meanOverall)**2}] 1535 } 1536 set varBetween [expr {$varBetween / ([llength $args] - 1)}] 1537 1538 set varWithin 0.0 1539 foreach group $args var $varPerGroup { 1540 set varWithin [expr {$varWithin + [llength $group] * $var}] 1541 } 1542 set varWithin [expr {$varWithin / ([llength $allData] - [llength $args])}] 1543 1544 # 1545 # Finally compare the ratio to the F-distribution 1546 # 1547 set ratio [expr {$varBetween / $varWithin}] 1548 set nf1 [expr {[llength $args] - 1}] 1549 set nf2 [expr {[llength $allData] - [llength $args]}] 1550 1551 expr {[::math::statistics::cdf-F $nf1 $nf2 $ratio] <= 1.0 - $alpha} 1552} 1553 1554# test-Tukey-range -- 1555# Check if two or more groups with normally distributed data have different 1556# means or not, using Tukey's range test 1557# 1558# Arguments: 1559# alpha Significance level 1560# args Two or more lists containing the data for the 1561# other groups 1562# 1563# Returns: 1564# For each pair of groups a list of: 1565# - group indices 1566# - whether the means differ (1) or not (0) 1567# - limits of the confidence interval (for closer investigation) 1568# 1569# Note: 1570# args may be a nested list 1571# 1572# Implementation based on Wikipedia page on Tukey's range test 1573# 1574proc ::math::statistics::test-Tukey-range {alpha args} { 1575 if { [llength $args] == 1 } { 1576 set args [lindex $args 0] 1577 } 1578 1579 if { [llength $args] < 2 } { 1580 return -code error -errorcode ARG -errorinfo "At least two groups are required" \ 1581 "At least two groups are required" 1582 } 1583 1584 if { $alpha != 0.05 && $alpha != 0.01 } { 1585 return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01" 1586 } 1587 1588 # 1589 # Determine the mean per group and the pooled variance of the data 1590 # 1591 set meanPerGroup {} 1592 set allData {} 1593 set sumVar 0.0 1594 foreach group $args { 1595 lappend meanPerGroup [mean $group] 1596 set sumVar [expr {$sumVar + ([llength $group]-1) * [var $group]}] 1597 set allData [concat $allData $group] 1598 } 1599 1600 set n [llength $allData] 1601 set stdOverall [expr {sqrt($sumVar /($n - [llength $args]))}] 1602 1603 set qcrit [Qcrit-Tukey $alpha $n [llength $args]] 1604 1605 set result {} 1606 1607 for {set g 0} {$g < [llength $args]} {incr g} { 1608 set ggroup [lindex $args $g] 1609 set gmean [mean $ggroup] 1610 set ng [llength $ggroup] 1611 1612 for {set h [expr {$g+1}]} {$h < [llength $args]} {incr h} { 1613 set hgroup [lindex $args $h] 1614 set hmean [mean $hgroup] 1615 set nh [llength $hgroup] 1616 1617 set halfwidth [expr {$qcrit * $stdOverall / sqrt(2.0) * sqrt( 1.0/$ng + 1.0/$nh )}] 1618 set lower [expr {$hmean - $gmean - $halfwidth}] 1619 set upper [expr {$hmean - $gmean + $halfwidth}] 1620 set unequal 1 1621 if { $lower < 0.0 && $upper > 0.0 } { 1622 set unequal 0 1623 } 1624 lappend result [list $g $h $unequal $lower $upper] 1625 } 1626 } 1627 1628 return $result 1629} 1630 1631# Qcrit-Tukey -- 1632# Determine the critical value for the Tukey range test 1633# 1634# Arguments: 1635# alpha Significance level 1636# numberData Total number of data 1637# numberGroups Number of groups 1638# 1639# Returns: 1640# Critical value 1641# 1642# Note: 1643# If there are more than 10 groups, simply use 10 groups 1644# 1645proc ::math::statistics::Qcrit-Tukey {alpha numberData numberGroups} { 1646 variable tukey_table_05 1647 variable tukey_table_01 1648 1649 if { $alpha == 0.05 } { 1650 upvar 0 tukey_table_05 tukey_table 1651 } else { 1652 upvar 0 tukey_table_05 tukey_table 1653 } 1654 1655 set df [expr {$numberData - $numberGroups}] 1656 1657 if { $numberGroups > 10 } { 1658 set numberGroups 10 1659 } 1660 incr numberGroups -1 ;# Offset because of 0-based numbering 1661 1662 if { $df > 120 } { 1663 return [lindex $tukey_table end $numberGroups] 1664 } 1665 1666 foreach {dfe values} $tukey_table { 1667 if { $df <= $dfe } { 1668 return [lindex $values $numberGroups] 1669 } 1670 } 1671} 1672 1673# test-Dunnett -- 1674# Check if one or more groups with normally distributed data have different 1675# means from the control group or not, using Dunnett's test 1676# 1677# Arguments: 1678# alpha Significance level 1679# control Control group 1680# args One or more lists containing the data for the 1681# other groups 1682# 1683# Returns: 1684# For each group a list of: 1685# - whether the mean differs (1) from the control or not (0) 1686# - the confidence interval 1687# 1688# Note: 1689# args may be a nested list 1690# 1691# Implementation based on Wikipedia page on Dunnett's test 1692# The test is two-sided. 1693# 1694proc ::math::statistics::test-Dunnett {alpha control args} { 1695 if { [llength $args] == 1 } { 1696 set args [lindex $args 0] 1697 } 1698 1699 if { [llength $args] < 1 } { 1700 return -code error -errorcode ARG -errorinfo "At least one additional group is required" \ 1701 "At least one additional group is required" 1702 } 1703 1704 if { $alpha != 0.05 && $alpha != 0.01 } { 1705 return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01" 1706 } 1707 1708 # 1709 # Determine the mean per group and the pooled variance 1710 # 1711 set allData $control 1712 set sumVar [expr {([llength $control]-1)*[var $control]}] 1713 foreach group $args { 1714 set sumVar [expr {$sumVar + ([llength $group]-1) * [var $group]}] 1715 set allData [concat $allData $group] 1716 } 1717 1718 set n [llength $allData] 1719 set stdOverall [expr {sqrt($sumVar /($n - [llength $args] - 1))}] 1720 1721 set tcrit [Tcrit-Dunnett $alpha $n [llength $args]] 1722 1723 set result {} 1724 1725 set cmean [mean $control] 1726 set nc [llength $control] 1727 1728 for {set g 0} {$g < [llength $args]} {incr g} { 1729 set ggroup [lindex $args $g] 1730 set gmean [mean $ggroup] 1731 set ng [llength $ggroup] 1732 1733 set halfwidth [expr {$tcrit * $stdOverall * sqrt( 1.0/$nc + 1.0/$ng )}] 1734 set lower [expr {$gmean - $cmean - $halfwidth}] 1735 set upper [expr {$gmean - $cmean + $halfwidth}] 1736 set unequal 1 1737 if { $lower < 0.0 && $upper > 0.0 } { 1738 set unequal 0 1739 } 1740 lappend result [list $unequal $lower $upper] 1741 } 1742 1743 return $result 1744} 1745 1746# Tcrit-Dunnett -- 1747# Determine the critical value for the Dunnett test 1748# 1749# Arguments: 1750# alpha Significance level 1751# numberData Total number of data 1752# numberGroups Number of groups to compare against the control 1753# 1754# Returns: 1755# Critical value 1756# 1757# Note: 1758# If there are more than 10 groups, simply use 10 groups 1759# 1760proc ::math::statistics::Tcrit-Dunnett {alpha numberData numberGroups} { 1761 variable dunnett_table_05 1762 variable dunnett_table_01 1763 1764 if { $alpha == 0.05 } { 1765 upvar 0 dunnett_table_05 dunnett_table 1766 } else { 1767 upvar 0 dunnett_table_05 dunnett_table 1768 } 1769 1770 set df [expr {$numberData - $numberGroups - 1}] 1771 1772 incr numberGroups 1 ;# Add the control group 1773 if { $numberGroups > 10 } { 1774 set numberGroups 10 1775 } 1776 incr numberGroups -2 ;# Offset because of 0-based numbering and start at 2 1777 1778 if { $df > 60 } { 1779 return [lindex $dunnett_table end $numberGroups] 1780 } 1781 1782 foreach {dfe values} $dunnett_table { 1783 if { $df <= $dfe } { 1784 return [lindex $values $numberGroups] 1785 } 1786 } 1787} 1788 1789# 1790# Load the auxiliary scripts 1791# 1792source [file join [file dirname [info script]] pdf_stat.tcl] 1793source [file join [file dirname [info script]] plotstat.tcl] 1794source [file join [file dirname [info script]] liststat.tcl] 1795source [file join [file dirname [info script]] mvlinreg.tcl] 1796source [file join [file dirname [info script]] kruskal.tcl] 1797source [file join [file dirname [info script]] wilcoxon.tcl] 1798source [file join [file dirname [info script]] stat_kernel.tcl] 1799source [file join [file dirname [info script]] stat_wasserstein.tcl] 1800source [file join [file dirname [info script]] stat_logit.tcl] 1801 1802# 1803# Define the tables 1804# 1805namespace eval ::math::statistics { 1806 variable tukey_table_05 1807 variable tukey_table_01 1808 variable dunnett_table_05 1809 variable dunnett_table_01 1810 1811 #alpha = 0.05 1812 #k 2 3 4 5 6 7 8 9 10 1813 #df 1814 set tukey_table_05 { 1815 1 {18.0 27.0 32.8 37.1 40.4 43.1 45.4 47.4 49.1} 1816 2 {6.08 8.33 9.80 10.88 11.73 12.43 13.03 13.54 13.99} 1817 3 {4.50 5.91 6.82 7.50 8.04 8.48 8.85 9.18 9.46} 1818 4 {3.93 5.04 5.76 6.29 6.71 7.05 7.35 7.60 7.83} 1819 5 {3.64 4.60 5.22 5.67 6.03 6.33 6.58 6.80 6.99} 1820 6 {3.46 4.34 4.90 5.30 5.63 5.90 6.12 6.32 6.49} 1821 7 {3.34 4.16 4.68 5.06 5.36 5.61 5.82 6.00 6.16} 1822 8 {3.26 4.04 4.53 4.89 5.17 5.40 5.60 5.77 5.92} 1823 9 {3.20 3.95 4.41 4.76 5.02 5.24 5.43 5.59 5.74} 1824 10 {3.15 3.88 4.33 4.65 4.91 5.12 5.30 5.46 5.60} 1825 11 {3.11 3.82 4.26 4.57 4.82 5.03 5.20 5.35 5.49} 1826 12 {3.08 3.77 4.20 4.51 4.75 4.95 5.12 5.27 5.39} 1827 13 {3.06 3.73 4.15 4.45 4.69 4.88 5.05 5.19 5.32} 1828 14 {3.03 3.70 4.11 4.41 4.64 4.83 4.99 5.13 5.25} 1829 15 {3.01 3.67 4.08 4.37 4.59 4.78 4.94 5.08 5.20} 1830 16 {3.00 3.65 4.05 4.33 4.56 4.74 4.90 5.03 5.15} 1831 17 {2.98 3.63 4.02 4.30 4.52 4.70 4.86 4.99 5.11} 1832 18 {2.97 3.61 4.00 4.28 4.49 4.67 4.82 4.96 5.07} 1833 19 {2.96 3.59 3.98 4.25 4.47 4.65 4.79 4.92 5.04} 1834 20 {2.95 3.58 3.96 4.23 4.45 4.62 4.77 4.90 5.01} 1835 24 {2.92 3.53 3.90 4.17 4.37 4.54 4.68 4.81 4.92} 1836 30 {2.89 3.49 3.85 4.10 4.30 4.46 4.60 4.72 4.82} 1837 40 {2.86 3.44 3.79 4.04 4.23 4.39 4.52 4.63 4.73} 1838 60 {2.83 3.40 3.74 3.98 4.16 4.31 4.44 4.55 4.65} 1839 120 {2.80 3.36 3.68 3.92 4.10 4.24 4.36 4.47 4.56} 1840 inf {2.77 3.31 3.63 3.86 4.03 4.17 4.29 4.39 4.47}} 1841 1842 #alpha = 0.01 1843 #k 2 3 4 5 6 7 8 9 10 1844 #df 1845 set tukey_table_01 { 1846 1 {90.0 135 164 186 202 216 227 237 246} 1847 2 {13.90 19.02 22.56 25.37 27.76 29.86 31.73 33.41 34.93} 1848 3 {8.26 10.62 12.17 13.32 14.24 15.00 15.65 16.21 16.71} 1849 4 {6.51 8.12 9.17 9.96 10.58 11.10 11.54 11.92 12.26} 1850 5 {5.70 6.98 7.80 8.42 8.91 9.32 9.67 9.97 10.24} 1851 6 {5.24 6.33 7.03 7.56 7.97 8.32 8.61 8.87 9.10} 1852 7 {4.95 5.92 6.54 7.00 7.37 7.68 7.94 8.17 8.37} 1853 8 {4.75 5.64 6.20 6.62 6.96 7.24 7.47 7.68 7.86} 1854 9 {4.60 5.43 5.96 6.35 6.66 6.91 7.13 7.33 7.49} 1855 10 {4.48 5.27 5.77 6.14 6.43 6.67 6.87 7.05 7.21} 1856 11 {4.39 5.15 5.62 5.97 6.25 6.48 6.67 6.84 6.99} 1857 12 {4.32 5.05 5.50 5.84 6.10 6.32 6.51 6.67 6.81} 1858 13 {4.26 4.96 5.40 5.73 5.98 6.19 6.37 6.53 6.67} 1859 14 {4.21 4.89 5.32 5.63 5.88 6.08 6.26 6.41 6.54} 1860 15 {4.17 4.84 5.25 5.56 5.80 5.99 6.16 6.31 6.44} 1861 16 {4.13 4.79 5.19 5.49 5.72 5.92 6.08 6.22 6.35} 1862 17 {4.10 4.74 5.14 5.43 5.66 5.85 6.01 6.15 6.27} 1863 18 {4.07 4.70 5.09 5.38 5.60 5.79 5.94 6.08 6.20} 1864 19 {4.05 4.67 5.05 5.33 5.55 5.73 5.89 6.02 6.14} 1865 20 {4.02 4.64 5.02 5.29 5.51 5.69 5.84 5.97 6.09} 1866 24 {3.96 4.55 4.91 5.17 5.37 5.54 5.69 5.81 5.92} 1867 30 {3.89 4.45 4.80 5.05 5.24 5.40 5.54 5.65 5.76} 1868 40 {3.82 4.37 4.70 4.93 5.11 5.26 5.39 5.50 5.60} 1869 60 {3.76 4.28 4.59 4.82 4.99 5.13 5.25 5.36 5.45} 1870 120 {3.70 4.20 4.50 4.71 4.87 5.01 5.12 5.21 5.30} 1871 inf {3.64 4.12 4.40 4.60 4.76 4.88 4.99 5.08 5.16}} 1872 1873 #From: http://davidmlane.com/hyperstat/table_Dunnett.html 1874 #Dunnett Table 1875 #Number of Groups Including Control Group 1876 #dfe alpha = 0.05 1877 # 2 3 4 5 6 7 8 9 10 1878 set dunnett_table_05 { 1879 5 {2.57 3.03 3.29 3.48 3.62 3.73 3.82 3.9 3.97} 1880 6 {2.45 2.86 3.1 3.26 3.39 3.49 3.57 3.64 3.71} 1881 7 {2.36 2.75 2.97 3.12 3.24 3.33 3.41 3.47 3.53} 1882 8 {2.31 2.67 2.88 3.02 3.13 3.22 3.29 3.35 3.41} 1883 9 {2.26 2.61 2.81 2.95 3.05 3.14 3.2 3.26 3.32} 1884 10 {2.23 2.57 2.76 2.89 2.99 3.07 3.14 3.19 3.24} 1885 11 {2.2 2.53 2.72 2.84 2.94 3.02 3.08 3.14 3.19} 1886 12 {2.18 2.5 2.68 2.81 2.9 2.98 3.04 3.09 3.14} 1887 13 {2.16 2.48 2.65 2.78 2.87 2.94 3 3.06 3.1} 1888 14 {2.14 2.46 2.63 2.75 2.84 2.91 2.97 3.02 3.07} 1889 15 {2.13 2.44 2.61 2.73 2.82 2.89 2.95 3 3.04} 1890 16 {2.12 2.42 2.59 2.71 2.8 2.87 2.92 2.97 3.02} 1891 17 {2.11 2.41 2.58 2.69 2.78 2.85 2.9 2.95 3} 1892 18 {2.1 2.4 2.56 2.68 2.76 2.83 2.89 2.94 2.98} 1893 19 {2.09 2.39 2.55 2.66 2.75 2.81 2.87 2.92 2.96} 1894 20 {2.09 2.38 2.54 2.65 2.73 2.8 2.86 2.9 2.95} 1895 24 {2.06 2.35 2.51 2.61 2.7 2.76 2.81 2.86 2.9} 1896 30 {2.04 2.32 2.47 2.58 2.66 2.72 2.77 2.82 2.86} 1897 40 {2.02 2.29 2.44 2.54 2.62 2.68 2.73 2.77 2.81} 1898 60 {2 2.27 2.41 2.51 2.58 2.64 2.69 2.73 2.77}} 1899 1900 set dunnett_table_01 { 1901 5 {4.03 4.63 4.98 5.22 5.41 5.56 5.69 5.8 5.89} 1902 6 {3.71 4.21 4.51 4.71 4.87 5 5.1 5.2 5.28} 1903 7 {3.5 3.95 4.21 4.39 4.53 4.64 4.74 4.82 4.89} 1904 8 {3.36 3.77 4 4.17 4.29 4.4 4.48 4.56 4.62} 1905 9 {3.25 3.63 3.85 4.01 4.12 4.22 4.3 4.37 4.43} 1906 10 {3.17 3.53 3.74 3.88 3.99 4.08 4.16 4.22 4.28} 1907 11 {3.11 3.45 3.65 3.79 3.89 3.98 4.05 4.11 4.16} 1908 12 {3.05 3.39 3.58 3.71 3.81 3.89 3.96 4.02 4.07} 1909 13 {3.01 3.33 3.52 3.65 3.74 3.82 3.89 3.94 3.99} 1910 14 {2.98 3.29 3.47 3.59 3.69 3.76 3.83 3.88 3.93} 1911 15 {2.95 3.25 3.43 3.55 3.64 3.71 3.78 3.83 3.88} 1912 16 {2.92 3.22 3.39 3.51 3.6 3.67 3.73 3.78 3.83} 1913 17 {2.9 3.19 3.36 3.47 3.56 3.63 3.69 3.74 3.79} 1914 18 {2.88 3.17 3.33 3.44 3.53 3.6 3.66 3.71 3.75} 1915 19 {2.86 3.15 3.31 3.42 3.5 3.57 3.63 3.68 3.72} 1916 20 {2.85 3.13 3.29 3.4 3.48 3.55 3.6 3.65 3.69} 1917 24 {2.8 3.07 3.22 3.32 3.4 3.47 3.52 3.57 3.61} 1918 30 {2.75 3.01 3.15 3.25 3.33 3.39 3.44 3.49 3.52} 1919 40 {2.7 2.95 3.09 3.19 3.26 3.32 3.37 3.41 3.44} 1920 60 {2.66 2.9 3.03 3.12 3.19 3.25 3.29 3.33 3.37}} 1921 1922} 1923 1924# 1925# Simple test code 1926# 1927if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } { 1928 1929 console show 1930 puts [interp aliases] 1931 1932 set values {1 1 1 1 {}} 1933 puts [::math::statistics::basic-stats $values] 1934 set values {1 2 3 4} 1935 puts [::math::statistics::basic-stats $values] 1936 set values {1 -1 1 -2} 1937 puts [::math::statistics::basic-stats $values] 1938 puts [::math::statistics::mean $values] 1939 puts [::math::statistics::min $values] 1940 puts [::math::statistics::max $values] 1941 puts [::math::statistics::number $values] 1942 puts [::math::statistics::stdev $values] 1943 puts [::math::statistics::var $values] 1944 1945 set novals 100 1946 #set maxvals 100001 1947 set maxvals 1001 1948 while { $novals < $maxvals } { 1949 set values {} 1950 for { set i 0 } { $i < $novals } { incr i } { 1951 lappend values [expr {rand()}] 1952 } 1953 puts [::math::statistics::basic-stats $values] 1954 puts [::math::statistics::histogram {0.0 0.2 0.4 0.6 0.8 1.0} $values] 1955 set novals [expr {$novals*10}] 1956 } 1957 1958 puts "Normal distribution:" 1959 puts "X=0: [::math::statistics::pdf-normal 0.0 1.0 0.0]" 1960 puts "X=1: [::math::statistics::pdf-normal 0.0 1.0 1.0]" 1961 puts "X=-1: [::math::statistics::pdf-normal 0.0 1.0 -1.0]" 1962 1963 set data1 {0.0 1.0 3.0 4.0 100.0 -23.0} 1964 set data2 {1.0 2.0 4.0 5.0 101.0 -22.0} 1965 set data3 {0.0 2.0 6.0 8.0 200.0 -46.0} 1966 set data4 {2.0 6.0 8.0 200.0 -46.0 1.0} 1967 set data5 {100.0 99.0 90.0 93.0 5.0 123.0} 1968 puts "Correlation data1 and data1: [::math::statistics::corr $data1 $data1]" 1969 puts "Correlation data1 and data2: [::math::statistics::corr $data1 $data2]" 1970 puts "Correlation data1 and data3: [::math::statistics::corr $data1 $data3]" 1971 puts "Correlation data1 and data4: [::math::statistics::corr $data1 $data4]" 1972 puts "Correlation data1 and data5: [::math::statistics::corr $data1 $data5]" 1973 1974 # set data {1.0 2.0 2.3 4.0 3.4 1.2 0.6 5.6} 1975 # puts [::math::statistics::basicStats $data] 1976 # puts [::math::statistics::interval-mean-stdev $data 0.90] 1977 # puts [::math::statistics::interval-mean-stdev $data 0.95] 1978 # puts [::math::statistics::interval-mean-stdev $data 0.99] 1979 1980 # puts "\nTest mean values:" 1981 # puts [::math::statistics::test-mean $data 2.0 0.1 0.90] 1982 # puts [::math::statistics::test-mean $data 2.0 0.5 0.90] 1983 # puts [::math::statistics::test-mean $data 2.0 1.0 0.90] 1984 # puts [::math::statistics::test-mean $data 2.0 2.0 0.90] 1985 1986 set rc [catch { 1987 set m [::math::statistics::mean {}] 1988 } msg ] ; # {} 1989 puts "Result: $rc $msg" 1990 1991 puts "\nTest quantiles:" 1992 set data {1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0} 1993 set quantiles {0.11 0.21 0.51 0.91 0.99} 1994 set limits {2.1 4.1 6.1 8.1} 1995 puts [::math::statistics::quantiles $data $quantiles] 1996 1997 set histogram [::math::statistics::histogram $limits $data] 1998 puts [::math::statistics::quantiles $limits $histogram $quantiles] 1999 2000 puts "\nTest autocorrelation:" 2001 set data {1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0} 2002 puts [::math::statistics::autocorr $data] 2003 set data {1.0 -1.1 2.0 -0.6 3.0 -4.0 0.5 0.9 -1.0} 2004 puts [::math::statistics::autocorr $data] 2005 2006 puts "\nTest histogram limits:" 2007 puts [::math::statistics::mean-histogram-limits 1.0 1.0] 2008 puts [::math::statistics::mean-histogram-limits 1.0 1.0 4] 2009 puts [::math::statistics::minmax-histogram-limits 1.0 10.0 10] 2010 2011} 2012 2013# 2014# Test xbar/R-chart procedures 2015# 2016if { 0 } { 2017 set data {} 2018 for { set i 0 } { $i < 500 } { incr i } { 2019 lappend data [expr {rand()}] 2020 } 2021 set limits [::math::statistics::control-xbar $data] 2022 puts $limits 2023 2024 puts "Outliers? [::math::statistics::test-xbar $limits $data]" 2025 2026 set newdata {1.0 1.0 1.0 1.0 0.5 0.5 0.5 0.5 10.0 10.0 10.0 10.0} 2027 puts "Outliers? [::math::statistics::test-xbar $limits $newdata] -- 0 2" 2028 2029 set limits [::math::statistics::control-Rchart $data] 2030 puts $limits 2031 2032 puts "Outliers? [::math::statistics::test-Rchart $limits $data]" 2033 2034 set newdata {0.0 1.0 2.0 1.0 0.4 0.5 0.6 0.5 10.0 0.0 10.0 10.0} 2035 puts "Outliers? [::math::statistics::test-Rchart $limits $newdata] -- 0 2" 2036} 2037 2038