# statistics.tcl --
#
#    Package for basic statistical analysis
#
#    version 0.1:   initial implementation, january 2003
#    version 0.1.1: added linear regression
#    version 0.1.2: border case in stdev taken care of
#    version 0.1.3: moved initialisation of CDF to first call, november 2004
#    version 0.3:   added test for normality (as implemented by Torsten Reincke), march 2006
#    (also fixed an error in the export list)
#    version 0.4:   added the multivariate linear regression procedures by
#    Eric Kemp-Benedict, february 2007
#    version 0.5:   added the population standard deviation and variance,
#    as suggested by Dimitrios Zachariadis
#    version 0.6:   added pdf and cdf procedures for various distributions
#    (provided by Eric Kemp-Benedict)
#    version 0.7:   added Kruskal-Wallis test (by Torsten Berg)
#    version 0.8:   added Wilcoxon test and Spearman rank correlation
#    version 0.9:   added kernel density estimation
#    version 0.9.3: added histogram-alt, corrected test-normal
#    version 1.0:   added test-anova-F
#    version 1.0.1: correction in pdf-lognormal and cdf-lognormal
#    version 1.1:   added test-Tukey-range and test-Dunnett

package require Tcl 8.5 ; # 8.5+ feature in test-anova-F: **-operator
package provide math::statistics 1.1.1
package require math

if {![llength [info commands ::lrepeat]]} {
    # Forward portability, emulate lrepeat (built in as of Tcl 8.5)
    proc ::lrepeat {n args} {
        if {$n < 1} {
            return -code error "must have a count of at least 1"
        }
        set res {}
        while {$n} {
            foreach x $args { lappend res $x }
            incr n -1
        }
        return $res
    }
}

# ::math::statistics --
#    Namespace holding the procedures and variables
#

namespace eval ::math::statistics {
    #
    # Safer: change to short procedures
    #
    namespace export mean min max number var stdev pvar pstdev basic-stats corr \
            histogram histogram-alt interval-mean-stdev t-test-mean quantiles \
            test-normal lillieforsFit \
            autocorr crosscorr filter map
samplescount median \
            test-2x2 print-2x2 control-xbar test-xbar \
            control-Rchart test-Rchart \
            test-Kruskal-Wallis analyse-Kruskal-Wallis group-rank \
            test-Wilcoxon spearman-rank spearman-rank-extended \
            test-Duckworth test-anova-F test-Tukey-range test-Dunnett
    #
    # Error messages
    #
    variable NEGSTDEV   {Zero or negative standard deviation}
    variable TOOFEWDATA {Too few or invalid data}
    variable OUTOFRANGE {Argument out of range}

    #
    # Coefficients involved
    #
    variable factorNormalPdf
    set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}]

    # xbar/R-charts:
    # Data from:
    #    Peter W.M. John:
    #    Statistical methods in engineering and quality assurance
    #    Wiley and Sons, 1990
    #
    # The tables start at subsample size n = 2 (one entry per size,
    # up to n = 8; larger subsamples reuse the last entry).
    # Note: A2 for n = 3 is 1.023 per the standard tables
    # (the previous value, 1.093, was a typo).
    #
    variable control_factors {
        A2 {1.880 1.023 0.729 0.577 0.483 0.419 0.419}
        D3 {0.0   0.0   0.0   0.0   0.0   0.076 0.076}
        D4 {3.267 2.574 2.282 2.114 2.004 1.924 1.924}
    }
}

# mean, min, max, number, var, stdev, pvar, pstdev --
#    Return the mean (minimum, maximum) value of a list of numbers
#    or number of non-missing values
#
# Arguments:
#    type       Type of value to be returned
#    values     List of values to be examined
#
# Results:
#    Value that was required
#
#
namespace eval ::math::statistics {
    # Generate one thin wrapper per statistic; each delegates to BasicStats
    foreach type {mean min max number stdev var pstdev pvar} {
        proc $type { values } "BasicStats $type \$values"
    }
    proc basic-stats { values } "BasicStats all \$values"
}

# BasicStats --
#    Return the one or all of the basic statistical properties
#
# Arguments:
#    type       Type of value to be returned
#               (one of: all mean min max number stdev var pstdev pvar)
#    values     List of values to be examined (empty elements are skipped)
#
# Results:
#    Value that was required
#
proc ::math::statistics::BasicStats { type values } {
    variable TOOFEWDATA

    if { [lsearch {all mean min max number stdev var pstdev pvar} $type] < 0 } {
        return -code error \
                -errorcode ARG -errorinfo [list unknown type of statistic -- $type] \
                [list unknown type of statistic --
$type] 123 } 124 125 set min {} 126 set max {} 127 set mean {} 128 set stdev {} 129 set var {} 130 131 set sum 0.0 132 set sumsq 0.0 133 set number 0 134 set first {} 135 136 foreach value $values { 137 if { $value == {} } { 138 continue 139 } 140 set value [expr {double($value)}] 141 142 if { $first == {} } { 143 set first $value 144 } 145 146 incr number 147 set sum [expr {$sum+$value}] 148 set sumsq [expr {$sumsq+($value-$first)*($value-$first)}] 149 150 if { $min == {} || $value < $min } { 151 set min $value 152 } 153 if { $max == {} || $value > $max } { 154 set max $value 155 } 156 } 157 158 if { $number > 0 } { 159 set mean [expr {$sum/$number}] 160 } else { 161 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 162 } 163 164 if { $number > 1 } { 165 set var [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number-1)}] 166 # 167 # Take care of a rare situation: uniform data might 168 # cause a tiny negative difference 169 # 170 if { $var < 0.0 } { 171 set var 0.0 172 } 173 set stdev [expr {sqrt($var)}] 174 } 175 set pvar [expr {($sumsq-($mean-$first)*($sum-$number*$first))/double($number)}] 176 # 177 # Take care of a rare situation: uniform data might 178 # cause a tiny negative difference 179 # 180 if { $pvar < 0.0 } { 181 set pvar 0.0 182 } 183 set pstdev [expr {sqrt($pvar)}] 184 185 set all [list $mean $min $max $number $stdev $var $pstdev $pvar] 186 187 # 188 # Return the appropriate value 189 # 190 set $type 191} 192 193# histogram -- 194# Return histogram information from a list of numbers 195# 196# Arguments: 197# limits Upper limits for the buckets (in increasing order) 198# values List of values to be examined 199# weights List of weights, one per value (optional) 200# 201# Results: 202# List of number of values in each bucket (length is one more than 203# the number of limits) 204# 205# 206proc ::math::statistics::histogram { limits values {weights {}} } { 207 208 if { [llength $limits] < 1 } { 209 return -code error 
-errorcode ARG -errorinfo {No limits given} {No limits given} 210 } 211 if { [llength $weights] > 0 && [llength $values] != [llength $weights] } { 212 return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of values} {Weights and values differ in length} 213 } 214 215 set limits [lsort -real -increasing $limits] 216 217 for { set index 0 } { $index <= [llength $limits] } { incr index } { 218 set buckets($index) 0 219 } 220 221 set last [llength $limits] 222 223 # Will do integer arithmetic if unset 224 if {$weights eq ""} { 225 set weights [lrepeat [llength $values] 1] 226 } 227 228 foreach value $values weight $weights { 229 if { $value == {} } { 230 continue 231 } 232 233 set index 0 234 set found 0 235 foreach limit $limits { 236 if { $value <= $limit } { 237 set found 1 238 set buckets($index) [expr $buckets($index)+$weight] 239 break 240 } 241 incr index 242 } 243 244 if { $found == 0 } { 245 set buckets($last) [expr $buckets($last)+$weight] 246 } 247 } 248 249 set result {} 250 for { set index 0 } { $index <= $last } { incr index } { 251 lappend result $buckets($index) 252 } 253 254 return $result 255} 256 257# histogram-alt -- 258# Return histogram information from a list of numbers - 259# intervals are open-ended at the lower bound instead of at the upper bound 260# 261# Arguments: 262# limits Upper limits for the buckets (in increasing order) 263# values List of values to be examined 264# weights List of weights, one per value (optional) 265# 266# Results: 267# List of number of values in each bucket (length is one more than 268# the number of limits) 269# 270# 271proc ::math::statistics::histogram-alt { limits values {weights {}} } { 272 273 if { [llength $limits] < 1 } { 274 return -code error -errorcode ARG -errorinfo {No limits given} {No limits given} 275 } 276 if { [llength $weights] > 0 && [llength $values] != [llength $weights] } { 277 return -code error -errorcode ARG -errorinfo {Number of weights be equal to number of 
values} {Weights and values differ in length} 278 } 279 280 set limits [lsort -real -increasing $limits] 281 282 for { set index 0 } { $index <= [llength $limits] } { incr index } { 283 set buckets($index) 0 284 } 285 286 set last [llength $limits] 287 288 # Will do integer arithmetic if unset 289 if {$weights eq ""} { 290 set weights [lrepeat [llength $values] 1] 291 } 292 293 foreach value $values weight $weights { 294 if { $value == {} } { 295 continue 296 } 297 298 set index 0 299 set found 0 300 foreach limit $limits { 301 if { $value < $limit } { 302 set found 1 303 set buckets($index) [expr $buckets($index)+$weight] 304 break 305 } 306 incr index 307 } 308 309 if { $found == 0 } { 310 set buckets($last) [expr $buckets($last)+$weight] 311 } 312 } 313 314 set result {} 315 for { set index 0 } { $index <= $last } { incr index } { 316 lappend result $buckets($index) 317 } 318 319 return $result 320} 321 322# corr -- 323# Return the correlation coefficient of two sets of data 324# 325# Arguments: 326# data1 List with the first set of data 327# data2 List with the second set of data 328# 329# Result: 330# Correlation coefficient of the two 331# 332proc ::math::statistics::corr { data1 data2 } { 333 variable TOOFEWDATA 334 335 set number 0 336 set sum1 0.0 337 set sum2 0.0 338 set sumsq1 0.0 339 set sumsq2 0.0 340 set sumprod 0.0 341 342 foreach value1 $data1 value2 $data2 { 343 if { $value1 == {} || $value2 == {} } { 344 continue 345 } 346 set value1 [expr {double($value1)}] 347 set value2 [expr {double($value2)}] 348 349 set sum1 [expr {$sum1+$value1}] 350 set sum2 [expr {$sum2+$value2}] 351 set sumsq1 [expr {$sumsq1+$value1*$value1}] 352 set sumsq2 [expr {$sumsq2+$value2*$value2}] 353 set sumprod [expr {$sumprod+$value1*$value2}] 354 incr number 355 } 356 if { $number > 0 } { 357 set numerator [expr {$number*$sumprod-$sum1*$sum2}] 358 set denom1 [expr {sqrt($number*$sumsq1-$sum1*$sum1)}] 359 set denom2 [expr {sqrt($number*$sumsq2-$sum2*$sum2)}] 360 if { $denom1 
!= 0.0 && $denom2 != 0.0 } { 361 set corr_coeff [expr {$numerator/$denom1/$denom2}] 362 } elseif { $denom1 != 0.0 || $denom2 != 0.0 } { 363 set corr_coeff 0.0 ;# Uniform against non-uniform 364 } else { 365 set corr_coeff 1.0 ;# Both uniform 366 } 367 368 } else { 369 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 370 } 371 return $corr_coeff 372} 373 374# lillieforsFit -- 375# Calculate the goodness of fit according to Lilliefors 376# (goodness of fit to a normal distribution) 377# 378# Arguments: 379# values List of values to be tested for normality 380# 381# Result: 382# Value of the statistic D 383# 384proc ::math::statistics::lillieforsFit {values} { 385 # 386 # calculate the goodness of fit according to Lilliefors 387 # (goodness of fit to a normal distribution) 388 # 389 # values -> list of values to be tested for normality 390 # (these values are sampled counts) 391 # 392 393 # calculate standard deviation and mean of the sample: 394 set n [llength $values] 395 if { $n < 5 } { 396 return -code error "Insufficient number of data (at least five required)" 397 } 398 set sd [stdev $values] 399 set mean [mean $values] 400 401 # sort the sample for further processing: 402 set values [lsort -real $values] 403 404 # standardize the sample data (Z-scores): 405 foreach x $values { 406 lappend stdData [expr {($x - $mean)/double($sd)}] 407 } 408 409 # compute the value of the distribution function at every sampled point: 410 foreach x $stdData { 411 lappend expData [pnorm $x] 412 } 413 414 # compute D+: 415 set i 0 416 foreach x $expData { 417 incr i 418 lappend dplus [expr {$i/double($n)-$x}] 419 } 420 set dplus [lindex [lsort -real $dplus] end] 421 422 # compute D-: 423 set i 0 424 foreach x $expData { 425 incr i 426 lappend dminus [expr {$x-($i-1)/double($n)}] 427 } 428 set dminus [lindex [lsort -real $dminus] end] 429 430 # Calculate the test statistic D 431 # by finding the maximal vertical difference 432 # between the sample and the 
expectation:
    #
    set D [expr {$dplus > $dminus ? $dplus : $dminus}]

    # We now use the modified statistic Z,
    # because D is only reliable
    # if the p-value is smaller than 0.1
    return [expr {$D * (sqrt($n) - 0.01 + 0.831/sqrt($n))}]
}

# pnorm --
#    Calculate the cumulative distribution function (cdf)
#    for the standard normal distribution like in the statistical
#    software 'R' (mean=0 and sd=1)
#
# Arguments:
#    x     Value for which the cdf should be calculated
#
# Result:
#    Value of the cdf at x (the probability that a standard
#    normal variate is <= x)
#
proc ::math::statistics::pnorm {x} {
    #
    # cumulative distribution function (cdf)
    # for the standard normal distribution like in the statistical software 'R'
    # (mean=0 and sd=1)
    #
    # x -> value for which the cdf should be calculated
    #
    # Power series: sum over k >= 0 of x^(2k+1) / (1*3*5*...*(2k+1)),
    # accumulated until adding a term no longer changes the double-
    # precision sum
    set sum [expr {double($x)}]
    set oldSum 0.0
    set i 1
    set denom 1.0
    while {$sum != $oldSum} {
        set oldSum $sum
        incr i 2
        set denom [expr {$denom*$i}]
        #puts "$i - $denom"
        set sum [expr {$oldSum + pow($x,$i)/$denom}]
    }
    # 0.91893853320467274178 = ln(sqrt(2*pi)), so the factor is the
    # standard normal density exp(-x^2/2)/sqrt(2*pi)
    return [expr {0.5 + $sum * exp(-0.5 * $x*$x - 0.91893853320467274178)}]
}

# pnorm_quicker --
#    Calculate the cumulative distribution function (cdf)
#    for the standard normal distribution - quicker alternative
#    (less accurate)
#
# Arguments:
#    x     Value for which the cdf should be calculated
#
# Result:
#    Approximate value of the cdf at x
#
proc ::math::statistics::pnorm_quicker {x} {
    # Polynomial approximation to the upper-tail probability,
    # evaluated for |x| and folded back by symmetry below
    set n [expr {abs($x)}]
    set n [expr {1.0 + $n*(0.04986735 + $n*(0.02114101 + $n*(0.00327763 \
            + $n*(0.0000380036 + $n*(0.0000488906 + $n*0.000005383)))))}]
    set n [expr {1.0/pow($n,16)}]
    #
    if {$x >= 0} {
        return [expr {1 - $n/2.0}]
    } else {
        return [expr {$n/2.0}]
    }
}

# test-normal --
#    Test for normality (using method Lilliefors)
#
# Arguments:
#    data            Values that need to be tested
#    significance    Level at which the
discrepancy from normality is tested
#
# Result:
#    1 if the Lilliefors statistic D is larger than the critical level
#
# Note:
#    There was a mistake in the implementation before 0.9.3: confidence (wrong word)
#    instead of significance. To keep compatibility with earlier versions, both
#    significance and 1-significance are accepted.
#
proc ::math::statistics::test-normal {data significance} {
    set D [lillieforsFit $data]

    if { $significance > 0.5 } {
        set significance [expr {1.0-$significance}] ;# Convert the erroneous levels pre 0.9.3
    }

    #
    # Critical values of the modified Lilliefors statistic -
    # only this fixed set of significance levels is supported.
    # Match with a small tolerance so that values such as 0.050001
    # are still accepted.
    #
    foreach {level Dcrit} {
        0.20 0.741
        0.15 0.775
        0.10 0.819
        0.05 0.895
        0.01 1.035
    } {
        if { abs($significance-$level) < 0.0001 } {
            return [expr {$D > $Dcrit ? 1 : 0}]
        }
    }
    return -code error "Significance level must be one of: 0.20, 0.15, 0.10, 0.05 or 0.01"
}

# t-test-mean --
#    Test whether the mean value of a sample is in accordance with the
#    estimated normal distribution with a certain probability
#    (Student's t test)
#
# Arguments:
#    data         List of raw data values (small sample)
#    est_mean     Estimated mean of the distribution
#    est_stdev    Estimated stdev of the distribution
#    alpha        Probability level (0.95 or 0.99 for instance)
#
# Result:
#    1 if the test is positive, 0 otherwise.
If there are too few data, 558# returns an empty string 559# 560proc ::math::statistics::t-test-mean { data est_mean est_stdev alpha } { 561 variable NEGSTDEV 562 variable TOOFEWDATA 563 564 if { $est_stdev <= 0.0 } { 565 return -code error -errorcode ARG -errorinfo $NEGSTDEV $NEGSTDEV 566 } 567 568 set allstats [BasicStats all $data] 569 570 set alpha2 [expr {(1.0+$alpha)/2.0}] 571 572 set sample_mean [lindex $allstats 0] 573 set sample_number [lindex $allstats 3] 574 575 if { $sample_number > 1 } { 576 set tzero [expr {abs($sample_mean-$est_mean)/$est_stdev * \ 577 sqrt($sample_number-1)}] 578 set degrees [expr {$sample_number-1}] 579 set prob [cdf-students-t $degrees $tzero] 580 581 return [expr {$prob<$alpha2}] 582 583 } else { 584 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 585 } 586} 587 588# interval-mean-stdev -- 589# Return the interval containing the mean value and one 590# containing the standard deviation with a certain 591# level of confidence (assuming a normal distribution) 592# 593# Arguments: 594# data List of raw data values 595# confidence Confidence level (0.95 or 0.99 for instance) 596# 597# Result: 598# List having the following elements: lower and upper bounds of 599# mean, lower and upper bounds of stdev 600# 601# 602proc ::math::statistics::interval-mean-stdev { data confidence } { 603 variable TOOFEWDATA 604 605 set allstats [BasicStats all $data] 606 607 set conf2 [expr {(1.0+$confidence)/2.0}] 608 set mean [lindex $allstats 0] 609 set number [lindex $allstats 3] 610 set stdev [lindex $allstats 4] 611 612 if { $number > 1 } { 613 set degrees [expr {$number-1}] 614 set student_t [expr {sqrt([Inverse-cdf-toms322 1 $degrees $conf2])}] 615 set mean_lower [expr {$mean-$student_t*$stdev/sqrt($number)}] 616 set mean_upper [expr {$mean+$student_t*$stdev/sqrt($number)}] 617 set stdev_lower {} 618 set stdev_upper {} 619 return [list $mean_lower $mean_upper $stdev_lower $stdev_upper] 620 } else { 621 return -code error 
-errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 622 } 623} 624 625# quantiles -- 626# Return the quantiles for a given set of data or histogram 627# 628# Arguments: 629# (two arguments) 630# data List of raw data values 631# confidence Confidence level (0.95 or 0.99 for instance) 632# (three arguments) 633# limits List of upper limits from histogram 634# counts List of counts for for each interval in histogram 635# confidence Confidence level (0.95 or 0.99 for instance) 636# 637# Result: 638# List of quantiles 639# 640proc ::math::statistics::quantiles { arg1 arg2 {arg3 {}} } { 641 variable TOOFEWDATA 642 643 if { [catch { 644 if { $arg3 == {} } { 645 set result \ 646 [::math::statistics::QuantilesRawData $arg1 $arg2] 647 } else { 648 set result \ 649 [::math::statistics::QuantilesHistogram $arg1 $arg2 $arg3] 650 } 651 } msg] } { 652 return -code error -errorcode $msg $msg 653 } 654 return $result 655} 656 657# QuantilesRawData -- 658# Return the quantiles based on raw data 659# 660# Arguments: 661# data List of raw data values 662# confidence Confidence level (0.95 or 0.99 for instance) 663# 664# Result: 665# List of quantiles 666# 667proc ::math::statistics::QuantilesRawData { data confidence } { 668 variable TOOFEWDATA 669 variable OUTOFRANGE 670 671 if { [llength $confidence] <= 0 } { 672 return -code error -errorcode ARG "$TOOFEWDATA - quantiles" 673 } 674 675 if { [llength $data] <= 0 } { 676 return -code error -errorcode ARG "$TOOFEWDATA - raw data" 677 } 678 679 foreach cond $confidence { 680 if { $cond <= 0.0 || $cond >= 1.0 } { 681 return -code error -errorcode ARG "$OUTOFRANGE - quantiles" 682 } 683 } 684 685 # 686 # Sort the data first 687 # 688 set sorted_data [lsort -real -increasing $data] 689 690 # 691 # Determine the list element lower or equal to the quantile 692 # and return the corresponding value 693 # 694 set result {} 695 set number_data [llength $sorted_data] 696 foreach cond $confidence { 697 set elem [expr {round($number_data*$cond)-1}] 
if { $elem < 0 } {
            set elem 0
        }
        lappend result [lindex $sorted_data $elem]
    }

    return $result
}

# QuantilesHistogram --
#    Return the quantiles based on histogram information only
#
# Arguments:
#    limits        Upper limits for histogram intervals
#    counts        Counts for each interval (one more than limits)
#    confidence    Confidence level (0.95 or 0.99 for instance)
#
# Result:
#    List of quantiles
#
proc ::math::statistics::QuantilesHistogram { limits counts confidence } {
    variable TOOFEWDATA
    variable OUTOFRANGE

    if { [llength $confidence] <= 0 } {
        return -code error -errorcode ARG "$TOOFEWDATA - quantiles"
    }

    # Check the limits here - the original code repeated the
    # confidence check, so an empty limits list slipped through
    if { [llength $limits] <= 0 } {
        return -code error -errorcode ARG "$TOOFEWDATA - histogram limits"
    }

    if { [llength $counts] <= [llength $limits] } {
        return -code error -errorcode ARG "$TOOFEWDATA - histogram counts"
    }

    foreach cond $confidence {
        if { $cond <= 0.0 || $cond >= 1.0 } {
            return -code error -errorcode ARG "$OUTOFRANGE - quantiles"
        }
    }

    #
    # Accumulate the histogram counts first
    #
    set sum 0
    set accumulated_counts {}
    foreach count $counts {
        set sum [expr {$sum+$count}]
        lappend accumulated_counts $sum
    }
    set total_counts $sum

    #
    # Determine the list element lower or equal to the quantile
    # and return the corresponding value (use interpolation if
    # possible)
    #
    set result {}
    foreach cond $confidence {
        set found 0
        set bound [expr {round($total_counts*$cond)}]
        set lower_limit {}
        set lower_count 0
        foreach acc_count $accumulated_counts limit $limits {
            if { $acc_count >= $bound } {
                set found 1
                break
            }
            set lower_limit $limit
            set lower_count $acc_count
        }

        # No bracketing interval found: fall back to the nearest limit
        if { $lower_limit == {} || $limit == {} || $found == 0 } {
            set quant $limit
            if { $limit == {} } {
                set quant $lower_limit
            }
        } else {
            set quant [expr
{$limit+($lower_limit-$limit) * 778 ($acc_count-$bound)/($acc_count-$lower_count)}] 779 } 780 lappend result $quant 781 } 782 783 return $result 784} 785 786# autocorr -- 787# Return the autocorrelation function (assuming equidistance between 788# samples) 789# 790# Arguments: 791# data Raw data for which the autocorrelation must be determined 792# 793# Result: 794# List of autocorrelation values (about 1/2 the number of raw data) 795# 796proc ::math::statistics::autocorr { data } { 797 variable TOOFEWDATA 798 799 if { [llength $data] <= 1 } { 800 return -code error -errorcode ARG "$TOOFEWDATA" 801 } 802 803 return [crosscorr $data $data] 804} 805 806# crosscorr -- 807# Return the cross-correlation function (assuming equidistance 808# between samples) 809# 810# Arguments: 811# data1 First set of raw data 812# data2 Second set of raw data 813# 814# Result: 815# List of cross-correlation values (about 1/2 the number of raw data) 816# 817# Note: 818# The number of data pairs is not kept constant - because tests 819# showed rather awkward results when it was kept constant. 
820# 821proc ::math::statistics::crosscorr { data1 data2 } { 822 variable TOOFEWDATA 823 824 if { [llength $data1] <= 1 || [llength $data2] <= 1 } { 825 return -code error -errorcode ARG "$TOOFEWDATA" 826 } 827 828 # 829 # First determine the number of data pairs 830 # 831 set number1 [llength $data1] 832 set number2 [llength $data2] 833 834 set basic_stat1 [basic-stats $data1] 835 set basic_stat2 [basic-stats $data2] 836 set vmean1 [lindex $basic_stat1 0] 837 set vmean2 [lindex $basic_stat2 0] 838 set vvar1 [lindex $basic_stat1 end] 839 set vvar2 [lindex $basic_stat2 end] 840 841 set number_pairs $number1 842 if { $number1 > $number2 } { 843 set number_pairs $number2 844 } 845 set number_values $number_pairs 846 set number_delays [expr {$number_values/2.0}] 847 848 set scale [expr {sqrt($vvar1*$vvar2)}] 849 850 set result {} 851 for { set delay 0 } { $delay < $number_delays } { incr delay } { 852 set sumcross 0.0 853 set no_cross 0 854 for { set idx 0 } { $idx < $number_values } { incr idx } { 855 set value1 [lindex $data1 $idx] 856 set value2 [lindex $data2 [expr {$idx+$delay}]] 857 if { $value1 != {} && $value2 != {} } { 858 set sumcross \ 859 [expr {$sumcross+($value1-$vmean1)*($value2-$vmean2)}] 860 incr no_cross 861 } 862 } 863 lappend result [expr {$sumcross/($no_cross*$scale)}] 864 865 incr number_values -1 866 } 867 868 return $result 869} 870 871# mean-histogram-limits 872# Determine reasonable limits based on mean and standard deviation 873# for a histogram 874# 875# Arguments: 876# mean Mean of the data 877# stdev Standard deviation 878# number Number of limits to generate (defaults to 8) 879# 880# Result: 881# List of limits 882# 883proc ::math::statistics::mean-histogram-limits { mean stdev {number 8} } { 884 variable NEGSTDEV 885 886 if { $stdev <= 0.0 } { 887 return -code error -errorcode ARG "$NEGSTDEV" 888 } 889 if { $number < 1 } { 890 return -code error -errorcode ARG "Number of limits must be positive" 891 } 892 893 # 894 # Always: between 
mean-3.0*stdev and mean+3.0*stdev 895 # number = 2: -0.25, 0.25 896 # number = 3: -0.25, 0, 0.25 897 # number = 4: -1, -0.25, 0.25, 1 898 # number = 5: -1, -0.25, 0, 0.25, 1 899 # number = 6: -2, -1, -0.25, 0.25, 1, 2 900 # number = 7: -2, -1, -0.25, 0, 0.25, 1, 2 901 # number = 8: -3, -2, -1, -0.25, 0.25, 1, 2, 3 902 # 903 switch -- $number { 904 "1" { set limits {0.0} } 905 "2" { set limits {-0.25 0.25} } 906 "3" { set limits {-0.25 0.0 0.25} } 907 "4" { set limits {-1.0 -0.25 0.25 1.0} } 908 "5" { set limits {-1.0 -0.25 0.0 0.25 1.0} } 909 "6" { set limits {-2.0 -1.0 -0.25 0.25 1.0 2.0} } 910 "7" { set limits {-2.0 -1.0 -0.25 0.0 0.25 1.0 2.0} } 911 "8" { set limits {-3.0 -2.0 -1.0 -0.25 0.25 1.0 2.0 3.0} } 912 "9" { set limits {-3.0 -2.0 -1.0 -0.25 0.0 0.25 1.0 2.0 3.0} } 913 default { 914 set dlim [expr {6.0/double($number-1)}] 915 for {set i 0} {$i <$number} {incr i} { 916 lappend limits [expr {$dlim*($i-($number-1)/2.0)}] 917 } 918 } 919 } 920 921 set result {} 922 foreach limit $limits { 923 lappend result [expr {$mean+$limit*$stdev}] 924 } 925 926 return $result 927} 928 929# minmax-histogram-limits 930# Determine reasonable limits based on minimum and maximum bounds 931# for a histogram 932# 933# Arguments: 934# min Estimated minimum 935# max Estimated maximum 936# number Number of limits to generate (defaults to 8) 937# 938# Result: 939# List of limits 940# 941proc ::math::statistics::minmax-histogram-limits { min max {number 8} } { 942 variable NEGSTDEV 943 944 if { $number < 1 } { 945 return -code error -errorcode ARG "Number of limits must be positive" 946 } 947 if { $min >= $max } { 948 return -code error -errorcode ARG "Minimum must be lower than maximum" 949 } 950 951 set result {} 952 set dlim [expr {($max-$min)/double($number-1)}] 953 for {set i 0} {$i <$number} {incr i} { 954 lappend result [expr {$min+$dlim*$i}] 955 } 956 957 return $result 958} 959 960# linear-model 961# Determine the coefficients for a linear regression between 962# two 
series of data (the model: Y = A + B*X) 963# 964# Arguments: 965# xdata Series of independent (X) data 966# ydata Series of dependent (Y) data 967# intercept Whether to use an intercept or not (optional) 968# 969# Result: 970# List of the following items: 971# - (Estimate of) Intercept A 972# - (Estimate of) Slope B 973# - Standard deviation of Y relative to fit 974# - Correlation coefficient R2 975# - Number of degrees of freedom df 976# - Standard error of the intercept A 977# - Significance level of A 978# - Standard error of the slope B 979# - Significance level of B 980# 981# 982proc ::math::statistics::linear-model { xdata ydata {intercept 1} } { 983 variable TOOFEWDATA 984 985 if { [llength $xdata] < 3 } { 986 return -code error -errorcode ARG "$TOOFEWDATA: not enough independent data" 987 } 988 if { [llength $ydata] < 3 } { 989 return -code error -errorcode ARG "$TOOFEWDATA: not enough dependent data" 990 } 991 if { [llength $xdata] != [llength $ydata] } { 992 return -code error -errorcode ARG "$TOOFEWDATA: number of dependent data differs from number of independent data" 993 } 994 995 set sumx 0.0 996 set sumy 0.0 997 set sumx2 0.0 998 set sumy2 0.0 999 set sumxy 0.0 1000 set df 0 1001 foreach x $xdata y $ydata { 1002 if { $x != "" && $y != "" } { 1003 set sumx [expr {$sumx+$x}] 1004 set sumy [expr {$sumy+$y}] 1005 set sumx2 [expr {$sumx2+$x*$x}] 1006 set sumy2 [expr {$sumy2+$y*$y}] 1007 set sumxy [expr {$sumxy+$x*$y}] 1008 incr df 1009 } 1010 } 1011 1012 if { $df <= 2 } { 1013 return -code error -errorcode ARG "$TOOFEWDATA: too few valid data" 1014 } 1015 if { $sumx2 == 0.0 } { 1016 return -code error -errorcode ARG "$TOOFEWDATA: independent values are all the same" 1017 } 1018 1019 # 1020 # Calculate the intermediate quantities 1021 # 1022 set sx [expr {$sumx2-$sumx*$sumx/$df}] 1023 set sy [expr {$sumy2-$sumy*$sumy/$df}] 1024 set sxy [expr {$sumxy-$sumx*$sumy/$df}] 1025 1026 # 1027 # Calculate the coefficients 1028 # 1029 if { $intercept } { 1030 set B 
[expr {$sxy/$sx}] 1031 set A [expr {($sumy-$B*$sumx)/$df}] 1032 } else { 1033 set B [expr {$sumxy/$sumx2}] 1034 set A 0.0 1035 } 1036 1037 # 1038 # Calculate the error estimates 1039 # 1040 set stdevY 0.0 1041 set varY 0.0 1042 1043 if { $intercept } { 1044 set ve [expr {$sy-$B*$sxy}] 1045 if { $ve >= 0.0 } { 1046 set varY [expr {$ve/($df-2)}] 1047 } 1048 } else { 1049 set ve [expr {$sumy2-$B*$sumxy}] 1050 if { $ve >= 0.0 } { 1051 set varY [expr {$ve/($df-1)}] 1052 } 1053 } 1054 set seY [expr {sqrt($varY)}] 1055 1056 if { $intercept } { 1057 set R2 [expr {$sxy*$sxy/($sx*$sy)}] 1058 set seA [expr {$seY*sqrt(1.0/$df+$sumx*$sumx/($sx*$df*$df))}] 1059 set seB [expr {sqrt($varY/$sx)}] 1060 set tA {} 1061 set tB {} 1062 if { $seA != 0.0 } { 1063 set tA [expr {$A/$seA*sqrt($df-2)}] 1064 } 1065 if { $seB != 0.0 } { 1066 set tB [expr {$B/$seB*sqrt($df-2)}] 1067 } 1068 } else { 1069 set R2 [expr {$sumxy*$sumxy/($sumx2*$sumy2)}] 1070 set seA {} 1071 set tA {} 1072 set tB {} 1073 set seB [expr {sqrt($varY/$sumx2)}] 1074 if { $seB != 0.0 } { 1075 set tB [expr {$B/$seB*sqrt($df-1)}] 1076 } 1077 } 1078 1079 # 1080 # Return the list of parameters 1081 # 1082 return [list $A $B $seY $R2 $df $seA $tA $seB $tB] 1083} 1084 1085# linear-residuals 1086# Determine the difference between actual data and predicted from 1087# the linear model 1088# 1089# Arguments: 1090# xdata Series of independent (X) data 1091# ydata Series of dependent (Y) data 1092# intercept Whether to use an intercept or not (optional) 1093# 1094# Result: 1095# List of differences 1096# 1097proc ::math::statistics::linear-residuals { xdata ydata {intercept 1} } { 1098 variable TOOFEWDATA 1099 1100 if { [llength $xdata] < 3 } { 1101 return -code error -errorcode ARG "$TOOFEWDATA: no independent data" 1102 } 1103 if { [llength $ydata] < 3 } { 1104 return -code error -errorcode ARG "$TOOFEWDATA: no dependent data" 1105 } 1106 if { [llength $xdata] != [llength $ydata] } { 1107 return -code error -errorcode ARG 
"$TOOFEWDATA: number of dependent data differs from number of independent data"
    }

    # Only the intercept A and slope B of the fit are needed here
    foreach {A B} [linear-model $xdata $ydata $intercept] {break}

    set result {}
    foreach x $xdata y $ydata {
        set residue [expr {$y-$A-$B*$x}]
        lappend result $residue
    }
    return $result
}

# median
#    Determine the median from a list of data
#
# Arguments:
#    data       (Unsorted) list of data
#
# Result:
#    Median (either the middle value or the mean of two values in the
#    middle)
#
# Note:
#    Adapted from the Wiki page "Stats", code provided by JPS
#
proc ::math::statistics::median { data } {
    # Remove missing (empty) values before sorting
    set org_data $data
    set data {}
    foreach value $org_data {
        if { $value != {} } {
            lappend data $value
        }
    }
    set len [llength $data]

    set data [lsort -real $data]
    if { $len % 2 } {
        lindex $data [expr {($len-1)/2}]
    } else {
        expr {([lindex $data [expr {($len / 2) - 1}]] \
                + [lindex $data [expr {$len / 2}]]) / 2.0}
    }
}

# test-2x2 --
#    Compute the chi-square statistic for a 2x2 table
#
# Arguments:
#    a         Element upper-left
#    b         Element upper-right
#    c         Element lower-left
#    d         Element lower-right
# Return value:
#    Chi-square
# Note:
#    There is only one degree of freedom - this is important
#    when comparing the value to the tabulated values
#    of chi-square
#
proc ::math::statistics::test-2x2 { a b c d } {
    # Marginal totals and grand total
    set ab [expr {$a+$b}]
    set ac [expr {$a+$c}]
    set bd [expr {$b+$d}]
    set cd [expr {$c+$d}]
    set N  [expr {$a+$b+$c+$d}]

    # The statistic is undefined when a row or column is empty -
    # report that explicitly instead of a raw division by zero
    if { $ab == 0 || $ac == 0 || $bd == 0 || $cd == 0 } {
        return -code error -errorcode ARG \
                "test-2x2: all row and column totals must be non-zero"
    }

    set det [expr {$a*$d-$b*$c}]
    return [expr {double($N*$det*$det)/double($ab*$cd*$ac*$bd)}]
}

# print-2x2 --
#    Print a 2x2 table
#
# Arguments:
#    a         Element upper-left
#    b         Element upper-right
#    c         Element lower-left
#    d         Element lower-right
# Return value:
#    Printed
version with marginals 1187# 1188proc ::math::statistics::print-2x2 { a b c d } { 1189 set ab [expr {$a+$b}] 1190 set ac [expr {$a+$c}] 1191 set bd [expr {$b+$d}] 1192 set cd [expr {$c+$d}] 1193 set N [expr {$a+$b+$c+$d}] 1194 set chisq [test-2x2 $a $b $c $d] 1195 1196 set line [string repeat - 10] 1197 set result [format "%10d%10d | %10d\n" $a $b $ab] 1198 append result [format "%10d%10d | %10d\n" $c $d $cd] 1199 append result [format "%10s%10s + %10s\n" $line $line $line] 1200 append result [format "%10d%10d | %10d\n" $ac $bd $N] 1201 append result "Chisquare = $chisq\n" 1202 append result "Difference is significant?\n" 1203 append result " at 95%: [expr {$chisq<3.84146? "no":"yes"}]\n" 1204 append result " at 99%: [expr {$chisq<6.63490? "no":"yes"}]" 1205} 1206 1207# control-xbar -- 1208# Determine the control lines for an x-bar chart 1209# 1210# Arguments: 1211# data List of observed values (at least 20*nsamples) 1212# nsamples Number of data per subsamples (default: 4) 1213# Return value: 1214# List of: mean, lower limit, upper limit, number of data per 1215# subsample. 
Can be used in the test-xbar procedure 1216# 1217proc ::math::statistics::control-xbar { data {nsamples 4} } { 1218 variable TOOFEWDATA 1219 variable control_factors 1220 1221 # 1222 # Check the number of data 1223 # 1224 if { $nsamples <= 1 } { 1225 return -code error -errorcode DATA -errorinfo $OUTOFRANGE \ 1226 "Number of data per subsample must be at least 2" 1227 } 1228 if { [llength $data] < 20*$nsamples } { 1229 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1230 } 1231 1232 set nogroups [expr {[llength $data]/$nsamples}] 1233 set mrange 0.0 1234 set xmeans 0.0 1235 for { set i 0 } { $i < $nogroups } { incr i } { 1236 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1237 1238 set xmean 0.0 1239 set xmin [lindex $subsample 0] 1240 set xmax $xmin 1241 foreach d $subsample { 1242 set xmean [expr {$xmean+$d}] 1243 set xmin [expr {$xmin<$d? $xmin : $d}] 1244 set xmax [expr {$xmax>$d? $xmax : $d}] 1245 } 1246 set xmean [expr {$xmean/double($nsamples)}] 1247 1248 set xmeans [expr {$xmeans+$xmean}] 1249 set mrange [expr {$mrange+($xmax-$xmin)}] 1250 } 1251 1252 # 1253 # Determine the control lines 1254 # 1255 set xmeans [expr {$xmeans/double($nogroups)}] 1256 set mrange [expr {$mrange/double($nogroups)}] 1257 set A2 [lindex [lindex $control_factors 1] $nsamples] 1258 if { $A2 == "" } { set A2 [lindex [lindex $control_factors 1] end] } 1259 1260 return [list $xmeans [expr {$xmeans-$A2*$mrange}] \ 1261 [expr {$xmeans+$A2*$mrange}] $nsamples] 1262} 1263 1264# test-xbar -- 1265# Determine if any data points lie outside the x-bar control limits 1266# 1267# Arguments: 1268# control List returned by control-xbar with control data 1269# data List of observed values 1270# Return value: 1271# Indices of any subsamples that violate the control limits 1272# 1273proc ::math::statistics::test-xbar { control data } { 1274 foreach {xmean xlower xupper nsamples} $control {break} 1275 1276 if { [llength $data] < 1 } { 1277 
return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1278 } 1279 1280 set nogroups [expr {[llength $data]/$nsamples}] 1281 if { $nogroups <= 0 } { 1282 set nogroup 1 1283 set nsamples [llength $data] 1284 } 1285 1286 set result {} 1287 1288 for { set i 0 } { $i < $nogroups } { incr i } { 1289 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1290 1291 set xmean 0.0 1292 foreach d $subsample { 1293 set xmean [expr {$xmean+$d}] 1294 } 1295 set xmean [expr {$xmean/double($nsamples)}] 1296 1297 if { $xmean < $xlower } { lappend result $i } 1298 if { $xmean > $xupper } { lappend result $i } 1299 } 1300 1301 return $result 1302} 1303 1304# control-Rchart -- 1305# Determine the control lines for an R chart 1306# 1307# Arguments: 1308# data List of observed values (at least 20*nsamples) 1309# nsamples Number of data per subsamples (default: 4) 1310# Return value: 1311# List of: mean range, lower limit, upper limit, number of data per 1312# subsample. Can be used in the test-Rchart procedure 1313# 1314proc ::math::statistics::control-Rchart { data {nsamples 4} } { 1315 variable TOOFEWDATA 1316 variable control_factors 1317 1318 # 1319 # Check the number of data 1320 # 1321 if { $nsamples <= 1 } { 1322 return -code error -errorcode DATA -errorinfo $OUTOFRANGE \ 1323 "Number of data per subsample must be at least 2" 1324 } 1325 if { [llength $data] < 20*$nsamples } { 1326 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1327 } 1328 1329 set nogroups [expr {[llength $data]/$nsamples}] 1330 set mrange 0.0 1331 for { set i 0 } { $i < $nogroups } { incr i } { 1332 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1333 1334 set xmin [lindex $subsample 0] 1335 set xmax $xmin 1336 foreach d $subsample { 1337 set xmin [expr {$xmin<$d? $xmin : $d}] 1338 set xmax [expr {$xmax>$d? 
$xmax : $d}] 1339 } 1340 set mrange [expr {$mrange+($xmax-$xmin)}] 1341 } 1342 1343 # 1344 # Determine the control lines 1345 # 1346 set mrange [expr {$mrange/double($nogroups)}] 1347 set D3 [lindex [lindex $control_factors 3] $nsamples] 1348 set D4 [lindex [lindex $control_factors 5] $nsamples] 1349 if { $D3 == "" } { set D3 [lindex [lindex $control_factors 3] end] } 1350 if { $D4 == "" } { set D4 [lindex [lindex $control_factors 5] end] } 1351 1352 return [list $mrange [expr {$D3*$mrange}] \ 1353 [expr {$D4*$mrange}] $nsamples] 1354} 1355 1356# test-Rchart -- 1357# Determine if any data points lie outside the R-chart control limits 1358# 1359# Arguments: 1360# control List returned by control-xbar with control data 1361# data List of observed values 1362# Return value: 1363# Indices of any subsamples that violate the control limits 1364# 1365proc ::math::statistics::test-Rchart { control data } { 1366 foreach {rmean rlower rupper nsamples} $control {break} 1367 1368 # 1369 # Check the number of data 1370 # 1371 if { [llength $data] < 1 } { 1372 return -code error -errorcode DATA -errorinfo $TOOFEWDATA $TOOFEWDATA 1373 } 1374 1375 set nogroups [expr {[llength $data]/$nsamples}] 1376 1377 set result {} 1378 for { set i 0 } { $i < $nogroups } { incr i } { 1379 set subsample [lrange $data [expr {$i*$nsamples}] [expr {$i*$nsamples+$nsamples-1}]] 1380 1381 set xmin [lindex $subsample 0] 1382 set xmax $xmin 1383 foreach d $subsample { 1384 set xmin [expr {$xmin<$d? $xmin : $d}] 1385 set xmax [expr {$xmax>$d? 
$xmax : $d}] 1386 } 1387 set range [expr {$xmax-$xmin}] 1388 1389 if { $range < $rlower } { lappend result $i } 1390 if { $range > $rupper } { lappend result $i } 1391 } 1392 1393 return $result 1394} 1395 1396# test-Duckworth -- 1397# Determine if two data sets have the same median according to the Tukey-Duckworth test 1398# 1399# Arguments: 1400# list1 Values in the first data set 1401# list2 Values in the second data set 1402# significance Significance level (either 0.05, 0.01 or 0.001) 1403# 1404# Returns: 1405# 0 if the medians are unequal, 1 if they are equal, -1 if the test can not 1406# be conducted (the smallest value must be in a different set than the greatest value) 1407# 1408proc ::math::statistics::test-Duckworth {list1 list2 significance} { 1409 set sorted1 [lsort -real $list1] 1410 set sorted2 [lsort -real -decreasing $list2] 1411 1412 set lowest1 [lindex $sorted1 0] 1413 set lowest2 [lindex $sorted2 end] 1414 set greatest1 [lindex $sorted1 end] 1415 set greatest2 [lindex $sorted2 0] 1416 1417 if { $lowest1 <= $lowest2 && $greatest1 >= $greatest2 } { 1418 return -1 1419 } 1420 if { $lowest1 >= $lowest2 && $greatest1 <= $greatest2 } { 1421 return -1 1422 } 1423 1424 # 1425 # Determine how many elements of set 1 are lower than the lowest of set 2 1426 # Ditto for the number of elements of set 2 greater than the greatest of set 1 1427 # (Or vice versa) 1428 # 1429 if { $lowest1 < $lowest2 } { 1430 set lowest $lowest2 1431 set greatest $greatest1 1432 } else { 1433 set lowest $lowest1 1434 set greatest $greatest2 1435 set sorted1 [lsort -real $list2] 1436 set sorted2 [lsort -real -decreasing $list1] 1437 #lassign [list $sorted1 $sorted2] sorted2 sorted1 1438 } 1439 1440 set count1 0 1441 set count2 0 1442 foreach v1 $sorted1 { 1443 if { $v1 >= $lowest } { 1444 break 1445 } 1446 incr count1 1447 } 1448 foreach v2 $sorted2 { 1449 if { $v2 <= $greatest } { 1450 break 1451 } 1452 incr count2 1453 } 1454 1455 # 1456 # Determine the statistic D, possibly with 
correction 1457 # 1458 set n1 [llength $list1] 1459 set n2 [llength $list2] 1460 1461 set correction 0 1462 if { 3 + 4*$n1/3 <= $n2 && $n2 <= 2*$n1 } { 1463 set correction -1 1464 } 1465 if { 3 + 4*$n2/3 <= $n1 && $n1 <= 2*$n2 } { 1466 set correction -1 1467 } 1468 1469 set D [expr {$count1 + $count2 + $correction}] 1470 1471 switch -- [string trim $significance 0] { 1472 ".05" { 1473 return [expr {$D >= 7? 0 : 1}] 1474 } 1475 ".01" { 1476 return [expr {$D >= 10? 0 : 1}] 1477 } 1478 ".001" { 1479 return [expr {$D >= 13? 0 : 1}] 1480 } 1481 default { 1482 return -code error "Significance level must be 0.05, 0.01 or 0.001" 1483 } 1484 } 1485} 1486 1487# test-anova-F -- 1488# Check if two or more groups with normally distributed data have the same 1489# means 1490# 1491# Arguments: 1492# alpha Significance level 1493# args One or more lists containing the data for the 1494# other groups 1495# 1496# Returns: 1497# Whether the mean for the groups is likely to be the same (1) 1498# or not (0). 1499# 1500# Note: 1501# args may be a nested list 1502# 1503# Implementation based on Wikipedia page on the F-test, one-way ANOVA 1504# 1505# Possibly of interest: the ratio itself. 
1506# 1507proc ::math::statistics::test-anova-F {alpha args} { 1508 if { [llength $args] == 1 } { 1509 set args [lindex $args 0] 1510 } 1511 1512 if { [llength $args] < 2 } { 1513 return -code error -errorcode ARG -errorinfo "At least two groups are required" \ 1514 "At least two groups are required" 1515 } 1516 1517 # 1518 # Determine the variance within the groups and the variance between the groups 1519 # 1520 set meanPerGroup {} 1521 set varPerGroup {} 1522 set allData {} 1523 foreach group $args { 1524 lappend meanPerGroup [::math::statistics::mean $group] 1525 lappend varPerGroup [::math::statistics::pvar $group] 1526 set allData [concat $allData $group] 1527 } 1528 set meanOverall [::math::statistics::mean $allData] 1529 1530 set varBetween 0.0 1531 foreach group $args mean $meanPerGroup { 1532 set varBetween [expr {$varBetween + [llength $group] * ($mean - $meanOverall)**2}] 1533 } 1534 set varBetween [expr {$varBetween / ([llength $args] - 1)}] 1535 1536 set varWithin 0.0 1537 foreach group $args var $varPerGroup { 1538 set varWithin [expr {$varWithin + [llength $group] * $var}] 1539 } 1540 set varWithin [expr {$varWithin / ([llength $allData] - [llength $args])}] 1541 1542 # 1543 # Finally compare the ratio to the F-distribution 1544 # 1545 set ratio [expr {$varBetween / $varWithin}] 1546 set nf1 [expr {[llength $args] - 1}] 1547 set nf2 [expr {[llength $allData] - [llength $args]}] 1548 1549 expr {[::math::statistics::cdf-F $nf1 $nf2 $ratio] <= 1.0 - $alpha} 1550} 1551 1552# test-Tukey-range -- 1553# Check if two or more groups with normally distributed data have different 1554# means or not, using Tukey's range test 1555# 1556# Arguments: 1557# alpha Significance level 1558# args Two or more lists containing the data for the 1559# other groups 1560# 1561# Returns: 1562# For each pair of groups a list of: 1563# - group indices 1564# - whether the means differ (1) or not (0) 1565# - limits of the confidence interval (for closer investigation) 1566# 1567# 
Note: 1568# args may be a nested list 1569# 1570# Implementation based on Wikipedia page on Tukey's range test 1571# 1572proc ::math::statistics::test-Tukey-range {alpha args} { 1573 if { [llength $args] == 1 } { 1574 set args [lindex $args 0] 1575 } 1576 1577 if { [llength $args] < 2 } { 1578 return -code error -errorcode ARG -errorinfo "At least two groups are required" \ 1579 "At least two groups are required" 1580 } 1581 1582 if { $alpha != 0.05 && $alpha != 0.01 } { 1583 return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01" 1584 } 1585 1586 # 1587 # Determine the mean per group and the pooled variance of the data 1588 # 1589 set meanPerGroup {} 1590 set allData {} 1591 set sumVar 0.0 1592 foreach group $args { 1593 lappend meanPerGroup [mean $group] 1594 set sumVar [expr {$sumVar + ([llength $group]-1) * [var $group]}] 1595 set allData [concat $allData $group] 1596 } 1597 1598 set n [llength $allData] 1599 set stdOverall [expr {sqrt($sumVar /($n - [llength $args]))}] 1600 1601 set qcrit [Qcrit-Tukey $alpha $n [llength $args]] 1602 1603 set result {} 1604 1605 for {set g 0} {$g < [llength $args]} {incr g} { 1606 set ggroup [lindex $args $g] 1607 set gmean [mean $ggroup] 1608 set ng [llength $ggroup] 1609 1610 for {set h [expr {$g+1}]} {$h < [llength $args]} {incr h} { 1611 set hgroup [lindex $args $h] 1612 set hmean [mean $hgroup] 1613 set nh [llength $hgroup] 1614 1615 set halfwidth [expr {$qcrit * $stdOverall / sqrt(2.0) * sqrt( 1.0/$ng + 1.0/$nh )}] 1616 set lower [expr {$hmean - $gmean - $halfwidth}] 1617 set upper [expr {$hmean - $gmean + $halfwidth}] 1618 set unequal 1 1619 if { $lower < 0.0 && $upper > 0.0 } { 1620 set unequal 0 1621 } 1622 lappend result [list $g $h $unequal $lower $upper] 1623 } 1624 } 1625 1626 return $result 1627} 1628 1629# Qcrit-Tukey -- 1630# Determine the critical value for the Tukey range test 1631# 1632# Arguments: 1633# alpha Significance level 1634# numberData Total number of data 1635# numberGroups Number of 
groups 1636# 1637# Returns: 1638# Critical value 1639# 1640# Note: 1641# If there are more than 10 groups, simply use 10 groups 1642# 1643proc ::math::statistics::Qcrit-Tukey {alpha numberData numberGroups} { 1644 variable tukey_table_05 1645 variable tukey_table_01 1646 1647 if { $alpha == 0.05 } { 1648 upvar 0 tukey_table_05 tukey_table 1649 } else { 1650 upvar 0 tukey_table_05 tukey_table 1651 } 1652 1653 set df [expr {$numberData - $numberGroups}] 1654 1655 if { $numberGroups > 10 } { 1656 set numberGroups 10 1657 } 1658 incr numberGroups -1 ;# Offset because of 0-based numbering 1659 1660 if { $df > 120 } { 1661 return [lindex $tukey_table end $numberGroups] 1662 } 1663 1664 foreach {dfe values} $tukey_table { 1665 if { $df <= $dfe } { 1666 return [lindex $values $numberGroups] 1667 } 1668 } 1669} 1670 1671# test-Dunnett -- 1672# Check if one or more groups with normally distributed data have different 1673# means from the control group or not, using Dunnett's test 1674# 1675# Arguments: 1676# alpha Significance level 1677# control Control group 1678# args One or more lists containing the data for the 1679# other groups 1680# 1681# Returns: 1682# For each group a list of: 1683# - whether the mean differs (1) from the control or not (0) 1684# - the confidence interval 1685# 1686# Note: 1687# args may be a nested list 1688# 1689# Implementation based on Wikipedia page on Dunnett's test 1690# The test is two-sided. 
1691# 1692proc ::math::statistics::test-Dunnett {alpha control args} { 1693 if { [llength $args] == 1 } { 1694 set args [lindex $args 0] 1695 } 1696 1697 if { [llength $args] < 1 } { 1698 return -code error -errorcode ARG -errorinfo "At least one additional group is required" \ 1699 "At least one additional group is required" 1700 } 1701 1702 if { $alpha != 0.05 && $alpha != 0.01 } { 1703 return -code error -errorcode ARG -errorinfo "Alpha must 0.05 or 0.01" 1704 } 1705 1706 # 1707 # Determine the mean per group and the pooled variance 1708 # 1709 set allData $control 1710 set sumVar [expr {([llength $control]-1)*[var $control]}] 1711 foreach group $args { 1712 set sumVar [expr {$sumVar + ([llength $group]-1) * [var $group]}] 1713 set allData [concat $allData $group] 1714 } 1715 1716 set n [llength $allData] 1717 set stdOverall [expr {sqrt($sumVar /($n - [llength $args] - 1))}] 1718 1719 set tcrit [Tcrit-Dunnett $alpha $n [llength $args]] 1720 1721 set result {} 1722 1723 set cmean [mean $control] 1724 set nc [llength $control] 1725 1726 for {set g 0} {$g < [llength $args]} {incr g} { 1727 set ggroup [lindex $args $g] 1728 set gmean [mean $ggroup] 1729 set ng [llength $ggroup] 1730 1731 set halfwidth [expr {$tcrit * $stdOverall * sqrt( 1.0/$nc + 1.0/$ng )}] 1732 set lower [expr {$gmean - $cmean - $halfwidth}] 1733 set upper [expr {$gmean - $cmean + $halfwidth}] 1734 set unequal 1 1735 if { $lower < 0.0 && $upper > 0.0 } { 1736 set unequal 0 1737 } 1738 lappend result [list $unequal $lower $upper] 1739 } 1740 1741 return $result 1742} 1743 1744# Tcrit-Dunnett -- 1745# Determine the critical value for the Dunnett test 1746# 1747# Arguments: 1748# alpha Significance level 1749# numberData Total number of data 1750# numberGroups Number of groups to compare against the control 1751# 1752# Returns: 1753# Critical value 1754# 1755# Note: 1756# If there are more than 10 groups, simply use 10 groups 1757# 1758proc ::math::statistics::Tcrit-Dunnett {alpha numberData 
numberGroups} { 1759 variable dunnett_table_05 1760 variable dunnett_table_01 1761 1762 if { $alpha == 0.05 } { 1763 upvar 0 dunnett_table_05 dunnett_table 1764 } else { 1765 upvar 0 dunnett_table_05 dunnett_table 1766 } 1767 1768 set df [expr {$numberData - $numberGroups - 1}] 1769 1770 incr numberGroups 1 ;# Add the control group 1771 if { $numberGroups > 10 } { 1772 set numberGroups 10 1773 } 1774 incr numberGroups -2 ;# Offset because of 0-based numbering and start at 2 1775 1776 if { $df > 60 } { 1777 return [lindex $dunnett_table end $numberGroups] 1778 } 1779 1780 foreach {dfe values} $dunnett_table { 1781 if { $df <= $dfe } { 1782 return [lindex $values $numberGroups] 1783 } 1784 } 1785} 1786 1787# 1788# Load the auxiliary scripts 1789# 1790source [file join [file dirname [info script]] pdf_stat.tcl] 1791source [file join [file dirname [info script]] plotstat.tcl] 1792source [file join [file dirname [info script]] liststat.tcl] 1793source [file join [file dirname [info script]] mvlinreg.tcl] 1794source [file join [file dirname [info script]] kruskal.tcl] 1795source [file join [file dirname [info script]] wilcoxon.tcl] 1796source [file join [file dirname [info script]] stat_kernel.tcl] 1797 1798# 1799# Define the tables 1800# 1801namespace eval ::math::statistics { 1802 variable tukey_table_05 1803 variable tukey_table_01 1804 variable dunnett_table_05 1805 variable dunnett_table_01 1806 1807 #alpha = 0.05 1808 #k 2 3 4 5 6 7 8 9 10 1809 #df 1810 set tukey_table_05 { 1811 1 {18.0 27.0 32.8 37.1 40.4 43.1 45.4 47.4 49.1} 1812 2 {6.08 8.33 9.80 10.88 11.73 12.43 13.03 13.54 13.99} 1813 3 {4.50 5.91 6.82 7.50 8.04 8.48 8.85 9.18 9.46} 1814 4 {3.93 5.04 5.76 6.29 6.71 7.05 7.35 7.60 7.83} 1815 5 {3.64 4.60 5.22 5.67 6.03 6.33 6.58 6.80 6.99} 1816 6 {3.46 4.34 4.90 5.30 5.63 5.90 6.12 6.32 6.49} 1817 7 {3.34 4.16 4.68 5.06 5.36 5.61 5.82 6.00 6.16} 1818 8 {3.26 4.04 4.53 4.89 5.17 5.40 5.60 5.77 5.92} 1819 9 {3.20 3.95 4.41 4.76 5.02 5.24 5.43 5.59 5.74} 1820 10 
{3.15 3.88 4.33 4.65 4.91 5.12 5.30 5.46 5.60} 1821 11 {3.11 3.82 4.26 4.57 4.82 5.03 5.20 5.35 5.49} 1822 12 {3.08 3.77 4.20 4.51 4.75 4.95 5.12 5.27 5.39} 1823 13 {3.06 3.73 4.15 4.45 4.69 4.88 5.05 5.19 5.32} 1824 14 {3.03 3.70 4.11 4.41 4.64 4.83 4.99 5.13 5.25} 1825 15 {3.01 3.67 4.08 4.37 4.59 4.78 4.94 5.08 5.20} 1826 16 {3.00 3.65 4.05 4.33 4.56 4.74 4.90 5.03 5.15} 1827 17 {2.98 3.63 4.02 4.30 4.52 4.70 4.86 4.99 5.11} 1828 18 {2.97 3.61 4.00 4.28 4.49 4.67 4.82 4.96 5.07} 1829 19 {2.96 3.59 3.98 4.25 4.47 4.65 4.79 4.92 5.04} 1830 20 {2.95 3.58 3.96 4.23 4.45 4.62 4.77 4.90 5.01} 1831 24 {2.92 3.53 3.90 4.17 4.37 4.54 4.68 4.81 4.92} 1832 30 {2.89 3.49 3.85 4.10 4.30 4.46 4.60 4.72 4.82} 1833 40 {2.86 3.44 3.79 4.04 4.23 4.39 4.52 4.63 4.73} 1834 60 {2.83 3.40 3.74 3.98 4.16 4.31 4.44 4.55 4.65} 1835 120 {2.80 3.36 3.68 3.92 4.10 4.24 4.36 4.47 4.56} 1836 inf {2.77 3.31 3.63 3.86 4.03 4.17 4.29 4.39 4.47}} 1837 1838 #alpha = 0.01 1839 #k 2 3 4 5 6 7 8 9 10 1840 #df 1841 set tukey_table_01 { 1842 1 {90.0 135 164 186 202 216 227 237 246} 1843 2 {13.90 19.02 22.56 25.37 27.76 29.86 31.73 33.41 34.93} 1844 3 {8.26 10.62 12.17 13.32 14.24 15.00 15.65 16.21 16.71} 1845 4 {6.51 8.12 9.17 9.96 10.58 11.10 11.54 11.92 12.26} 1846 5 {5.70 6.98 7.80 8.42 8.91 9.32 9.67 9.97 10.24} 1847 6 {5.24 6.33 7.03 7.56 7.97 8.32 8.61 8.87 9.10} 1848 7 {4.95 5.92 6.54 7.00 7.37 7.68 7.94 8.17 8.37} 1849 8 {4.75 5.64 6.20 6.62 6.96 7.24 7.47 7.68 7.86} 1850 9 {4.60 5.43 5.96 6.35 6.66 6.91 7.13 7.33 7.49} 1851 10 {4.48 5.27 5.77 6.14 6.43 6.67 6.87 7.05 7.21} 1852 11 {4.39 5.15 5.62 5.97 6.25 6.48 6.67 6.84 6.99} 1853 12 {4.32 5.05 5.50 5.84 6.10 6.32 6.51 6.67 6.81} 1854 13 {4.26 4.96 5.40 5.73 5.98 6.19 6.37 6.53 6.67} 1855 14 {4.21 4.89 5.32 5.63 5.88 6.08 6.26 6.41 6.54} 1856 15 {4.17 4.84 5.25 5.56 5.80 5.99 6.16 6.31 6.44} 1857 16 {4.13 4.79 5.19 5.49 5.72 5.92 6.08 6.22 6.35} 1858 17 {4.10 4.74 5.14 5.43 5.66 5.85 6.01 6.15 6.27} 1859 18 {4.07 4.70 5.09 5.38 5.60 5.79 
5.94 6.08 6.20} 1860 19 {4.05 4.67 5.05 5.33 5.55 5.73 5.89 6.02 6.14} 1861 20 {4.02 4.64 5.02 5.29 5.51 5.69 5.84 5.97 6.09} 1862 24 {3.96 4.55 4.91 5.17 5.37 5.54 5.69 5.81 5.92} 1863 30 {3.89 4.45 4.80 5.05 5.24 5.40 5.54 5.65 5.76} 1864 40 {3.82 4.37 4.70 4.93 5.11 5.26 5.39 5.50 5.60} 1865 60 {3.76 4.28 4.59 4.82 4.99 5.13 5.25 5.36 5.45} 1866 120 {3.70 4.20 4.50 4.71 4.87 5.01 5.12 5.21 5.30} 1867 inf {3.64 4.12 4.40 4.60 4.76 4.88 4.99 5.08 5.16}} 1868 1869 #From: http://davidmlane.com/hyperstat/table_Dunnett.html 1870 #Dunnett Table 1871 #Number of Groups Including Control Group 1872 #dfe alpha = 0.05 1873 # 2 3 4 5 6 7 8 9 10 1874 set dunnett_table_05 { 1875 5 {2.57 3.03 3.29 3.48 3.62 3.73 3.82 3.9 3.97} 1876 6 {2.45 2.86 3.1 3.26 3.39 3.49 3.57 3.64 3.71} 1877 7 {2.36 2.75 2.97 3.12 3.24 3.33 3.41 3.47 3.53} 1878 8 {2.31 2.67 2.88 3.02 3.13 3.22 3.29 3.35 3.41} 1879 9 {2.26 2.61 2.81 2.95 3.05 3.14 3.2 3.26 3.32} 1880 10 {2.23 2.57 2.76 2.89 2.99 3.07 3.14 3.19 3.24} 1881 11 {2.2 2.53 2.72 2.84 2.94 3.02 3.08 3.14 3.19} 1882 12 {2.18 2.5 2.68 2.81 2.9 2.98 3.04 3.09 3.14} 1883 13 {2.16 2.48 2.65 2.78 2.87 2.94 3 3.06 3.1} 1884 14 {2.14 2.46 2.63 2.75 2.84 2.91 2.97 3.02 3.07} 1885 15 {2.13 2.44 2.61 2.73 2.82 2.89 2.95 3 3.04} 1886 16 {2.12 2.42 2.59 2.71 2.8 2.87 2.92 2.97 3.02} 1887 17 {2.11 2.41 2.58 2.69 2.78 2.85 2.9 2.95 3} 1888 18 {2.1 2.4 2.56 2.68 2.76 2.83 2.89 2.94 2.98} 1889 19 {2.09 2.39 2.55 2.66 2.75 2.81 2.87 2.92 2.96} 1890 20 {2.09 2.38 2.54 2.65 2.73 2.8 2.86 2.9 2.95} 1891 24 {2.06 2.35 2.51 2.61 2.7 2.76 2.81 2.86 2.9} 1892 30 {2.04 2.32 2.47 2.58 2.66 2.72 2.77 2.82 2.86} 1893 40 {2.02 2.29 2.44 2.54 2.62 2.68 2.73 2.77 2.81} 1894 60 {2 2.27 2.41 2.51 2.58 2.64 2.69 2.73 2.77}} 1895 1896 set dunnett_table_01 { 1897 5 {4.03 4.63 4.98 5.22 5.41 5.56 5.69 5.8 5.89} 1898 6 {3.71 4.21 4.51 4.71 4.87 5 5.1 5.2 5.28} 1899 7 {3.5 3.95 4.21 4.39 4.53 4.64 4.74 4.82 4.89} 1900 8 {3.36 3.77 4 4.17 4.29 4.4 4.48 4.56 4.62} 1901 9 {3.25 3.63 
3.85 4.01 4.12 4.22 4.3 4.37 4.43} 1902 10 {3.17 3.53 3.74 3.88 3.99 4.08 4.16 4.22 4.28} 1903 11 {3.11 3.45 3.65 3.79 3.89 3.98 4.05 4.11 4.16} 1904 12 {3.05 3.39 3.58 3.71 3.81 3.89 3.96 4.02 4.07} 1905 13 {3.01 3.33 3.52 3.65 3.74 3.82 3.89 3.94 3.99} 1906 14 {2.98 3.29 3.47 3.59 3.69 3.76 3.83 3.88 3.93} 1907 15 {2.95 3.25 3.43 3.55 3.64 3.71 3.78 3.83 3.88} 1908 16 {2.92 3.22 3.39 3.51 3.6 3.67 3.73 3.78 3.83} 1909 17 {2.9 3.19 3.36 3.47 3.56 3.63 3.69 3.74 3.79} 1910 18 {2.88 3.17 3.33 3.44 3.53 3.6 3.66 3.71 3.75} 1911 19 {2.86 3.15 3.31 3.42 3.5 3.57 3.63 3.68 3.72} 1912 20 {2.85 3.13 3.29 3.4 3.48 3.55 3.6 3.65 3.69} 1913 24 {2.8 3.07 3.22 3.32 3.4 3.47 3.52 3.57 3.61} 1914 30 {2.75 3.01 3.15 3.25 3.33 3.39 3.44 3.49 3.52} 1915 40 {2.7 2.95 3.09 3.19 3.26 3.32 3.37 3.41 3.44} 1916 60 {2.66 2.9 3.03 3.12 3.19 3.25 3.29 3.33 3.37}} 1917 1918} 1919 1920# 1921# Simple test code 1922# 1923if { [info exists ::argv0] && ([file tail [info script]] == [file tail $::argv0]) } { 1924 1925 console show 1926 puts [interp aliases] 1927 1928 set values {1 1 1 1 {}} 1929 puts [::math::statistics::basic-stats $values] 1930 set values {1 2 3 4} 1931 puts [::math::statistics::basic-stats $values] 1932 set values {1 -1 1 -2} 1933 puts [::math::statistics::basic-stats $values] 1934 puts [::math::statistics::mean $values] 1935 puts [::math::statistics::min $values] 1936 puts [::math::statistics::max $values] 1937 puts [::math::statistics::number $values] 1938 puts [::math::statistics::stdev $values] 1939 puts [::math::statistics::var $values] 1940 1941 set novals 100 1942 #set maxvals 100001 1943 set maxvals 1001 1944 while { $novals < $maxvals } { 1945 set values {} 1946 for { set i 0 } { $i < $novals } { incr i } { 1947 lappend values [expr {rand()}] 1948 } 1949 puts [::math::statistics::basic-stats $values] 1950 puts [::math::statistics::histogram {0.0 0.2 0.4 0.6 0.8 1.0} $values] 1951 set novals [expr {$novals*10}] 1952 } 1953 1954 puts "Normal distribution:" 1955 puts 
"X=0: [::math::statistics::pdf-normal 0.0 1.0 0.0]" 1956 puts "X=1: [::math::statistics::pdf-normal 0.0 1.0 1.0]" 1957 puts "X=-1: [::math::statistics::pdf-normal 0.0 1.0 -1.0]" 1958 1959 set data1 {0.0 1.0 3.0 4.0 100.0 -23.0} 1960 set data2 {1.0 2.0 4.0 5.0 101.0 -22.0} 1961 set data3 {0.0 2.0 6.0 8.0 200.0 -46.0} 1962 set data4 {2.0 6.0 8.0 200.0 -46.0 1.0} 1963 set data5 {100.0 99.0 90.0 93.0 5.0 123.0} 1964 puts "Correlation data1 and data1: [::math::statistics::corr $data1 $data1]" 1965 puts "Correlation data1 and data2: [::math::statistics::corr $data1 $data2]" 1966 puts "Correlation data1 and data3: [::math::statistics::corr $data1 $data3]" 1967 puts "Correlation data1 and data4: [::math::statistics::corr $data1 $data4]" 1968 puts "Correlation data1 and data5: [::math::statistics::corr $data1 $data5]" 1969 1970 # set data {1.0 2.0 2.3 4.0 3.4 1.2 0.6 5.6} 1971 # puts [::math::statistics::basicStats $data] 1972 # puts [::math::statistics::interval-mean-stdev $data 0.90] 1973 # puts [::math::statistics::interval-mean-stdev $data 0.95] 1974 # puts [::math::statistics::interval-mean-stdev $data 0.99] 1975 1976 # puts "\nTest mean values:" 1977 # puts [::math::statistics::test-mean $data 2.0 0.1 0.90] 1978 # puts [::math::statistics::test-mean $data 2.0 0.5 0.90] 1979 # puts [::math::statistics::test-mean $data 2.0 1.0 0.90] 1980 # puts [::math::statistics::test-mean $data 2.0 2.0 0.90] 1981 1982 set rc [catch { 1983 set m [::math::statistics::mean {}] 1984 } msg ] ; # {} 1985 puts "Result: $rc $msg" 1986 1987 puts "\nTest quantiles:" 1988 set data {1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0} 1989 set quantiles {0.11 0.21 0.51 0.91 0.99} 1990 set limits {2.1 4.1 6.1 8.1} 1991 puts [::math::statistics::quantiles $data $quantiles] 1992 1993 set histogram [::math::statistics::histogram $limits $data] 1994 puts [::math::statistics::quantiles $limits $histogram $quantiles] 1995 1996 puts "\nTest autocorrelation:" 1997 set data {1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0} 
1998 puts [::math::statistics::autocorr $data] 1999 set data {1.0 -1.1 2.0 -0.6 3.0 -4.0 0.5 0.9 -1.0} 2000 puts [::math::statistics::autocorr $data] 2001 2002 puts "\nTest histogram limits:" 2003 puts [::math::statistics::mean-histogram-limits 1.0 1.0] 2004 puts [::math::statistics::mean-histogram-limits 1.0 1.0 4] 2005 puts [::math::statistics::minmax-histogram-limits 1.0 10.0 10] 2006 2007} 2008 2009# 2010# Test xbar/R-chart procedures 2011# 2012if { 0 } { 2013 set data {} 2014 for { set i 0 } { $i < 500 } { incr i } { 2015 lappend data [expr {rand()}] 2016 } 2017 set limits [::math::statistics::control-xbar $data] 2018 puts $limits 2019 2020 puts "Outliers? [::math::statistics::test-xbar $limits $data]" 2021 2022 set newdata {1.0 1.0 1.0 1.0 0.5 0.5 0.5 0.5 10.0 10.0 10.0 10.0} 2023 puts "Outliers? [::math::statistics::test-xbar $limits $newdata] -- 0 2" 2024 2025 set limits [::math::statistics::control-Rchart $data] 2026 puts $limits 2027 2028 puts "Outliers? [::math::statistics::test-Rchart $limits $data]" 2029 2030 set newdata {0.0 1.0 2.0 1.0 0.4 0.5 0.6 0.5 10.0 0.0 10.0 10.0} 2031 puts "Outliers? [::math::statistics::test-Rchart $limits $newdata] -- 0 2" 2032} 2033 2034