package Statistics::Descriptive::Discrete;

### This module draws heavily from Statistics::Descriptive

use strict;
use warnings;
use Carp;
use AutoLoader;
use vars qw($VERSION $AUTOLOAD $DEBUG $Tolerance %autosubs);

$VERSION = '0.12';
$DEBUG   = 0;

# See Statistics::Descriptive documentation for use of $Tolerance.
# Values whose magnitude is <= $Tolerance are treated as zero by the
# harmonic mean calculation.
$Tolerance = 0.0;

# Names that AUTOLOAD will serve as read-only accessors. Every one of them
# is a key of the object hash; all except "text" are filled in by _all_stats().
%autosubs = (
    count              => undef,
    mean               => undef,
    geometric_mean     => undef,
    harmonic_mean      => undef,
    sum                => undef,
    mode               => undef,
    median             => undef,
    min                => undef,
    max                => undef,
    mindex             => undef,
    maxdex             => undef,
    standard_deviation => undef,
    sample_range       => undef,
    variance           => undef,
    text               => undef,
);

# Constructor. Takes no arguments (any that are passed are only echoed when
# $DEBUG is set). May be called on a class name or on an existing object.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = { _permitted => \%autosubs };

    bless($self, $class);
    $self->_reset();

    print __PACKAGE__,"->new(",join(',',@_),")\n" if $DEBUG;
    return $self;
}

# Put the storage fields into their pristine state:
#   data       - hash ref: value => number of occurrences
#   _dataindex - hash ref: value => index at which the value was first added
#   dirty      - true when cached statistics are out of date
#   _index     - running count of data items added so far
sub _reset {
    my $self = shift;

    $self->{data}       = {};
    $self->{_dataindex} = {};
    $self->{dirty}      = 1;
    $self->{_index}     = 0;
    return;
}

# Clear the stat object & erase all data.
# Object will be ready to use as if new was called.
# Not sure this is more efficient than just creating a new object but
# maintained for compatibility with Statistics::Descriptive.
sub clear {
    my $self = shift;

    # Everything except the accessor whitelist is dropped, including cached
    # statistics, the cached frequency distribution and any text label.
    foreach my $key (grep { $_ ne '_permitted' } keys %{$self}) {
        print __PACKAGE__,"->clear, deleting $key\n" if $DEBUG;
        delete $self->{$key};
    }

    $self->_reset();
    return;
}

# Add individual data values; no statistics are computed here.
# Processing stops at the first undefined argument.
sub add_data {
    my $self = shift;
    print __PACKAGE__,"->add_data(",join(',',@_),")\n" if $DEBUG;

    while (defined(my $val = shift)) {
        # adding 0 forces a number, so that 0.000 and 0 are treated the same
        $val += 0;

        $self->{data}{$val}++;
        $self->{_dataindex}{$val} = $self->{_index}
            unless exists $self->{_dataindex}{$val};
        $self->{_index}++;

        # set dirty flag so we know cached stats are invalid
        $self->{dirty}++;
    }
    return;
}

# Add data given as (value, count) pairs, e.g. 4,2 means 2 occurrences of
# the value 4. No statistics are computed here.
# Thanks to Bill Dueber for suggesting this.
#
# Warns (carp) when the argument list has an odd length; the unpaired
# trailing value is ignored. Processing stops at the first undefined count.
# A count of 0 sets the dirty flag but has no effect on the statistics.
sub add_data_tuple {
    my $self = shift;
    print __PACKAGE__,"->add_data_tuple(",join(',',@_),")\n" if $DEBUG;

    # we want an even number of arguments (tuples in the form (value, count))
    carp "argument list must have even number of elements" if @_ % 2;

    while (@_ >= 2) {
        my ($val, $count) = splice @_, 0, 2;
        last unless defined $count;

        # set dirty flag so we know cached stats are invalid
        $self->{dirty}++;

        # Do not record values that occurred zero times: a stored key with a
        # zero count would otherwise take part in min/max/uniq and could
        # leave the object with keys but a total count of 0.
        next if $count == 0;

        # adding 0 forces a number, so that 0.000 and 0 are treated the same
        $val += 0;

        $self->{data}{$val} += $count;
        $self->{_dataindex}{$val} = $self->{_index}
            unless exists $self->{_dataindex}{$val};
        $self->{_index} += $count;
    }
    return;
}

# True when $val is within $Tolerance of zero.
sub _test_for_too_small_val {
    my $self = shift;
    my $val  = shift;

    return (abs($val) <= $Statistics::Descriptive::Discrete::Tolerance);
}

# Harmonic mean = $count / (sum of reciprocals).
#   $count    - total number of data points
#   $datakeys - array ref of the distinct values
# Returns nothing (undef in scalar context) when any value, or the sum of
# the reciprocals, is too close to zero.
sub _calc_harmonic_mean {
    my ($self, $count, $datakeys) = @_;

    my $reciprocal_sum = 0;
    foreach my $val (@{$datakeys}) {
        # Guarantee that there are no divide by zeros
        return if $self->_test_for_too_small_val($val);

        # n occurrences of $val contribute n/$val; no per-point loop needed
        $reciprocal_sum += $self->{data}{$val} / $val;
    }

    return if $self->_test_for_too_small_val($reciprocal_sum);
    return $count / $reciprocal_sum;
}

# Compute all the stats in one sub to save the overhead of sub calls.
# A little wasteful if all we want is count or sum, for example, but it keeps
# add_data as lean as possible since that gets called a lot.
# Results are cached in the object hash under the names listed in %autosubs
# and the dirty flag is cleared. All work is per distinct value, never per
# data point.
sub _all_stats {
    my $self = shift;
    print __PACKAGE__,"->_all_stats(",join(',',@_),")\n" if $DEBUG;

    my $data = $self->{data} || {};

    # count = total number of data values we have
    my $count = 0;
    $count += $_ foreach (values %{$data});

    # No data: every statistic is undef and count is 0. The decision is made
    # on the total count so an empty hash is handled the same as a missing
    # one. "text" is a user-supplied label, not a statistic, so it is kept.
    if ($count == 0) {
        foreach my $key (keys %{$self->{_permitted}}) {
            next if $key eq 'text';
            $self->{$key} = undef;
        }
        $self->{count} = 0;
        return;
    }

    # Sorting once gives min and max directly, feeds the median search and
    # makes the mode tie-break (and the order of summation) deterministic.
    my @datakeys = sort { $a <=> $b } keys %{$data};
    my $min      = $datakeys[0];
    my $max      = $datakeys[-1];

    # only finds one mode but there could be more than one; on a tie the
    # smallest value wins
    # also, there might not be any mode (all the same frequency)
    # todo: need to make this more robust
    my $mode  = $min;
    my $moden = 0;
    my $sum   = 0;
    foreach my $val (@datakeys) {
        my $n = $data->{$val};
        $sum += $val * $n;
        if ($n > $moden) {
            $mode  = $val;
            $moden = $n;
        }
    }

    my $mindex = $self->{_dataindex}{$min};
    my $maxdex = $self->{_dataindex}{$max};
    my $mean   = $sum / $count;

    # Sample variance (n-1 denominator). With a single data point the
    # variance is reported as 0 and the standard deviation as undef.
    # Thanks to Peter Dienes for finding and fixing a round-off error
    # in this calculation.
    my $variance = 0;
    my $stddev;
    if ($count > 1) {
        my $squares = 0;
        $squares += $data->{$_} * (($_ - $mean) ** 2) foreach (@datakeys);
        $variance = $squares / ($count - 1);
        $stddev   = sqrt($variance);
    }

    # Median, found from cumulative counts over the sorted distinct values:
    # if n=count is odd and n=2k+1 then median = data(k+1)
    # if n=count is even and n=2k, then median = (data(k) + data(k+1))/2
    my $is_even = !($count % 2);
    my $k       = $is_even ? $count / 2 : ($count - 1) / 2;
    my ($lower, $upper);
    my $seen = 0;
    foreach my $val (@datakeys) {
        $seen += $data->{$val};
        $lower = $val if !defined($lower) && $seen >= $k;    # data(k)
        if ($seen >= $k + 1) {                               # data(k+1)
            $upper = $val;
            last;
        }
    }
    my $median;
    if (defined $upper) {
        $median = $is_even ? ($lower + $upper) / 2 : $upper + 0;
    }

    # Geometric mean: product of value ** (occurrences / count).
    # Undefined as soon as a negative value is present.
    my $gm       = 1;
    my $exponent = 1 / $count;
    foreach my $val (@datakeys) {
        if ($val < 0) {
            $gm = undef;
            last;
        }
        $gm *= $val ** ($data->{$val} * $exponent);
    }

    # compute harmonic mean
    my $harmonic_mean = scalar $self->_calc_harmonic_mean($count, \@datakeys);

    print __PACKAGE__,"count: $count, _index ",$self->{_index},"\n" if $DEBUG;

    $self->{count}              = $count;
    $self->{sum}                = $sum;
    $self->{standard_deviation} = $stddev;
    $self->{variance}           = $variance;
    $self->{min}                = $min;
    $self->{max}                = $max;
    $self->{mindex}             = $mindex;
    $self->{maxdex}             = $maxdex;
    $self->{sample_range}       = $max - $min; #todo: does this require any bounds checking
    $self->{mean}               = $mean;
    $self->{geometric_mean}     = $gm;
    $self->{harmonic_mean}      = $harmonic_mean;
    $self->{median}             = $median;
    $self->{mode}               = $mode;

    # clear dirty flag so we don't needlessly recompute the statistics
    $self->{dirty} = 0;
    return;
}

# Attach a free-form text label to the object; read it back with text().
# Returns the label.
sub set_text {
    my ($self, $text) = @_;

    $self->{text} = $text;
    return $text;
}

# Returns a list of the data in sorted order.
# The list could be very big and thus defeat the purpose of using this
# module; use this only if you really need it.
sub get_data {
    my $self = shift;
    print __PACKAGE__,"->get_data(",join(',',@_),")\n" if $DEBUG;

    my $data = $self->{data} || {};

    my @values;
    foreach my $val (sort { $a <=> $b } keys %{$data}) {
        push @values, ($val) x $data->{$val};
    }
    return @values;
}

# Historical note: an earlier frequency_distribution implementation lived
# here as commented-out code. It differed from Statistics::Descriptive:
# with no partitions (or partitions == 0) it returned the raw value => count
# hash without binning, and it accepted a manual (min bin, bin size) pair in
# addition to the number of partitions. It was replaced by the
# implementation below, which follows Statistics::Descriptive.

# Build a frequency distribution and return it as a hash ref
# (bin upper bound => number of data points in the bin).
#
# Arguments (one of):
#   $partitions - number (>= 1) of equal-width bins spanning min..max
#   \@bins      - explicit, monotonically non-decreasing bin upper bounds
#   (none)      - return the cached result of the previous call when the
#                 data has not changed since
#
# Returns undef when there are fewer than two data points, when the bin
# list is empty or not monotonic (the latter also carps), or when no usable
# argument is given and nothing is cached.
sub frequency_distribution_ref {
    my $self = shift;
    my @k    = ();

    # If called with no parameters, return the cached hashref
    # if we have one and data is not dirty.
    # This is implemented this way because that's how Statistics::Descriptive
    # implements this. I don't like it.
    if ((!@_) && (!$self->{dirty}) && (defined $self->{_frequency})) {
        return $self->{_frequency};
    }

    $self->_all_stats() if $self->{dirty}; #recompute stats if dirty, (so we have count)

    # Must have at least two elements
    if ($self->count() < 2) {
        return undef;
    }

    my %bins;
    my $partitions = shift;

    if (ref($partitions) eq 'ARRAY') {
        @k = @{$partitions};
        return undef unless @k;    ##Empty array
        if (@k > 1) {
            ##Check for monotonicity
            my $element = $k[0];
            for my $next_elem (@k[1..$#k]) {
                if ($element > $next_elem) {
                    carp "Non monotonic array cannot be used as frequency bins!\n";
                    return undef;
                }
                $element = $next_elem;
            }
        }
        %bins = map { $_ => 0 } @k;
    }
    else {
        return undef unless (defined $partitions) && ($partitions >= 1);
        my $interval = $self->sample_range() / $partitions;
        foreach my $idx (1 .. ($partitions - 1)) {
            push @k, ($self->min() + $idx * $interval);
        }

        # Only the last bin is pre-seeded; as in Statistics::Descriptive,
        # interior bins that receive no data do not appear in the result.
        $bins{$self->max()} = 0;

        push @k, $self->max();
    }

    # Each distinct value goes into the first bin whose upper bound is >= it;
    # values above the last bound are not counted.
    ELEMENT:
    foreach my $element (keys %{$self->{data}}) {
        foreach my $limit (@k) {
            if ($element <= $limit) {
                $bins{$limit} += $self->{data}{$element};
                next ELEMENT;
            }
        }
    }

    $self->{_frequency} = \%bins;
    return $self->{_frequency};
}

# Same as frequency_distribution_ref() but returns the hash flattened into
# a list. Returns undef (a one-element list in list context) on failure,
# for compatibility with existing callers.
sub frequency_distribution {
    my $self = shift;

    my $ret = $self->frequency_distribution_ref(@_);

    if (!defined($ret)) {
        return undef;
    }
    else {
        return %$ret;
    }
}

# return count of unique values in data if called in scalar context
# returns sorted array of unique data values if called in array context
# returns undef if no data (a one-element list in list context; kept for
# compatibility with existing callers)
sub uniq {
    my $self = shift;

    my $data = $self->{data};
    if (!$data || !%{$data}) {
        return undef;
    }

    my @datakeys = sort { $a <=> $b } keys %{$data};

    return wantarray ? @datakeys : scalar(@datakeys);
}

# Read-only accessors for the names in %autosubs. Statistics are recomputed
# first when data was added since the last computation. Croaks when invoked
# on a non-object or for a name that is not permitted.
sub AUTOLOAD {
    my $self = shift;
    my $type = ref($self)
        or croak "$self is not an object";
    my $name = $AUTOLOAD;
    $name =~ s/.*://;    ##Strip fully qualified-package portion
    return if $name eq "DESTROY";
    unless (exists $self->{_permitted}{$name}) {
        croak "Can't access `$name' field in class $type";
    }

    print __PACKAGE__,"->AUTOLOAD $name\n" if $DEBUG;

    #compute stats if necessary
    $self->_all_stats() if $self->{dirty};
    return $self->{$name};
}

1;

__END__

=head1 NAME

Statistics::Descriptive::Discrete - Compute descriptive statistics for discrete data sets.

To install, use the CPAN module (https://metacpan.org/pod/Statistics::Descriptive::Discrete).

=head1 SYNOPSIS

  use Statistics::Descriptive::Discrete;

  my $stats = Statistics::Descriptive::Discrete->new();
  $stats->add_data(1,10,2,1,1,4,5,1,10,8,7);
  print "count = ",$stats->count(),"\n";
  print "uniq  = ",$stats->uniq(),"\n";
  print "sum = ",$stats->sum(),"\n";
  print "min = ",$stats->min(),"\n";
  print "min index = ",$stats->mindex(),"\n";
  print "max = ",$stats->max(),"\n";
  print "max index = ",$stats->maxdex(),"\n";
  print "mean = ",$stats->mean(),"\n";
  print "geometric mean = ",$stats->geometric_mean(),"\n";
  print "harmonic mean = ", $stats->harmonic_mean(),"\n";
  print "standard_deviation = ",$stats->standard_deviation(),"\n";
  print "variance = ",$stats->variance(),"\n";
  print "sample_range = ",$stats->sample_range(),"\n";
  print "mode = ",$stats->mode(),"\n";
  print "median = ",$stats->median(),"\n";
  my $f = $stats->frequency_distribution_ref(3);
  for (sort {$a <=> $b} keys %$f) {
    print "key = $_, count = $f->{$_}\n";
  }

=head1 DESCRIPTION

This module provides basic functions used in descriptive statistics.
It borrows very heavily from Statistics::Descriptive::Full
(which is included with Statistics::Descriptive) with one major
difference.  This module is optimized for discretized data,
e.g. data from an A/D conversion that has a discrete set of possible values.
E.g. if your data is produced by an 8 bit A/D then you'd have only 256 possible
values in your data set.  Even though you might have a million data points,
you'd only have 256 different values in those million points.  Instead of storing the
entire data set as Statistics::Descriptive does, this module only stores
the values seen and the number of times each value occurs.

For very large data sets, this storage method results in significant speed
and memory improvements.  For example, for an 8-bit data set (256 possible values),
with 1,000,000 data points, this module is about 10x faster than Statistics::Descriptive::Full
or Statistics::Descriptive::Sparse.

Statistics::Descriptive run time is a function of the size of the data set. In particular,
repeated calls to C<add_data> are slow.  Statistics::Descriptive::Discrete's C<add_data> is
optimized for speed.  For a given number of data points, this module's run time will increase
as the number of unique data values in the data set increases. For example, while this module
runs about 10x the speed of Statistics::Descriptive::Full for an 8-bit data set, the
run speed drops to about 3x for an equivalent sized 20-bit data set.

See sdd_prof.pl in the examples directory to play with profiling this module against
Statistics::Descriptive::Full.

=head1 METHODS

=over

=item $stat = Statistics::Descriptive::Discrete->new();

Create a new statistics object.

=item $stat->add_data(1,2,3,4,5);

Adds data to the statistics object.  Sets a flag so that
the statistics will be recomputed the next time they're
needed.

=item $stat->add_data_tuple(1,2,42,3);

Adds data to the statistics object where every two elements
are a value and a count (how many times did the value occur?).
The above is equivalent to C<< $stat->add_data(1,1,42,42,42); >>.
Use this when your data is in a form isomorphic to
($value, $occurrence).

=item $stat->max();

Returns the maximum value of the data set.

=item $stat->min();

Returns the minimum value of the data set.

=item $stat->mindex();

Returns the index of the minimum value of the data set.
The index returned is the first occurrence of the minimum value.

Note: the index is determined by the order data was added using
C<add_data()> or C<add_data_tuple()>.  It is meaningless in context of
C<get_data()> as C<get_data()> does not return values in the same
order in which they were added.  This behavior is different than
Statistics::Descriptive which does preserve order.

=item $stat->maxdex();

Returns the index of the maximum value of the data set.
The index returned is the first occurrence of the maximum value.

Note: the index is determined by the order data was added using
C<add_data()> or C<add_data_tuple()>.  It is meaningless in context of
C<get_data()> as C<get_data()> does not return values in the same
order in which they were added.  This behavior is different than
Statistics::Descriptive which does preserve order.

=item $stat->count();

Returns the total number of elements in the data set.

=item $stat->uniq();

If called in scalar context, returns the total number of unique elements in the data set.
For example, if your data set is (1,2,2,3,3,3), uniq will return 3.

If called in array context, returns an array of each data value in the data set in sorted order.
In the above example, C<< @uniq = $stats->uniq(); >> would return (1,2,3).

This function is specific to Statistics::Descriptive::Discrete
and is not implemented in Statistics::Descriptive.

It is useful for getting a frequency distribution for each discrete value in the data set:

  my $stats = Statistics::Descriptive::Discrete->new();
  $stats->add_data_tuple(1,1,2,2,3,3,4,4,5,5,6,6,7,7);
  my @bins = $stats->uniq();
  my $f = $stats->frequency_distribution_ref(\@bins);
  for (sort {$a <=> $b} keys %$f) {
    print "value = $_, count = $f->{$_}\n";
  }

=item $stat->sum();

Returns the sum of all the values in the data set.

=item $stat->mean();

Returns the mean of the data.

=item $stat->harmonic_mean();

Returns the harmonic mean of the data.  Since the mean is undefined
if any of the data are zero or if the sum of the reciprocals is zero,
it will return undef for both of those cases.

=item $stat->geometric_mean();

Returns the geometric mean of the data.  Returns C<undef> if any of the data
are less than 0. Returns 0 if any of the data are 0.

=item $stat->median();

Returns the median value of the data.

=item $stat->mode();

Returns the mode of the data.

=item $stat->variance();

Returns the variance of the data.

=item $stat->standard_deviation();

Returns the standard deviation of the data.

=item $stat->sample_range();

Returns the sample range (max - min) of the data set.

=item $stat->frequency_distribution_ref($num_partitions);

=item $stat->frequency_distribution_ref(\@bins);

=item $stat->frequency_distribution_ref();

C<frequency_distribution_ref($num_partitions)> slices the data into
C<$num_partitions> sets (where C<$num_partitions> is greater than or equal
to 1) and counts the number of items that fall into each partition. It
returns a reference to a hash where the keys are the numerical values of the
partitions used. The minimum value of the data set is not a key and the
maximum value of the data set is always a key. The number of entries for a
particular partition key are the number of items which are greater than the
previous partition key and less than or equal to the current partition key.
As an example,

  $stat->add_data(1,1.5,2,2.5,3,3.5,4);
  $f = $stat->frequency_distribution_ref(2);
  for (sort {$a <=> $b} keys %$f) {
    print "key = $_, count = $f->{$_}\n";
  }

prints

  key = 2.5, count = 4
  key = 4, count = 3

since there are four items less than or equal to 2.5, and 3 items
greater than 2.5 and less than or equal to 4.

C<frequency_distribution_ref(\@bins)> provides the bins that are to be used
for the distribution.  This allows for non-uniform distributions as
well as trimmed or sample distributions to be found.  C<@bins> must
be monotonic and must contain at least one element.  Note that unless the
set of bins contains the full range of the data, the total counts returned will
be less than the sample size.

Calling C<frequency_distribution_ref()> with no arguments returns the last
distribution calculated, if such exists.

=item my %hash = $stat->frequency_distribution($partitions);

=item my %hash = $stat->frequency_distribution(\@bins);

=item my %hash = $stat->frequency_distribution();

Same as C<frequency_distribution_ref()> except that it returns the hash
flattened into the return list. Kept for compatibility reasons with previous
versions of Statistics::Descriptive::Discrete and using it is discouraged.

Note: in earlier versions of Statistics::Descriptive::Discrete, C<frequency_distribution()>
behaved differently than the Statistics::Descriptive implementation.  Any code that uses
this function should be carefully checked to ensure compatibility with the current
implementation.

=item $stat->get_data();

Returns a copy of the data array.  Note: This array could be
very large and would thus defeat the purpose of using this
module.  Make sure you really need it before using get_data().

The returned array contains the values sorted by value.  It does
not preserve the order in which the values were added.  Preserving
order would defeat the purpose of this module, which favors speed
and memory usage over preserving order.  If order is important,
use Statistics::Descriptive.

=item $stat->clear();

Clears all data and resets the instance as if it were newly created.

Effectively the same as

  my $class = ref($stat);
  undef $stat;
  $stat = $class->new();

=back

=head1 NOTE

The interface for this module strives to be identical to Statistics::Descriptive.
Any differences are noted in the description for each method.

=head1 BUGS

=over

=item *

Code for calculating mode is not as robust as it should be.

=item *

Other bugs are lurking I'm sure.

=back

=head1 TODO

=over

=item *

Add rest of methods (at least ones that don't depend on original order of data)
from Statistics::Descriptive

=back

=head1 AUTHOR

Rhet Turnbull, rturnbull+cpan@gmail.com

=head1 CREDIT

Thanks to the following individuals for finding bugs, providing feedback,
and submitting changes:

=over

=item *

Peter Dienes for finding and fixing a bug in the variance calculation.

=item *

Bill Dueber for suggesting the add_data_tuple method.

=back

=head1 COPYRIGHT

  Copyright (c) 2002, 2019 Rhet Turnbull. All rights reserved.  This
  program is free software; you can redistribute it and/or modify it
  under the same terms as Perl itself.

  Portions of this code are from Statistics::Descriptive which is under
  the following copyrights:

  Copyright (c) 1997,1998 Colin Kuskie. All rights reserved.  This
  program is free software; you can redistribute it and/or modify it
  under the same terms as Perl itself.

  Copyright (c) 1998 Andrea Spinelli. All rights reserved.  This program
  is free software; you can redistribute it and/or modify it under the
  same terms as Perl itself.

  Copyright (c) 1994,1995 Jason Kastner. All rights
  reserved.  This program is free software; you can redistribute it
  and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO

Statistics::Descriptive

Statistics::Discrete