1package Statistics::Descriptive::Discrete;
2
3### This module draws heavily from Statistics::Descriptive
4
5use strict;
6use warnings;
7use Carp;
8use AutoLoader;
9use vars qw($VERSION $AUTOLOAD $DEBUG $Tolerance %autosubs);
10
11$VERSION = '0.12';
12$DEBUG = 0;
13
14#see Statistics::Descriptive documentation for use of $Tolerance
15$Tolerance = 0.0;
16
# Names that AUTOLOAD is allowed to serve as accessors: the computed
# statistics plus the free-form "text" label.  Only the keys matter; every
# value is undef.
%autosubs = map { ($_ => undef) } qw(
    count
    mean
    geometric_mean
    harmonic_mean
    sum
    mode
    median
    min
    max
    mindex
    maxdex
    standard_deviation
    sample_range
    variance
    text
);
35
36
# Constructor.  May be called on the class or on an existing instance (the
# new object is then blessed into that instance's class).  Extra arguments
# are ignored apart from being echoed when $DEBUG is set.
sub new {
    my $invocant = shift;
    my $class    = ref($invocant) || $invocant;

    my $self = bless {
        _permitted => \%autosubs,    # names AUTOLOAD may serve
        data       => undef,         # value => occurrence count, created on first add
        _dataindex => undef,         # value => index at which it was first seen
        dirty      => 1,             # true while cached statistics are stale
        _index     => 0,             # running index of data items added
    }, $class;

    print __PACKAGE__, "->new(", join(',', @_), ")\n" if $DEBUG;
    return $self;
}
52
# Reset the object to the state produced by new(): every key except
# _permitted is removed (cached statistics, the text label, any cached
# frequency distribution) and the bookkeeping fields are re-initialised.
# Probably no cheaper than building a fresh object, but kept for
# compatibility with Statistics::Descriptive.
sub clear {
    my $self = shift;

    # keys() hands back a snapshot list, so deleting inside the loop is safe
    for my $field (grep { $_ ne '_permitted' } keys %{$self}) {
        print __PACKAGE__, "->clear, deleting $field\n" if $DEBUG;
        delete $self->{$field};
    }

    $self->{data}       = undef;
    $self->{_dataindex} = undef;
    $self->{dirty}      = 1;
    $self->{_index}     = 0;
}
75
# Record each argument as one occurrence of that value.  No statistics are
# computed here; the dirty flag is bumped so they get recomputed on demand.
# Processing stops at the first undefined argument.
sub add_data {
    my ($self, @values) = @_;
    print __PACKAGE__, "->add_data(", join(',', @values), ")\n" if $DEBUG;

    while (defined(my $raw = shift @values)) {
        # numify so that e.g. "0.000" and 0 end up under the same hash key
        my $value = $raw + 0;

        $self->{data}{$value}++;

        # remember where this value was first seen (used for mindex/maxdex)
        $self->{_dataindex}{$value} = $self->{_index}
            unless exists $self->{_dataindex}{$value};
        $self->{_index}++;

        # cached statistics are now stale
        $self->{dirty}++;
    }
}
98
# Add data given as (value, count) pairs, e.g. (4, 2) means two occurrences
# of the value 4.  No statistics are computed here; the dirty flag is bumped
# once per pair so they get recomputed on demand.
# Thanks to Bill Dueber for suggesting this.
#
# An odd number of arguments triggers a warning (carp) and the dangling
# value is ignored.  Processing stops at the first pair whose count is
# undefined.  A pair with a count of 0 marks the object dirty but records
# nothing, so it has no effect on any statistic.
sub add_data_tuple {
    my ($self, @pairs) = @_;
    print __PACKAGE__, "->add_data_tuple(", join(',', @pairs), ")\n" if $DEBUG;

    # we want an even number of arguments (tuples in the form (value, count))
    carp "argument list must have even number of elements" if @pairs % 2;

    while (@pairs) {
        my ($raw, $count) = splice(@pairs, 0, 2);
        last unless defined $count;

        # set dirty flag so we know cached stats are invalid
        $self->{dirty}++;

        # Zero occurrences must leave the data untouched: creating a
        # zero-count key would make the value visible to min/max/uniq and
        # would pin a misleading first-seen index for it.
        next if $count == 0;

        # numify so that e.g. "0.000" and 0 end up under the same hash key
        my $value = $raw + 0;

        $self->{data}{$value} += $count;
        $self->{_dataindex}{$value} = $self->{_index}
            unless exists $self->{_dataindex}{$value};
        $self->{_index} += $count;
    }
}
132
# True when the absolute value of $val does not exceed the package-wide
# $Tolerance; used as a guard before dividing by $val.
sub _test_for_too_small_val {
    my ($self, $val) = @_;

    my $magnitude = abs($val);
    return $magnitude <= $Statistics::Descriptive::Discrete::Tolerance;
}
140
# Compute the harmonic mean: $count divided by the sum of reciprocals.
#
#   $count    - total number of data points
#   $datakeys - array ref of the distinct data values (keys of $self->{data})
#
# Returns nothing (undef in scalar context) when any value, or the sum of
# the reciprocals, is within $Tolerance of zero, since the mean is then
# undefined.
sub _calc_harmonic_mean {
    my ($self, $count, $datakeys) = @_;

    my $reciprocal_sum = 0;

    for my $val (@{$datakeys}) {
        # guarantee that there are no divide by zeros
        return if $self->_test_for_too_small_val($val);

        # Each distinct value contributes occurrences/value in one step, so
        # the cost depends on the number of distinct values rather than on
        # the total number of data points.
        $reciprocal_sum += $self->{data}{$val} / $val;
    }

    return if $self->_test_for_too_small_val($reciprocal_sum);

    return $count / $reciprocal_sum;
}
170
# Compute every statistic in one pass and cache the results on the object.
# Doing it all here is a little wasteful when only count or sum is wanted,
# but it keeps add_data as lean as possible since that gets called a lot.
#
# Sets: count, sum, mean, min, max, mindex, maxdex, sample_range, mode,
# median, variance, standard_deviation, geometric_mean, harmonic_mean, and
# clears the dirty flag.
#
# When there is no data (or the occurrence counts sum to zero) every
# permitted statistic is set to undef, count is set to 0, and the dirty flag
# is left untouched.  The user-supplied text label is never modified.
sub _all_stats {
    my $self = shift;
    print __PACKAGE__,"->_all_stats(",join(',',@_),")\n" if $DEBUG;

    # Statistics are only recomputed after the data changed, so a cached
    # frequency distribution can no longer be trusted.
    delete $self->{_frequency};

    # count = total number of data values we have
    my $data  = $self->{data};
    my $count = 0;
    if ($data) {
        $count += $_ for values %{$data};
    }

    # Nothing to summarise.  Testing the total (not just the presence of the
    # hash) also protects the divisions below from a zero divisor.
    if (!$count) {
        for my $stat (keys %{$self->{_permitted}}) {
            next if $stat eq 'text';    # a label, not a statistic
            $self->{$stat} = undef;
        }
        $self->{count} = 0;
        return;
    }

    # One numeric sort serves min, max and median, and makes the float
    # summation order (and mode tie-breaking) independent of hash order.
    my @datakeys = sort { $a <=> $b } keys %{$data};
    my $min = $datakeys[0];
    my $max = $datakeys[-1];

    # find sum and mode
    # only one mode is reported; on ties the smallest value wins
    # todo: need to make this more robust
    my $sum   = 0;
    my $mode  = $min;
    my $moden = 0;
    for my $val (@datakeys) {
        my $n = $data->{$val};
        $sum += $val * $n;
        if ($n > $moden) {
            $mode  = $val;
            $moden = $n;
        }
    }

    my $mindex = $self->{_dataindex}{$min};
    my $maxdex = $self->{_dataindex}{$max};

    my $mean = $sum / $count;

    # Sample variance (n-1 denominator).  With a single data point the
    # variance is reported as 0 and the standard deviation as undef.
    my $variance = 0;
    my $stddev;
    if ($count > 1) {
        # Thanks to Peter Dienes for finding and fixing a round-off error
        # in the following variance calculation
        my $squares = 0;
        for my $val (@datakeys) {
            $squares += $data->{$val} * (($val - $mean) ** 2);
        }
        $variance = $squares / ($count - 1);
        $stddev   = sqrt($variance);
    }

    # find median without creating a list of all the data points
    # if n=count is odd and n=2k+1 then median = data(k+1)
    # if n=count is even and n=2k, then median = (data(k) + data(k+1))/2
    # $seen is the number of data points covered so far, so a value holds
    # position p exactly when ($before < p <= $seen).
    my $is_even = !($count % 2);
    my $k       = $is_even ? $count / 2 : ($count - 1) / 2;
    my $median;
    my $seen = 0;
    for my $val (@datakeys) {
        my $before = $seen;
        $seen += $data->{$val};

        # lower middle element (only exists for an even count)
        $median += $val if $is_even && $before < $k && $k <= $seen;

        # upper middle element (the only one for an odd count)
        if ($seen >= $k + 1) {
            $median += $val;
            $median /= 2 if $is_even;
            last;
        }
    }

    # compute geometric mean: undef if any value is negative, otherwise the
    # product of value ** (occurrences / count) over the distinct values
    my $gm       = 1;
    my $exponent = 1 / $count;
    for my $val (@datakeys) {
        if ($val < 0) {
            $gm = undef;
            last;
        }
        $gm *= $val ** ($data->{$val} * $exponent);
    }

    # compute harmonic mean
    my $harmonic_mean = scalar $self->_calc_harmonic_mean($count, \@datakeys);

    print __PACKAGE__,"count: $count, _index ",$self->{_index},"\n" if $DEBUG;

    $self->{count}              = $count;
    $self->{sum}                = $sum;
    $self->{standard_deviation} = $stddev;
    $self->{variance}           = $variance;
    $self->{min}                = $min;
    $self->{max}                = $max;
    $self->{mindex}             = $mindex;
    $self->{maxdex}             = $maxdex;
    $self->{sample_range}       = $max - $min; #todo: does this require any bounds checking
    $self->{mean}               = $mean;
    $self->{geometric_mean}     = $gm;
    $self->{harmonic_mean}      = $harmonic_mean;
    $self->{median}             = $median;
    $self->{mode}               = $mode;

    # clear dirty flag so we don't needlessly recompute the statistics
    $self->{dirty} = 0;
}
308
# Store a caller-supplied label on the object under the "text" key and
# return the stored value.
sub set_text {
    my ($self, $label) = @_;
    return $self->{text} = $label;
}
314
# Return a list of every data point, sorted numerically (each distinct value
# repeated as many times as it occurred).  Returns an empty list when no
# data has been added.
#
# The list can be very big, which defeats the purpose of using this module;
# use it only if you really need it.
sub get_data {
    my $self = shift;
    print __PACKAGE__,"->get_data(",join(',',@_),")\n" if $DEBUG;

    # Read through a local fallback instead of dereferencing $self->{data}
    # directly: keys() on an undefined slot would autovivify an empty hash
    # there, and other methods treat an undefined {data} as "no data yet".
    my $counts = $self->{data} || {};

    my @data;
    for my $val (sort { $a <=> $b } keys %{$counts}) {
        push @data, ($val) x $counts->{$val};
    }
    return @data;
}
330
331# this is the previous frequency_distribution code
332# redid this completely based on current implementation in
333# Statistics::Descriptive
334# sub frequency_distribution
335# {
336# 	#Compute frequency distribution (histogram), borrowed heavily from Statistics::Descriptive
337# 	#Behavior is slightly different than Statistics::Descriptive
338# 	#e.g. if partition is not specified, we use  to set the number of partitions
339# 	#     if partition = 0, then we return the data hash WITHOUT binning it into equal bins
340# 	#	  I often want to just see how many of each value I saw
341# 	#Also, you can manually pass in the bin info (min bin, bin size, and number of partitions)
342# 	#I don't cache the frequency data like Statistics::Descriptive does since it's not as expensive to compute
343# 	#but I might add that later
344# 	#todo: the minbin/binsize stuff is funky and not intuitive -- fix it
345# 	my $self = shift;
346# 	print __PACKAGE__,"->frequency_distribution(",join(',',@_),")\n" if $DEBUG;
347
348# 	my $partitions = shift; #how many partitions (bins)?
349# 	my $minbin = shift; #upper bound of first bin
350# 	my $binsize = shift; #how wide is each bin?
351
352# 	#if partition == 0, then return the data hash
353# 	if (not defined $partitions || ($partitions == 0))
354# 	{
355# 		$self->{frequency_partitions} = 0;
356# 		%{$self->{frequency}} = %{$self->{data}};
357# 		return %{$self->{frequency}};
358# 	}
359
360# 	#otherwise, partition better be >= 1
361# 	return undef unless $partitions >= 1;
362
363# 	$self->_all_stats() if $self->{dirty}; #recompute stats if dirty, (so we have count)
364# 	return undef if $self->{count} < 2; #must have at least 2 values
365
366# 	#set up the bins
367# 	my ($interval, $iter, $max);
368# 	if (defined $minbin && defined $binsize)
369# 	{
370# 		$iter = $minbin;
371# 		$max = $minbin+$partitions*$binsize - $binsize;
372# 		$interval = $binsize;
373# 		$iter -= $interval; #so that loop that sets up bins works correctly
374# 	}
375# 	else
376# 	{
377# 		$iter = $self->{min};
378# 		$max = $self->{max};
379# 		$interval = $self->{sample_range}/$partitions;
380# 	}
381# 	my @k;
382# 	my %bins;
383# 	while (($iter += $interval) < $max)
384# 	{
385# 		$bins{$iter} = 0;
386# 		push @k, $iter;
387# 	}
388# 	$bins{$max} = 0;
389# 	push @k, $max;
390
391# 	VALUE: foreach my $val (keys %{$self->{data}})
392# 	{
393# 		foreach my $k (@k)
394# 		{
395# 			if ($val <= $k)
396# 			{
397# 				$bins{$k} += $self->{data}{$val};  #how many of this value do we have?
398# 				next VALUE;
399# 			}
400# 		}
401# # 	}
402
403# 	%{$self->{frequency}} = %bins;   #save it for later in case I add caching
404# 	$self->{frequency_partitions} = $partitions; #in case I add caching in the future
405# 	return %{$self->{frequency}};
406# }
407
# Build a frequency distribution and return it as a hash ref mapping each
# bin's upper limit to the number of data points falling into that bin.
#
#   frequency_distribution_ref($n)      - $n (>= 1) equal-width partitions
#                                         between min and max
#   frequency_distribution_ref(\@bins)  - caller-supplied, non-decreasing
#                                         upper limits
#   frequency_distribution_ref()        - the cached result of the previous
#                                         call, if the data is not dirty
#
# Returns undef when there are fewer than two data points, when the argument
# is missing/invalid, or (after a carp) when @bins is not monotonic.
# The no-argument form mirrors Statistics::Descriptive; I don't like it.
sub frequency_distribution_ref {
    my ($self, @args) = @_;

    # no parameters: hand back the cached hashref if it is still valid
    return $self->{_frequency}
        if !@args && !$self->{dirty} && defined $self->{_frequency};

    # recompute stats if dirty (so we have count, min, max, sample_range)
    $self->_all_stats() if $self->{dirty};

    # must have at least two elements
    return undef if $self->count() < 2;

    my $partitions = shift @args;
    my @limits;
    my %bins;

    if (ref($partitions) eq 'ARRAY') {
        @limits = @{$partitions};
        return undef if !@limits;    # empty array

        # check for monotonicity
        for my $i (1 .. $#limits) {
            if ($limits[$i - 1] > $limits[$i]) {
                carp "Non monotonic array cannot be used as frequency bins!\n";
                return undef;
            }
        }

        @bins{@limits} = (0) x @limits;
    }
    else {
        return undef unless defined($partitions) && $partitions >= 1;

        my $width = $self->sample_range() / $partitions;
        @limits = map { $self->min() + $_ * $width } 1 .. ($partitions - 1);

        # the top bin always ends at the maximum and is always present
        push @limits, $self->max();
        $bins{$self->max()} = 0;
    }

    # each distinct value goes into the first bin whose limit is >= value;
    # values above the last limit are not counted
    for my $value (keys %{$self->{data}}) {
        for my $limit (@limits) {
            next unless $value <= $limit;
            $bins{$limit} += $self->{data}{$value};
            last;
        }
    }

    return $self->{_frequency} = \%bins;
}
480
# Same as frequency_distribution_ref(), but returns the distribution as a
# flattened (limit => count) list suitable for assigning to a hash.
#
# When no distribution can be computed this returns an empty list in list
# context (undef in scalar context), so that
#     my %hash = $stat->frequency_distribution(...);
# yields an empty hash rather than a bogus ('' => undef) entry and an
# "Odd number of elements" warning.
sub frequency_distribution {
    my $self = shift;

    my $distribution = $self->frequency_distribution_ref(@_);

    return unless defined $distribution;
    return %{$distribution};
}
495
# List context:   the distinct data values, sorted numerically.
# Scalar context: the number of distinct data values.
# With no data:   undef.
sub uniq {
    my $self = shift;

    return undef unless $self->{data};

    my @distinct = sort { $a <=> $b } keys %{$self->{data}};

    return wantarray ? @distinct : scalar @distinct;
}
520
# Read accessor for every name listed in the object's _permitted table
# (count, mean, median, ...).  Statistics are recomputed first when the
# object is dirty.  Croaks when invoked on a non-object or for a name that
# is not permitted; DESTROY is silently ignored.
sub AUTOLOAD {
    my $self  = shift;
    my $class = ref($self)
        or croak "$self is not an object";

    # reduce the fully qualified name, e.g. "Some::Package::mean", to "mean"
    (my $method = $AUTOLOAD) =~ s/.*://;

    return if $method eq 'DESTROY';

    croak "Can't access `$method' field in class $class"
        unless exists $self->{_permitted}{$method};

    print __PACKAGE__,"->AUTOLOAD $method\n" if $DEBUG;

    # compute stats if necessary
    $self->_all_stats() if $self->{dirty};

    return $self->{$method};
}
538
5391;
540
541__END__
542
543=head1 NAME
544
545Statistics::Descriptive::Discrete - Compute descriptive statistics for discrete data sets.
546
547To install, use the CPAN module (https://metacpan.org/pod/Statistics::Descriptive::Discrete).
548
549=head1 SYNOPSIS
550
551  use Statistics::Descriptive::Discrete;
552
553  my $stats = new Statistics::Descriptive::Discrete;
554  $stats->add_data(1,10,2,1,1,4,5,1,10,8,7);
555  print "count = ",$stats->count(),"\n";
556  print "uniq  = ",$stats->uniq(),"\n";
557  print "sum = ",$stats->sum(),"\n";
558  print "min = ",$stats->min(),"\n";
559  print "min index = ",$stats->mindex(),"\n";
560  print "max = ",$stats->max(),"\n";
561  print "max index = ",$stats->maxdex(),"\n";
562  print "mean = ",$stats->mean(),"\n";
563  print "geometric mean = ",$stats->geometric_mean(),"\n";
564  print "harmonic mean = ", $stats->harmonic_mean(),"\n";
565  print "standard_deviation = ",$stats->standard_deviation(),"\n";
566  print "variance = ",$stats->variance(),"\n";
567  print "sample_range = ",$stats->sample_range(),"\n";
568  print "mode = ",$stats->mode(),"\n";
569  print "median = ",$stats->median(),"\n";
570  my $f = $stats->frequency_distribution_ref(3);
571  for (sort {$a <=> $b} keys %$f) {
572    print "key = $_, count = $f->{$_}\n";
573  }
574
575=head1 DESCRIPTION
576
577This module provides basic functions used in descriptive statistics.
578It borrows very heavily from Statistics::Descriptive::Full
579(which is included with Statistics::Descriptive) with one major
580difference.  This module is optimized for discretized data
581e.g. data from an A/D conversion that  has a discrete set of possible values.
582E.g. if your data is produced by an 8 bit A/D then you'd have only 256 possible
583values in your data  set.  Even though you might have a million data points,
584you'd only have 256 different values in those million points.  Instead of storing the
585entire data set as Statistics::Descriptive does, this module only stores
586the values seen and the number of times each value occurs.
587
588For very large data sets, this storage method results in significant speed
589and memory improvements.  For example, for an 8-bit data set (256 possible values),
590with 1,000,000 data points,  this module is about 10x faster than Statistics::Descriptive::Full
591or Statistics::Descriptive::Sparse.
592
593Statistics::Descriptive run time is a factor of the size of the data set. In particular,
594repeated calls to C<add_data> are slow.  Statistics::Descriptive::Discrete's C<add_data> is
optimized for speed.  For a given number of data points, this module's run time will increase
596as the number of unique data values in the data set increases. For example, while this module
597runs about 10x the speed of Statistics::Descriptive::Full for an 8-bit data set, the
598run speed drops to about 3x for an equivalent sized 20-bit data set.
599
600See sdd_prof.pl in the examples directory to play with profiling this module against
601Statistics::Descriptive::Full.
602
603=head1 METHODS
604
605=over
606
607=item $stat = Statistics::Descriptive::Discrete->new();
608
609Create a new statistics object.
610
611=item $stat->add_data(1,2,3,4,5);
612
613Adds data to the statistics object.  Sets a flag so that
614the statistics will be recomputed the next time they're
615needed.
616
617=item $stat->add_data_tuple(1,2,42,3);
618
619Adds data to the statistics object where every two elements
620are a value and a count (how many times did the value occur?)
621The above is equivalent to C<< $stat->add_data(1,1,42,42,42); >>
622Use this when your data is in a form isomorphic to
623($value, $occurrence).
624
625=item $stat->max();
626
627Returns the maximum value of the data set.
628
629=item $stat->min();
630
631Returns the minimum value of the data set.
632
633=item $stat->mindex();
634
635Returns the index of the minimum value of the data set.
The index returned is the first occurrence of the minimum value.
637
638Note: the index is determined by the order data was added using add_data() or add_data_tuple().
639It is meaningless in context of get_data() as get_data() does not return values in the same
640order in which they were added.  This behavior is different than Statistics::Descriptive which
641does preserve order.
642
643=item $stat->maxdex();
644
645Returns the index of the maximum value of the data set.
The index returned is the first occurrence of the maximum value.
647
648Note: the index is determined by the order data was added using
649C<add_data()> or C<add_data_tuple()>. It is meaningless in context of
650C<get_data()> as C<get_data()> does not return values in the same
651order in which they were added.  This behavior is different than
652Statistics::Descriptive which does preserve order.
653
654=item $stat->count();
655
656Returns the total number of elements in the data set.
657
658=item $stat->uniq();
659
660If called in scalar context, returns the total number of unique elements in the data set.
661For example, if your data set is (1,2,2,3,3,3), uniq will return 3.
662
663If called in array context, returns an array of each data value in the data set in sorted order.
664In the above example, C<< @uniq = $stats->uniq(); >> would return (1,2,3)
665
666This function is specific to Statistics::Descriptive::Discrete
667and is not implemented in Statistics::Descriptive.
668
It is useful for getting a frequency distribution for each discrete value in the data set:
670
671   my $stats = Statistics::Descriptive::Discrete->new();
672	 $stats->add_data_tuple(1,1,2,2,3,3,4,4,5,5,6,6,7,7);
673	 my @bins = $stats->uniq();
674	 my $f = $stats->frequency_distribution_ref(\@bins);
675	 for (sort {$a <=> $b} keys %$f) {
676		 print "value = $_, count = $f->{$_}\n";
677	 }
678
679=item $stat->sum();
680
681Returns the sum of all the values in the data set.
682
683=item $stat->mean();
684
685Returns the mean of the data.
686
687=item $stat->harmonic_mean();
688
689Returns the harmonic mean of the data.  Since the mean is undefined
690if any of the data are zero or if the sum of the reciprocals is zero,
691it will return undef for both of those cases.
692
693=item $stat->geometric_mean();
694
695Returns the geometric mean of the data.  Returns C<undef> if any of the data
696are less than 0. Returns 0 if any of the data are 0.
697
698=item $stat->median();
699
700Returns the median value of the data.
701
702=item $stat->mode();
703
704Returns the mode of the data.
705
706=item $stat->variance();
707
708Returns the variance of the data.
709
710=item $stat->standard_deviation();
711
712Returns the standard_deviation of the data.
713
714=item $stat->sample_range();
715
716Returns the sample range (max - min) of the data set.
717
718=item $stat->frequency_distribution_ref($num_partitions);
719
720=item $stat->frequency_distribution_ref(\@bins);
721
722=item $stat->frequency_distribution_ref();
723
C<frequency_distribution_ref($num_partitions)> slices the data into
C<$num_partitions> sets (where $num_partitions is at least 1) and counts
the number of items that fall into each partition. It returns a reference to a
hash where the keys are the numerical values of the partitions used. The
minimum value of the data set is not a key and the maximum value of the data
set is always a key. The number of entries for a particular partition key is
the number of items which are greater than the previous partition key and less
than or equal to the current partition key. As an example,
732
733   $stat->add_data(1,1.5,2,2.5,3,3.5,4);
734   $f = $stat->frequency_distribution_ref(2);
735   for (sort {$a <=> $b} keys %$f) {
736      print "key = $_, count = $f->{$_}\n";
737   }
738
739prints
740
741   key = 2.5, count = 4
742   key = 4, count = 3
743
since there are four items less than or equal to 2.5, and 3 items
greater than 2.5 and less than or equal to 4.
746
747C<frequency_distribution_ref(\@bins)> provides the bins that are to be used
748for the distribution.  This allows for non-uniform distributions as
749well as trimmed or sample distributions to be found.  C<@bins> must
750be monotonic and must contain at least one element.  Note that unless the
751set of bins contains the full range of the data, the total counts returned will
752be less than the sample size.
753
754Calling C<frequency_distribution_ref()> with no arguments returns the last
755distribution calculated, if such exists.
756
757=item my %hash = $stat->frequency_distribution($partitions);
758
759=item my %hash = $stat->frequency_distribution(\@bins);
760
761=item my %hash = $stat->frequency_distribution();
762
763Same as C<frequency_distribution_ref()> except that it returns the hash
764clobbered into the return list. Kept for compatibility reasons with previous
765versions of Statistics::Descriptive::Discrete and using it is discouraged.
766
Note: in earlier versions of Statistics::Descriptive::Discrete, C<frequency_distribution()>
behaved differently than the Statistics::Descriptive implementation.  Any code that uses
this function should be carefully checked to ensure compatibility with the current
implementation.
771
772
773=item $stat->get_data();
774
775Returns a copy of the data array.  Note: This array could be
776very large and would thus defeat the purpose of using this
777module.  Make sure you really need it before using get_data().
778
The returned array contains the values sorted by value.  It does
not preserve the order in which the values were added.  Preserving
order would defeat the purpose of this module, which favors speed
and low memory usage over preserving order.  If order is important,
use Statistics::Descriptive.
784
785=item $stat->clear();
786
787Clears all data and resets the instance as if it were newly created
788
789Effectively the same as
790
791  my $class = ref($stat);
792  undef $stat;
793  $stat = new $class;
794
795=back
796
797=head1 NOTE
798
799The interface for this module strives to be identical to Statistics::Descriptive.
800Any differences are noted in the description for each method.
801
802=head1 BUGS
803
804=over
805
806=item *
807
808Code for calculating mode is not as robust as it should be.
809
810=item *
811
812Other bugs are lurking I'm sure.
813
814=back
815
816=head1 TODO
817
818=over
819
820=item *
821
822Add rest of methods (at least ones that don't depend on original order of data)
823from Statistics::Descriptive
824
825=back
826
827=head1 AUTHOR
828
829Rhet Turnbull, rturnbull+cpan@gmail.com
830
831=head1 CREDIT
832
833Thanks to the following individuals for finding bugs, providing feedback,
834and submitting changes:
835
836=over
837
838=item *
839
840Peter Dienes for finding and fixing a bug in the variance calculation.
841
842=item *
843
844Bill Dueber for suggesting the add_data_tuple method.
845
846=back
847
848=head1 COPYRIGHT
849
850  Copyright (c) 2002, 2019 Rhet Turnbull. All rights reserved.  This
851  program is free software; you can redistribute it and/or modify it
852  under the same terms as Perl itself.
853
854  Portions of this code is from Statistics::Descriptive which is under
855  the following copyrights:
856
857  Copyright (c) 1997,1998 Colin Kuskie. All rights reserved.  This
858  program is free software; you can redistribute it and/or modify it
859  under the same terms as Perl itself.
860
861  Copyright (c) 1998 Andrea Spinelli. All rights reserved.  This program
862  is free software; you can redistribute it and/or modify it under the
863  same terms as Perl itself.
864
865  Copyright (c) 1994,1995 Jason Kastner. All rights
866  reserved.  This program is free software; you can redistribute it
867  and/or modify it under the same terms as Perl itself.
868
869=head1 SEE ALSO
870
871Statistics::Descriptive
872
873Statistics::Discrete
874
875
876
877
878