1package Statistics::PointEstimation;
2use strict;
3use Carp;
4use vars qw($VERSION @ISA $AUTOLOAD);
5use Statistics::Distributions qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
6use Statistics::Descriptive;
7use POSIX;
8
9
10@ISA= qw (Statistics::Descriptive::Full);
11$VERSION = '1.1';
# Template of the fields related to the confidence interval. Every field
# starts out undefined; the keys also serve as the list of field names that
# print_confidence_interval() prints and that AUTOLOAD exposes as accessors.
my %confidence_interval;
@confidence_interval{ qw(
	significance
	alpha
	df
	standard_error
	t_value
	t_statistic
	t_prob
	delta
	upper_clm
	lower_clm
	valid
) } = ();
27
28
29
# Constructor. Builds the underlying object through the parent class
# constructor and attaches a private copy of the confidence-interval field
# template under the 'confidence' key.
# May be invoked on a class name or on an existing instance.
# Returns the new object, blessed into the invocant's class.
sub new {
	my $invocant = shift;
	my $class    = ref($invocant) || $invocant;

	my $self = $class->SUPER::new();
	$self->{confidence} = { %confidence_interval };   # fresh copy per object

	return bless $self, $class;
}
39
# Computes the confidence interval for the mean from the object's count(),
# mean() and standard_deviation(), and stores the results in the object:
#   significance (set to 95 when not yet defined), df, alpha, standard_error,
#   t_value, delta, upper_clm, lower_clm, t_statistic, t_prob and valid.
# Croaks when the sample size is 1 or less. Returns 1 on success.
sub compute_confidence_interval {
	my $self = shift;
	my $n    = $self->count();
	croak "sample size must be >1 to compute the confidence interval \n" if $n <= 1;

	$self->{'significance'} = 95 unless defined $self->{'significance'};

	# alpha is the probability mass in ONE tail, expressed as a fraction.
	my $df    = $self->{df}    = $n - 1;
	my $alpha = $self->{alpha} = ((100 - $self->{significance}) / 2) / 100;

	my $std_err = $self->{standard_error} = $self->standard_deviation() / sqrt($n);
	my $t_crit  = $self->{t_value}        = abs( tdistr($df, $alpha) );
	my $margin  = $self->{delta}          = $t_crit * $std_err;

	my $mean = $self->mean();
	$self->{upper_clm} = $mean + $margin;
	$self->{lower_clm} = $mean - $margin;

	# A zero standard error would divide by zero; the statistic is 0 then.
	my $t_stat = $self->{t_statistic} = $std_err ? ($mean / $std_err) : 0;

	# Reported as "Prob >|T|" by output_confidence_interval().
	$self->{t_prob} = 1 - abs( tprob($df, -1 * $t_stat) - tprob($df, $t_stat) );

	$self->{valid} = 1;
	return 1;
}
# Adds data points to the sample and refreshes the confidence interval.
# Accepts either a list of values or a single array reference.
# The significance level is saved before delegating to the parent's
# add_data() and written back afterwards (presumably because the parent
# discards fields it does not own -- confirm against Statistics::Descriptive).
# The interval is recomputed only when more than one data point is present.
# Returns 1.
sub add_data {
	my $self = shift;
	my $aref = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;

	# Plain assignment on purpose: "my $x = ... if COND" has undefined
	# behaviour in Perl (see perlsyn), and an unset significance is simply
	# undef here anyway.
	my $significance = $self->{'significance'};

	$self->SUPER::add_data($aref);
	$self->{'significance'} = $significance;

	$self->compute_confidence_interval()
		if defined($self->{count}) && $self->{count} > 1;

	return 1;
}
# Sets the significance level (usually 90, 95 or 99).
# A value that is not strictly between 0 and 100 is ignored and the current
# setting is kept. When the object already holds more than one data point,
# the confidence interval is recomputed. Always returns 1.
sub set_significance {
	my ($self, $level) = @_;

	if ($level > 0 && $level < 100) {
		$self->{'significance'} = $level;
	}

	my $n = $self->{count};
	if (defined($n) && $n > 1) {
		$self->compute_confidence_interval();
	}

	return 1;
}
86
# Prints the mean, the variance and then every confidence-interval field
# stored in the object, one "name:value" pair per line, to the currently
# selected output handle. Fields are listed in the hash order of the
# %confidence_interval template. Returns 1.
sub print_confidence_interval {
	my $self = shift;

	print "mean:", $self->mean(), "\n";
	print "variance:", $self->variance(), "\n";

	print "$_:", $self->{$_}, " \n" for keys %confidence_interval;

	return 1;
}
100
# Prints a human-readable summary of the sample and its confidence interval
# to the currently selected output handle.
# Parameter: $title - optional label inserted into the heading line.
# Croaks unless a confidence interval has been computed (valid flag is 1).
# The return value is that of the final print statement.
sub output_confidence_interval {
	my ($self, $title) = @_;

	croak "sample size must be >1 to compute the confidence interval\n"
		unless $self->{valid} == 1;

	print "Summary  from the observed values of the sample $title:\n";
	print "\tsample size= ", $self->count(),
		" , degree of freedom=", $self->df(), "\n";
	print "\tmean=", $self->mean(),
		" , variance=", $self->variance(), "\n";
	print "\tstandard deviation=", $self->standard_deviation(),
		" , standard error=", $self->standard_error(), "\n";
	print "\t the estimate of the mean is ", $self->mean(),
		" +/- ", $self->delta(), "\n\t",
		" or (", $self->lower_clm(), " to ", $self->upper_clm(),
		" ) with ", $self->significance(), " % of confidence\n";
	print "\t t-statistic=T=", $self->t_statistic(),
		" , Prob >|T|=", $self->t_prob(), "\n";
}
113
# Fallback read accessor for methods that are not defined explicitly.
# A call such as $obj->delta() returns $obj->{delta} when the name is a key
# of the object's '_permitted' hash or of the confidence-interval template;
# any other name croaks. DESTROY is silently ignored.
# Side effect: every call stores a reference to the template in the object
# under '_confidence'.
sub AUTOLOAD {
	my $self  = shift;
	my $class = ref($self);
	croak "$self is not an object" unless $class;

	# Strip the package qualifier from the fully qualified method name.
	(my $field = $AUTOLOAD) =~ s/.*://;

	$self->{_confidence} = \%confidence_interval;
	return if $field eq "DESTROY";

	croak "Can't access `$field' field in class $class"
		unless exists($self->{_permitted}->{$field})
		    || exists($self->{'_confidence'}->{$field});

	return $self->{$field};
}
1341;
135
136package Statistics::PointEstimation::Sufficient;
137use strict;
138use Carp;
139use vars qw($VERSION $AUTOLOAD @ISA);
140use POSIX;
141@ISA=qw (Statistics::PointEstimation);
142$VERSION='1.1';
# Template of all fields held by a Sufficient object: the sufficient
# statistics supplied by the caller plus the data related to the confidence
# interval. Every field starts out undefined.
my %fields;
@fields{ qw(
        count
        mean
        variance
        standard_deviation
        significance
        alpha
        df
        standard_error
        t_value
        t_statistic
        t_prob
        delta
        upper_clm
        lower_clm
        valid
) } = ();
161
# Constructor. Creates an object whose fields are a fresh copy of the
# %fields template (all undefined); the parent constructor is not called.
# May be invoked on a class name or on an existing instance.
sub new {
        my $invocant = shift;
        my $class    = ref($invocant) || $invocant;

        return bless { %fields }, $class;
}
# Raw data points cannot be added to this class; it works from the
# statistics passed to load_data(). Always croaks.
sub add_data {
        croak "the add_data() method is not supported in Statistics::PointEstimation::Sufficient\n";
}
# Loads the sufficient statistics of a sample and computes the confidence
# interval from them.
# Parameters:
#   $count    - sample size
#   $mean     - sample mean
#   $variance - sample variance (must not be negative)
# Croaks when the variance is negative; the object is left untouched then.
# When the sample size is not greater than 1 no interval can be computed, so
# any interval left over from an earlier load is cleared and the object is
# marked as not valid instead of reporting stale values.
# Returns nothing.
sub load_data {
        my $self = shift;
        my ($count, $mean, $variance) = @_;

        # Validate before touching the object; sqrt() of a negative number
        # would otherwise die with a message that does not name the cause.
        croak "variance must not be negative"
                if defined($variance) && $variance < 0;

        $self->{count}              = $count;
        $self->{mean}               = $mean;
        $self->{variance}           = $variance;
        $self->{standard_deviation} = sqrt($variance);

        if ($self->count() > 1) {
                $self->compute_confidence_interval();
        }
        else {
                # Results of a previous load no longer describe this sample.
                # The significance setting is a user choice and is kept.
                $self->{$_} = undef
                        for qw(alpha df standard_error t_value t_statistic
                               t_prob delta upper_clm lower_clm valid);
        }

        return;
}
185
# Fallback read accessor for the fields listed in %fields.
# A call such as $obj->mean() returns $obj->{mean}; a name that is not a key
# of %fields croaks. DESTROY is silently ignored.
# Side effect: every call stores a reference to %fields in the object under
# '_confidence'.
sub AUTOLOAD {
        my $self  = shift;
        my $class = ref($self);
        croak "$self is not an object" unless $class;

        $self->{_confidence} = \%fields;

        # Strip the package qualifier from the fully qualified method name.
        (my $field = $AUTOLOAD) =~ s/.*://;
        return if $field eq "DESTROY";

        croak "Can't access `$field' field in class $class"
                unless exists $self->{_confidence}->{$field};

        return $self->{$field};
}
2041;
205
206
207__END__
208
209=head1 NAME
210
211Statistics::PointEstimation - Perl module for computing confidence intervals in parameter estimation with Student's T distribution
212Statistics::PointEstimation::Sufficient - Perl module for computing the confidence intervals using sufficient statistics
213
214=head1 SYNOPSIS
215
216  # example for Statistics::PointEstimation
217  use Statistics::PointEstimation;
218
219  my @r=();
220  for($i=1;$i<=32;$i++) #generate a uniformly distributed sample with mean=5
221  {
222
223	  $rand=rand(10);
224	  push @r,$rand;
225  }
226
227  my $stat = new Statistics::PointEstimation;
228  $stat->set_significance(95); #set the significance(confidence) level to 95%
229  $stat->add_data(@r);
230  $stat->output_confidence_interval(); #output summary
231  $stat->print_confidence_interval();  #output the data hash related to confidence interval estimation
232
233  #the following is the same as $stat->output_confidence_interval();
234  print "Summary  from the observed values of the sample:\n";
235  print "\tsample size= ", $stat->count()," , degree of freedom=", $stat->df(), "\n";
236  print "\tmean=", $stat->mean()," , variance=", $stat->variance(),"\n";
237  print "\tstandard deviation=", $stat->standard_deviation()," , standard error=", $stat->standard_error(),"\n";
238  print "\t the estimate of the mean is ", $stat->mean()," +/- ",$stat->delta(),"\n\t",
239  " or (",$stat->lower_clm()," to ",$stat->upper_clm," ) with ",$stat->significance," % of confidence\n";
240  print "\t t-statistic=T=",$stat->t_statistic()," , Prob >|T|=",$stat->t_prob(),"\n";
241
242  #example for Statistics::PointEstimation::Sufficient
243
244  use strict;
245  use Statistics::PointEstimation;
246  my ($count,$mean,$variance)=(30,3.996,1.235);
247  my $stat = new Statistics::PointEstimation::Sufficient;
248  $stat->set_significance(99);
249  $stat->load_data($count,$mean,$variance);
250  $stat->output_confidence_interval();
251  $stat->set_significance(95);
252  $stat->output_confidence_interval();
253
254
255=head1 DESCRIPTION
256
257=head2  Statistics::PointEstimation
258
  This module is a subclass of Statistics::Descriptive::Full. It uses the Student's t-distribution to
  compute a confidence interval for the mean, assuming the data are normally distributed or the sample
  size is sufficiently large. It overrides the add_data() method of Statistics::Descriptive so that the
  confidence interval is computed at the specified significance (confidence) level (default is 95%)
  whenever data are added. It also computes the t-statistic T and Prob>|T|, which are useful for
  hypothesis testing such as paired t-tests.
264
265=head2  Statistics::PointEstimation::Sufficient
266
 This module is a subclass of Statistics::PointEstimation. Instead of taking the actual data points as input,
 it computes the confidence interval from the sufficient statistics of the sample.
 To use this module, pass the sample size, the sample mean, and the sample variance to the load_data()
 method. The output is exactly the same as that of the Statistics::PointEstimation module.
271
272
273=head1 AUTHOR
274
275Yun-Fang Juan , Yahoo! Inc.  (yunfang@yahoo-inc.com)
276
277=head1 SEE ALSO
278
279Statistics::Descriptive Statistics::Distributions
280
281=cut 
282