package Statistics::PointEstimation;
use strict;
use warnings;
use Carp;
use vars qw($VERSION @ISA $AUTOLOAD);
use Statistics::Distributions qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob);
use Statistics::Descriptive;
use POSIX;

@ISA     = qw(Statistics::Descriptive::Full);
$VERSION = '1.1';

# Names of the per-object fields that describe the confidence interval.
# The keys double as the list of read-only accessors served by AUTOLOAD.
my %confidence_interval = (
    "significance"   => undef,
    "alpha"          => undef,
    "df"             => undef,
    "standard_error" => undef,
    "t_value"        => undef,
    "t_statistic"    => undef,
    "t_prob"         => undef,
    "delta"          => undef,
    "upper_clm"      => undef,
    "lower_clm"      => undef,
    "valid"          => undef,
);

# Constructor. Builds the parent object via SUPER::new(), attaches a private
# copy of the confidence-interval template under the 'confidence' key and
# re-blesses the result into the requested class.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new();

    my %confidence = %confidence_interval;
    $self->{confidence} = \%confidence;

    bless($self, $class);
    return $self;
}

# Computes the t-based confidence interval for the mean and stores the
# results (df, alpha, standard_error, t_value, delta, upper_clm, lower_clm,
# t_statistic, t_prob, valid) directly on the object.
# Croaks when the sample size is not greater than 1. Returns 1.
sub compute_confidence_interval {
    my $self = shift;

    my $count = $self->count();
    croak "sample size must be >1 to compute the confidence interval \n"
        if !defined $count || $count <= 1;

    # Fall back to a 95% level when none has been set.
    $self->{significance} = 95 if !defined $self->{significance};

    $self->{df} = $count - 1;

    # Two-sided interval: half of the excluded probability mass per tail.
    $self->{alpha} = (100 - $self->{significance}) / 2;
    $self->{alpha} /= 100;

    $self->{standard_error} = $self->standard_deviation() / sqrt($count);
    $self->{t_value} = abs(tdistr($self->{df}, $self->{alpha}));
    $self->{delta}   = $self->{t_value} * $self->{standard_error};

    my $mean = $self->mean();
    $self->{upper_clm} = $mean + $self->{delta};
    $self->{lower_clm} = $mean - $self->{delta};

    # A zero standard error would divide by zero; report T=0 in that case.
    $self->{t_statistic} = $self->{standard_error}
        ? ($mean / $self->{standard_error})
        : 0;

    $self->{t_prob} = 1 - abs(
          tprob($self->{df}, -1 * $self->{t_statistic})
        - tprob($self->{df}, $self->{t_statistic})
    );

    $self->{valid} = 1;
    return 1;
}

# Adds data points (a list or a single array reference) through the parent
# class and recomputes the confidence interval once more than one point is
# present. Returns 1.
sub add_data {
    my $self = shift;
    my $aref;

    if (ref $_[0] eq 'ARRAY') {
        $aref = $_[0];
    }
    else {
        $aref = \@_;
    }

    # The significance level is saved and put back around the parent call so
    # that it is still set afterwards. The copy is unconditional: "my" under
    # a statement modifier has undefined behaviour in Perl.
    my $significance = $self->{significance};
    $self->SUPER::add_data($aref);
    $self->{significance} = $significance;

    $self->compute_confidence_interval()
        if defined $self->{count} && $self->{count} > 1;

    return 1;
}

# Sets the significance (confidence) level, usually 90, 95 or 99. Values
# outside the open interval (0, 100), or undef, are ignored. The interval is
# recomputed when the object already holds more than one data point.
# Returns 1.
sub set_significance {
    my $self         = shift;
    my $significance = shift;

    if (defined $significance && $significance > 0 && $significance < 100) {
        $self->{significance} = $significance;
    }

    $self->compute_confidence_interval()
        if defined $self->{count} && $self->{count} > 1;

    return 1;
}

# Prints the mean, the variance and every confidence-interval field as
# "name:value" lines on the currently selected output handle. Fields that
# have not been computed yet are printed as empty values. Returns 1.
sub print_confidence_interval {
    my $self = shift;

    # Undefined values are rendered as the empty string.
    my $text = sub { return defined $_[0] ? $_[0] : '' };

    print "mean:",     $text->($self->mean()),     "\n";
    print "variance:", $text->($self->variance()), "\n";

    foreach my $k (keys %confidence_interval) {
        print "$k:", $text->($self->{$k}), " \n";
    }

    return 1;
}

# Prints a human-readable summary of the estimate. Takes an optional title
# that is inserted into the heading line. Croaks unless a confidence interval
# has been computed (the 'valid' field is 1). Returns 1.
sub output_confidence_interval {
    my $self = shift;

    croak "sample size must be >1 to compute the confidence interval\n"
        unless defined $self->{valid} && $self->{valid} == 1;

    my $title = shift;
    $title = '' unless defined $title;

    print "Summary from the observed values of the sample $title:\n";
    print "\tsample size= ", $self->count(), " , degree of freedom=", $self->df(), "\n";
    print "\tmean=", $self->mean(), " , variance=", $self->variance(), "\n";
    print "\tstandard deviation=", $self->standard_deviation(),
        " , standard error=", $self->standard_error(), "\n";
    print "\t the estimate of the mean is ", $self->mean(), " +/- ", $self->delta(), "\n\t",
        " or (", $self->lower_clm(), " to ", $self->upper_clm, " ) with ",
        $self->significance, " % of confidence\n";
    print "\t t-statistic=T=", $self->t_statistic(), " , Prob >|T|=", $self->t_prob(), "\n";

    return 1;
}

# Read-only accessor dispatch. Serves any name listed in the object's
# '_permitted' hash or in %confidence_interval by returning the object field
# of the same name; croaks for every other name.
sub AUTOLOAD {
    my $self = shift;
    my $type = ref($self)
        or croak "$self is not an object";

    my $name = $AUTOLOAD;
    $self->{_confidence} = \%confidence_interval;
    $name =~ s/.*://;    # strip the package qualifier

    # Object destruction also lands here; it needs no work.
    return if $name eq "DESTROY";

    if (exists $self->{_permitted}->{$name}) {
        return $self->{$name};
    }
    elsif (exists $self->{'_confidence'}->{$name}) {
        return $self->{$name};
    }
    else {
        croak "Can't access `$name' field in class $type";
    }
}
1;

package Statistics::PointEstimation::Sufficient;
use strict;
use Carp;
use vars qw($VERSION $AUTOLOAD @ISA);
use POSIX;
@ISA     = qw(Statistics::PointEstimation);
$VERSION = '1.1';

# Every field held by a Sufficient object: the sufficient statistics supplied
# by the caller plus the derived confidence-interval values. The keys double
# as the list of read-only accessors served by AUTOLOAD.
my %fields = (
    "count"              => undef,
    "mean"               => undef,
    "variance"           => undef,
    "standard_deviation" => undef,
    "significance"       => undef,
    "alpha"              => undef,
    "df"                 => undef,
    "standard_error"     => undef,
    "t_value"            => undef,
    "t_statistic"        => undef,
    "t_prob"             => undef,
    "delta"              => undef,
    "upper_clm"          => undef,
    "lower_clm"          => undef,
    "valid"              => undef,
);

# Fields produced by compute_confidence_interval(); cleared on every
# load_data() so that a stale interval can never be reported.
my @derived_fields = qw(
    alpha df standard_error t_value t_statistic t_prob
    delta upper_clm lower_clm valid
);

# Constructor. Creates a plain hash pre-populated with %fields; the parent
# constructor is intentionally not called.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = {%fields};
    bless($self, $class);
    return $self;
}

# Raw data points are not supported by this class; always croaks.
sub add_data {
    croak "the add_data() method is not supported in Statistics::PointEstimation::Sufficient\n";
}

# Loads the sufficient statistics (sample size, sample mean, sample variance)
# and recomputes the confidence interval when the sample size is greater
# than 1. Croaks when an argument is missing or the variance is negative.
# Returns nothing.
sub load_data {
    my $self = shift;
    my ($count, $mean, $variance) = @_;

    croak "load_data() requires the sample size, the sample mean and the sample variance\n"
        unless defined $count && defined $mean && defined $variance;
    croak "the sample variance must not be negative\n"
        if $variance < 0;

    $self->{count}              = $count;
    $self->{mean}               = $mean;
    $self->{variance}           = $variance;
    $self->{standard_deviation} = sqrt($variance);

    # Drop any interval computed from previously loaded statistics; without
    # this a later load with count <= 1 would leave 'valid' set and the old
    # interval would be reported for the new data.
    $self->{$_} = undef for @derived_fields;

    $self->compute_confidence_interval() if $count > 1;
    return;
}

# Read-only accessor dispatch. Serves any name listed in %fields by returning
# the object field of the same name; croaks for every other name.
sub AUTOLOAD {
    my $self = shift;
    my $type = ref($self)
        or croak "$self is not an object";

    $self->{_confidence} = \%fields;
    my $name = $AUTOLOAD;
    $name =~ s/.*://;    # strip the package qualifier

    # Object destruction also lands here; it needs no work.
    return if $name eq "DESTROY";

    if (exists $self->{_confidence}->{$name}) {
        return $self->{$name};
    }
    else {
        croak "Can't access `$name' field in class $type";
    }
}
1;


__END__

=head1 NAME

Statistics::PointEstimation - Perl module for computing confidence intervals in parameter estimation with Student's T distribution

Statistics::PointEstimation::Sufficient - Perl module for computing the confidence intervals using sufficient statistics

=head1 SYNOPSIS

  # example for Statistics::PointEstimation
  use Statistics::PointEstimation;

  my @r = ();
  for my $i (1 .. 32)    # generate a uniformly distributed sample with mean=5
  {
      my $rand = rand(10);
      push @r, $rand;
  }

  my $stat = Statistics::PointEstimation->new;
  $stat->set_significance(95);         # set the significance (confidence) level to 95%
  $stat->add_data(@r);
  $stat->output_confidence_interval(); # output summary
  $stat->print_confidence_interval();  # output the data hash related to confidence interval estimation

  # the following is the same as $stat->output_confidence_interval();
  print "Summary from the observed values of the sample:\n";
  print "\tsample size= ", $stat->count()," , degree of freedom=", $stat->df(), "\n";
  print "\tmean=", $stat->mean()," , variance=", $stat->variance(),"\n";
  print "\tstandard deviation=", $stat->standard_deviation()," , standard error=", $stat->standard_error(),"\n";
  print "\t the estimate of the mean is ", $stat->mean()," +/- ",$stat->delta(),"\n\t",
        " or (",$stat->lower_clm()," to ",$stat->upper_clm," ) with ",$stat->significance," % of confidence\n";
  print "\t t-statistic=T=",$stat->t_statistic()," , Prob >|T|=",$stat->t_prob(),"\n";

  # example for Statistics::PointEstimation::Sufficient

  use strict;
  use Statistics::PointEstimation;
  my ($count,$mean,$variance)=(30,3.996,1.235);
  my $stat = Statistics::PointEstimation::Sufficient->new;
  $stat->set_significance(99);
  $stat->load_data($count,$mean,$variance);
  $stat->output_confidence_interval();
  $stat->set_significance(95);
  $stat->output_confidence_interval();


=head1 DESCRIPTION

=head2 Statistics::PointEstimation

  This module is a subclass of Statistics::Descriptive::Full. It uses the T-distribution for point estimation,
  assuming the data is normally distributed or the sample size is sufficiently large. It overrides the
  add_data() method in Statistics::Descriptive to compute the confidence interval with the specified significance
  level (default is 95%). It also computes the t-statistic=T and Prob>|T| in case of hypothesis
  testing of paired T-tests.

=head2 Statistics::PointEstimation::Sufficient

  This module is a subclass of Statistics::PointEstimation. Instead of taking the real data points as the input,
  it will compute the confidence intervals based on the sufficient statistics and the sample size inputted.
  To use this module, you need to pass the sample size, the sample mean, and the sample variance into the load_data()
  function. The output will be exactly the same as the Statistics::PointEstimation module.

=head1 METHODS

=over 4

=item set_significance($level)

Sets the confidence level in percent. Values outside the open interval (0, 100) are ignored.

=item add_data(@values) / add_data(\@values)

Adds data points and recomputes the interval once more than one point is present.
Not supported by Statistics::PointEstimation::Sufficient.

=item load_data($count, $mean, $variance)

Statistics::PointEstimation::Sufficient only. Loads the sufficient statistics and recomputes the interval
when $count is greater than 1. Croaks when an argument is missing or the variance is negative.

=item compute_confidence_interval()

Recomputes the interval. Croaks when the sample size is not greater than 1.

=item output_confidence_interval($title)

Prints a summary. The title is optional. Croaks when no interval has been computed.

=item print_confidence_interval()

Prints the mean, the variance and every interval-related field as C<name:value> lines.

=item significance, alpha, df, standard_error, t_value, t_statistic, t_prob, delta, upper_clm, lower_clm, valid

Read-only accessors for the computed values.

=back

=head1 AUTHOR

Yun-Fang Juan , Yahoo! Inc.  (yunfang@yahoo-inc.com)

=head1 SEE ALSO

Statistics::Descriptive Statistics::Distributions

=cut