1################################################################# 2# Matrix.pm 3################################################################# 4# Author: Thomas Hladish 5# $Id: Matrix.pm,v 1.23 2007/09/21 23:09:09 rvos Exp $ 6 7#################### START POD DOCUMENTATION ################## 8 9=head1 NAME 10 11Bio::NEXUS::Matrix - Provides functions for handling blocks that have matrices 12 13=head1 SYNOPSIS 14 15This module is the super class of Characters, Unaligned, and Distances block classes, and indirectly it is a super-class of Data and History blocks, which are both sub-classes of Characters blocks. These sub-classes inherint the methods within this module. There is no constructor, as a Matrix should not exist that is not also one of the sub-class block types. 16 17=head1 DESCRIPTION 18 19Provides functions used for handling blocks that have matrices. 20 21=head1 COMMENTS 22 23=head1 FEEDBACK 24 25All feedback (bugs, feature enhancements, etc.) are greatly appreciated. 26 27=head1 AUTHORS 28 29 Thomas Hladish (tjhladish at yahoo) 30 31=head1 VERSION 32 33$Revision: 1.23 $ 34 35=head1 METHODS 36 37=cut 38 39package Bio::NEXUS::Matrix; 40 41use strict; 42#use Data::Dumper; # XXX this is not used, might as well not import it! 43#use Carp; # XXX this is not used, might as well not import it! 44use Bio::NEXUS::Functions; 45use Bio::NEXUS::Block; 46use Bio::NEXUS::Util::Logger; 47use Bio::NEXUS::Util::Exceptions; 48use vars qw(@ISA $VERSION $AUTOLOAD); 49use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION; 50 51@ISA = qw(Bio::NEXUS::Block); 52my $logger = Bio::NEXUS::Util::Logger->new(); 53 54=head2 get_ntax 55 56 Title : get_ntax 57 Usage : $block->get_ntax(); 58 Function: Returns the number of taxa in the block 59 Returns : # taxa 60 Args : none 61 62=cut 63 64sub get_ntax { 65 my $self = shift; 66 67 if ( my $otuset = $self->get_otuset() ) { 68 return $otuset->get_ntax(); 69 } 70 elsif ( my $dimensions = $self->get_dimensions() ) { 71 return $dimensions->{'ntax'}; 72 } 73 else { 74 return; 75 } 76} 77 78=head2 set_nchar 79 80 Title : set_nchar 81 Usage : print $block->set_nchar(); 82 Function: Sets the value of Dimensions:nchar 83 Returns : none 84 Args : number of char(scalar) 85 86=cut 87 88sub set_nchar { 89 my ( $self, $nchar ) = @_; 90 $self->{'dimensions'}{'nchar'} = $nchar; 91 return; 92} 93 94=head2 get_nchar 95 96 Title : get_nchar 97 Usage : $block->get_nchar(); 98 Function: Returns the number of characters in the block (Note: In Distances Blocks, this is the number of characters used to infer distances.) 99 Returns : # taxa 100 Args : none 101 102=cut 103 104sub get_nchar { 105 my $self = shift; 106 107 if ( my $dimensions = $self->get_dimensions() ) { 108 return $dimensions->{'nchar'}; 109 } 110 else { 111 return; 112 } 113} 114 115=begin comment 116 117 Title : _parse_format 118 Usage : $format = $self->_parse_format($buffer); (private) 119 Function: Extracts format values from line and stores in format attribute 120 Returns : none 121 Args : buffer (string) 122 Methods : Separates formats by whitespace and creates hash containing 123 key = format name and value = format value. 124 125=end comment 126 127=cut 128 129sub _parse_format { 130 my ( $self, $string ) = @_; 131 132 my %format = (); 133 134 my @format_tokens = @{ _parse_nexus_words($string) }; 135 while (@format_tokens) { 136 137 # If the second thing in the list is a '=' (e.g. ('datatype', '=', 'standard') ) 138 if ( $format_tokens[1] && $format_tokens[1] eq '=' ) { 139 if ( lc($format_tokens[0]) eq 'items' ) { 140 # process items list 141 my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 ); 142 $format{ 'items' } = $val; 143 if ( $val eq '(' ) { 144 while ( $format{ 'items' } !~ /\)$/ ) { 145 #print Dumper @format_tokens; 146 $format{ 'items' } .= " " . shift( @format_tokens ); 147 } 148 } 149 } 150 else { 151 #then set the first thing equal to the third 152 my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 ); 153 $format{ lc $key } = $val; 154 } 155 } 156 else { 157 my $key = shift @format_tokens; 158 159 # Otherwise, just set the first thing equal to TRUE 160 $format{ lc $key } = 1; 161 } 162 } 163 164 # Note: Treating flags and things with rvalues the same way is problematic-- 165 # how do you know whether a given format token has a count of 1, or if it 166 # was merely present, and that's why it has a value of one. One possible 167 # way to make this more robust is to store flags in $format{'flags'}, 168 # e.g. $format{'flags'} = ['tokens', 'respectcase']; 169 170 $self->set_format( \%format ); 171 return; 172} 173 174=begin comment 175 176 Title : _validate_format 177 Usage : $self->_validate_format($format_hashref); (private) 178 Function: Assigns defaults and sorts through formatting subcommands per the NEXUS standard 179 Returns : hash reference (the validated formatting) 180 Args : hash reference with format keys (the subcommands) and their values 181 182=end comment 183 184=cut 185 186sub _validate_format { 187 my ( $self, $format ) = @_; 188 my $block_type = $self->get_type(); 189 190 # Currently, only Characters and Unaligned blocks are handled here--other 191 # matrix-type blocks are treated as though their formatting is valid 192 if ( $block_type !~ qr/^(?:characters|unaligned)$/i ) { 193 return $format; 194 } 195 196 $format->{'datatype'} ||= 'standard'; # 'standard' is the default datatype 197 198 # tokens always true for continuous data (p. 601 of Maddison, et al, 1997) 199 if ( $format->{'datatype'} =~ /^continuous$/i ) { 200 if ( $format->{'notokens'} ) { 201 $logger->warn( 202 "notokens subcommand is incompatible with" 203 . "datatype=continuous subcommand in format statement" 204 ); 205 } 206 $format->{'tokens'} = 1; 207 } 208 209 if ( $format->{'datatype'} =~ /^(?:dna|rna|nucleotide|protein|continuous)$/i ) { 210 delete $format->{'respectcase'}; 211 } 212 elsif ( $format->{'datatype'} eq 'standard' ) { 213 if ( !$format->{'respectcase'} ) { 214 for my $sub_cmd (qw/symbols missing gap matchar/) { 215 $format->{$sub_cmd} = lc $format->{$sub_cmd} 216 if defined $format->{$sub_cmd}; 217 } 218 } 219 } 220 else { 221 $logger->warn( 222 "Unfamiliar datatype encountered in $block_type block: " 223 . "'$format->{'datatype'}' (continuing anyway)" 224 ); 225 } 226 227 return $format; 228} 229 230=head2 set_format 231 232 Title : set_format 233 Usage : $block->set_format(\%format); 234 Function: set the format of the characters 235 Returns : none 236 Args : hash of format values 237 238=cut 239 240sub set_format { 241 my ( $self, $format_hashref ) = @_; 242 $self->{'format'} = $self->_validate_format($format_hashref); 243} 244 245=head2 get_format 246 247 Title : get_format 248 Usage : $block->get_format($attribute); 249 Function: Returns the format of the characters 250 Returns : hash of format values, or if $attribute (a string) is supplied, the value of that attribute in the hash 251 Args : none 252 253=cut 254 255sub get_format { 256 my ( $self, $attribute ) = @_; 257 $attribute 258 ? return $self->{'format'}->{$attribute} 259 : return $self->{'format'} || {}; 260} 261 262=head2 add_taxlabels 263 264 Title : add_taxlabels 265 Usage : $block->add_taxlabels($new_taxlabels); 266 Function: Adds new taxa to taxlabels if they aren't already there 267 Returns : none 268 Args : taxa to be added 269 270=cut 271 272sub add_taxlabels { 273 my ( $self, $new_taxlabels ) = @_; 274 my $current_taxlabels = $self->get_taxlabels(); 275 276 for my $new_label (@$new_taxlabels) { 277 278 # Check to see if new_label is already in current_taxlabels 279 if ( !defined first {/$new_label/} @$current_taxlabels ) { 280 push @$current_taxlabels, $new_label; 281 } 282 } 283 return; 284} 285 286=begin comment 287 288 Title : _write_dimensions 289 Usage : $block->_write_dimensions(); 290 Function: writes out the dimensions command 291 Returns : none 292 Args : filehandle to write to, a verbose flag 293 294=end comment 295 296=cut 297 298sub _write_dimensions { 299 my ( $self, $fh, $verbose ) = @_; 300 $fh ||= \*STDOUT; 301 302 # Arlin took out all ntax stuff, ntax only used in taxa block according to standard 303 # 304 # my $ntax = $self->get_ntax(); 305 my $nchar = $self->get_nchar(); 306 307 return if !defined $nchar;# && !defined $ntax; 308 309 # my $ntax_text = $ntax ? " ntax=$ntax" : q{}; 310 my $nchar_text = $nchar ? " nchar=$nchar" : q{}; 311 312 # Tom: this code cannot be reached due to return above on !$nchar, right? -Arlin 313 # 314 if ( $self->get_type() eq 'characters' && !$nchar ) { 315 Bio::NEXUS::Util::Exceptions::BadFormat->throw( 316 'error' => "Characters blocks require that Dimensions:nchar be defined" 317 ); 318 } 319 320 # print $fh "\tDIMENSIONS$ntax_text$nchar_text;\n"; 321 print $fh "\tDIMENSIONS$nchar_text;\n"; 322 return; 323} 324 325=begin comment 326 327 Title : _write_format 328 Usage : $block->_write_format(); 329 Function: writes out the format command 330 Returns : none 331 Args : filehandle to write to, a verbose flag 332 333=end comment 334 335=cut 336 337sub _write_format { 338 my ( $self, $fh, $verbose ) = @_; 339 $fh ||= \*STDOUT; 340 341 my %format_of = %{ $self->get_format() }; 342 if ( scalar keys %format_of ) { 343 print $fh "\tFORMAT"; 344 345 print $fh " datatype=$format_of{'datatype'}" 346 if defined $format_of{'datatype'}; 347 print $fh ' respectcase' if $format_of{'respectcase'}; 348 349 while ( my ( $key, $val ) = each %format_of ) { 350 next if ( lc($key) eq 'interleave' ); 351 if ( !$val || ( $key =~ /(?:datatype|respectcase)/i ) ) { next; } 352 elsif ( $val eq '1' ) { 353 print $fh " $key"; 354 } 355 else { 356 print $fh " $key=$val"; 357 } 358 } 359 print $fh ";\n"; 360 } 361 return; 362} 363 364sub AUTOLOAD { 365 return if $AUTOLOAD =~ /DESTROY$/; 366 my $package_name = __PACKAGE__ . '::'; 367 368 # The following methods are deprecated and are temporarily supported 369 # via a warning and a redirection 370 my %synonym_for = ( 371 372# "${package_name}parse" => "${package_name}_parse_tree", # example 373 ); 374 375 if ( defined $synonym_for{$AUTOLOAD} ) { 376 $logger->warn("$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead"); 377 goto &{ $synonym_for{$AUTOLOAD} }; 378 } 379 else { 380 Bio::NEXUS::Util::Exceptions::UnknownMethod->throw( 381 'error' => "ERROR: Unknown method $AUTOLOAD called" 382 ); 383 } 384} 385 3861; 387