################################################################# # Matrix.pm ################################################################# # Author: Thomas Hladish # $Id: Matrix.pm,v 1.23 2007/09/21 23:09:09 rvos Exp $ #################### START POD DOCUMENTATION ################## =head1 NAME Bio::NEXUS::Matrix - Provides functions for handling blocks that have matrices =head1 SYNOPSIS This module is the super class of Characters, Unaligned, and Distances block classes, and indirectly it is a super-class of Data and History blocks, which are both sub-classes of Characters blocks. These sub-classes inherint the methods within this module. There is no constructor, as a Matrix should not exist that is not also one of the sub-class block types. =head1 DESCRIPTION Provides functions used for handling blocks that have matrices. =head1 COMMENTS =head1 FEEDBACK All feedback (bugs, feature enhancements, etc.) are greatly appreciated. =head1 AUTHORS Thomas Hladish (tjhladish at yahoo) =head1 VERSION $Revision: 1.23 $ =head1 METHODS =cut package Bio::NEXUS::Matrix; use strict; #use Data::Dumper; # XXX this is not used, might as well not import it! #use Carp; # XXX this is not used, might as well not import it! use Bio::NEXUS::Functions; use Bio::NEXUS::Block; use Bio::NEXUS::Util::Logger; use Bio::NEXUS::Util::Exceptions; use vars qw(@ISA $VERSION $AUTOLOAD); use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION; @ISA = qw(Bio::NEXUS::Block); my $logger = Bio::NEXUS::Util::Logger->new(); =head2 get_ntax Title : get_ntax Usage : $block->get_ntax(); Function: Returns the number of taxa in the block Returns : # taxa Args : none =cut sub get_ntax { my $self = shift; if ( my $otuset = $self->get_otuset() ) { return $otuset->get_ntax(); } elsif ( my $dimensions = $self->get_dimensions() ) { return $dimensions->{'ntax'}; } else { return; } } =head2 set_nchar Title : set_nchar Usage : print $block->set_nchar(); Function: Sets the value of Dimensions:nchar Returns : none Args : number of char(scalar) =cut sub set_nchar { my ( $self, $nchar ) = @_; $self->{'dimensions'}{'nchar'} = $nchar; return; } =head2 get_nchar Title : get_nchar Usage : $block->get_nchar(); Function: Returns the number of characters in the block (Note: In Distances Blocks, this is the number of characters used to infer distances.) Returns : # taxa Args : none =cut sub get_nchar { my $self = shift; if ( my $dimensions = $self->get_dimensions() ) { return $dimensions->{'nchar'}; } else { return; } } =begin comment Title : _parse_format Usage : $format = $self->_parse_format($buffer); (private) Function: Extracts format values from line and stores in format attribute Returns : none Args : buffer (string) Methods : Separates formats by whitespace and creates hash containing key = format name and value = format value. =end comment =cut sub _parse_format { my ( $self, $string ) = @_; my %format = (); my @format_tokens = @{ _parse_nexus_words($string) }; while (@format_tokens) { # If the second thing in the list is a '=' (e.g. ('datatype', '=', 'standard') ) if ( $format_tokens[1] && $format_tokens[1] eq '=' ) { if ( lc($format_tokens[0]) eq 'items' ) { # process items list my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 ); $format{ 'items' } = $val; if ( $val eq '(' ) { while ( $format{ 'items' } !~ /\)$/ ) { #print Dumper @format_tokens; $format{ 'items' } .= " " . shift( @format_tokens ); } } } else { #then set the first thing equal to the third my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 ); $format{ lc $key } = $val; } } else { my $key = shift @format_tokens; # Otherwise, just set the first thing equal to TRUE $format{ lc $key } = 1; } } # Note: Treating flags and things with rvalues the same way is problematic-- # how do you know whether a given format token has a count of 1, or if it # was merely present, and that's why it has a value of one. One possible # way to make this more robust is to store flags in $format{'flags'}, # e.g. $format{'flags'} = ['tokens', 'respectcase']; $self->set_format( \%format ); return; } =begin comment Title : _validate_format Usage : $self->_validate_format($format_hashref); (private) Function: Assigns defaults and sorts through formatting subcommands per the NEXUS standard Returns : hash reference (the validated formatting) Args : hash reference with format keys (the subcommands) and their values =end comment =cut sub _validate_format { my ( $self, $format ) = @_; my $block_type = $self->get_type(); # Currently, only Characters and Unaligned blocks are handled here--other # matrix-type blocks are treated as though their formatting is valid if ( $block_type !~ qr/^(?:characters|unaligned)$/i ) { return $format; } $format->{'datatype'} ||= 'standard'; # 'standard' is the default datatype # tokens always true for continuous data (p. 601 of Maddison, et al, 1997) if ( $format->{'datatype'} =~ /^continuous$/i ) { if ( $format->{'notokens'} ) { $logger->warn( "notokens subcommand is incompatible with" . "datatype=continuous subcommand in format statement" ); } $format->{'tokens'} = 1; } if ( $format->{'datatype'} =~ /^(?:dna|rna|nucleotide|protein|continuous)$/i ) { delete $format->{'respectcase'}; } elsif ( $format->{'datatype'} eq 'standard' ) { if ( !$format->{'respectcase'} ) { for my $sub_cmd (qw/symbols missing gap matchar/) { $format->{$sub_cmd} = lc $format->{$sub_cmd} if defined $format->{$sub_cmd}; } } } else { $logger->warn( "Unfamiliar datatype encountered in $block_type block: " . "'$format->{'datatype'}' (continuing anyway)" ); } return $format; } =head2 set_format Title : set_format Usage : $block->set_format(\%format); Function: set the format of the characters Returns : none Args : hash of format values =cut sub set_format { my ( $self, $format_hashref ) = @_; $self->{'format'} = $self->_validate_format($format_hashref); } =head2 get_format Title : get_format Usage : $block->get_format($attribute); Function: Returns the format of the characters Returns : hash of format values, or if $attribute (a string) is supplied, the value of that attribute in the hash Args : none =cut sub get_format { my ( $self, $attribute ) = @_; $attribute ? return $self->{'format'}->{$attribute} : return $self->{'format'} || {}; } =head2 add_taxlabels Title : add_taxlabels Usage : $block->add_taxlabels($new_taxlabels); Function: Adds new taxa to taxlabels if they aren't already there Returns : none Args : taxa to be added =cut sub add_taxlabels { my ( $self, $new_taxlabels ) = @_; my $current_taxlabels = $self->get_taxlabels(); for my $new_label (@$new_taxlabels) { # Check to see if new_label is already in current_taxlabels if ( !defined first {/$new_label/} @$current_taxlabels ) { push @$current_taxlabels, $new_label; } } return; } =begin comment Title : _write_dimensions Usage : $block->_write_dimensions(); Function: writes out the dimensions command Returns : none Args : filehandle to write to, a verbose flag =end comment =cut sub _write_dimensions { my ( $self, $fh, $verbose ) = @_; $fh ||= \*STDOUT; # Arlin took out all ntax stuff, ntax only used in taxa block according to standard # # my $ntax = $self->get_ntax(); my $nchar = $self->get_nchar(); return if !defined $nchar;# && !defined $ntax; # my $ntax_text = $ntax ? " ntax=$ntax" : q{}; my $nchar_text = $nchar ? " nchar=$nchar" : q{}; # Tom: this code cannot be reached due to return above on !$nchar, right? -Arlin # if ( $self->get_type() eq 'characters' && !$nchar ) { Bio::NEXUS::Util::Exceptions::BadFormat->throw( 'error' => "Characters blocks require that Dimensions:nchar be defined" ); } # print $fh "\tDIMENSIONS$ntax_text$nchar_text;\n"; print $fh "\tDIMENSIONS$nchar_text;\n"; return; } =begin comment Title : _write_format Usage : $block->_write_format(); Function: writes out the format command Returns : none Args : filehandle to write to, a verbose flag =end comment =cut sub _write_format { my ( $self, $fh, $verbose ) = @_; $fh ||= \*STDOUT; my %format_of = %{ $self->get_format() }; if ( scalar keys %format_of ) { print $fh "\tFORMAT"; print $fh " datatype=$format_of{'datatype'}" if defined $format_of{'datatype'}; print $fh ' respectcase' if $format_of{'respectcase'}; while ( my ( $key, $val ) = each %format_of ) { next if ( lc($key) eq 'interleave' ); if ( !$val || ( $key =~ /(?:datatype|respectcase)/i ) ) { next; } elsif ( $val eq '1' ) { print $fh " $key"; } else { print $fh " $key=$val"; } } print $fh ";\n"; } return; } sub AUTOLOAD { return if $AUTOLOAD =~ /DESTROY$/; my $package_name = __PACKAGE__ . '::'; # The following methods are deprecated and are temporarily supported # via a warning and a redirection my %synonym_for = ( # "${package_name}parse" => "${package_name}_parse_tree", # example ); if ( defined $synonym_for{$AUTOLOAD} ) { $logger->warn("$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead"); goto &{ $synonym_for{$AUTOLOAD} }; } else { Bio::NEXUS::Util::Exceptions::UnknownMethod->throw( 'error' => "ERROR: Unknown method $AUTOLOAD called" ); } } 1;