1#################################################################
2# Matrix.pm
3#################################################################
4# Author: Thomas Hladish
5# $Id: Matrix.pm,v 1.23 2007/09/21 23:09:09 rvos Exp $
6
7#################### START POD DOCUMENTATION ##################
8
9=head1 NAME
10
11Bio::NEXUS::Matrix - Provides functions for handling blocks that have matrices
12
13=head1 SYNOPSIS
14
15This module is the super class of Characters, Unaligned, and Distances block classes, and indirectly it is a super-class of Data and History blocks, which are both sub-classes of Characters blocks. These sub-classes inherint the methods within this module.  There is no constructor, as a Matrix should not exist that is not also one of the sub-class block types.
16
17=head1 DESCRIPTION
18
19Provides functions used for handling blocks that have matrices.
20
21=head1 COMMENTS
22
23=head1 FEEDBACK
24
25All feedback (bugs, feature enhancements, etc.) are greatly appreciated.
26
27=head1 AUTHORS
28
29 Thomas Hladish (tjhladish at yahoo)
30
31=head1 VERSION
32
33$Revision: 1.23 $
34
35=head1 METHODS
36
37=cut
38
39package Bio::NEXUS::Matrix;
40
41use strict;
42#use Data::Dumper; # XXX this is not used, might as well not import it!
43#use Carp; # XXX this is not used, might as well not import it!
44use Bio::NEXUS::Functions;
45use Bio::NEXUS::Block;
46use Bio::NEXUS::Util::Logger;
47use Bio::NEXUS::Util::Exceptions;
48use vars qw(@ISA $VERSION $AUTOLOAD);
49use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION;
50
51@ISA = qw(Bio::NEXUS::Block);
52my $logger = Bio::NEXUS::Util::Logger->new();
53
54=head2 get_ntax
55
56 Title   : get_ntax
57 Usage   : $block->get_ntax();
58 Function: Returns the number of taxa in the block
59 Returns : # taxa
60 Args    : none
61
62=cut
63
64sub get_ntax {
65    my $self = shift;
66
67    if ( my $otuset = $self->get_otuset() ) {
68        return $otuset->get_ntax();
69    }
70    elsif ( my $dimensions = $self->get_dimensions() ) {
71        return $dimensions->{'ntax'};
72    }
73    else {
74        return;
75    }
76}
77
78=head2 set_nchar
79
80 Title   : set_nchar
81 Usage   : print $block->set_nchar();
82 Function: Sets the value of Dimensions:nchar
83 Returns : none
84 Args    : number of char(scalar)
85
86=cut
87
88sub set_nchar {
89    my ( $self, $nchar ) = @_;
90    $self->{'dimensions'}{'nchar'} = $nchar;
91    return;
92}
93
94=head2 get_nchar
95
96 Title   : get_nchar
97 Usage   : $block->get_nchar();
98 Function: Returns the number of characters in the block (Note: In Distances Blocks, this is the number of characters used to infer distances.)
99 Returns : # taxa
100 Args    : none
101
102=cut
103
104sub get_nchar {
105    my $self = shift;
106
107    if ( my $dimensions = $self->get_dimensions() ) {
108        return $dimensions->{'nchar'};
109    }
110    else {
111        return;
112    }
113}
114
115=begin comment
116
117 Title   : _parse_format
118 Usage   : $format = $self->_parse_format($buffer); (private)
119 Function: Extracts format values from line and stores in format attribute
120 Returns : none
121 Args    : buffer (string)
122 Methods : Separates formats by whitespace and creates hash containing
123           key = format name and value = format value.
124
125=end comment
126
127=cut
128
129sub _parse_format {
130    my ( $self, $string ) = @_;
131
132    my %format = ();
133
134    my @format_tokens = @{ _parse_nexus_words($string) };
135    while (@format_tokens) {
136
137		# If the second thing in the list is a '=' (e.g. ('datatype', '=', 'standard') )
138        if ( $format_tokens[1] && $format_tokens[1] eq '=' ) {
139			if ( lc($format_tokens[0]) eq 'items' ) {
140				# process items list
141	           	my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 );
142             	$format{ 'items' } = $val;
143             	if ( $val eq '(' ) {
144					while ( $format{ 'items' } !~ /\)$/ ) {
145						#print Dumper @format_tokens;
146						$format{ 'items' } .= " " . shift( @format_tokens );
147					}
148				}
149			}
150			else {
151            	#then set the first thing equal to the third
152            	my ( $key, $equals, $val ) = splice( @format_tokens, 0, 3 );
153            	$format{ lc $key } = $val;
154        	}
155      	}
156        else {
157            my $key = shift @format_tokens;
158
159            # Otherwise, just set the first thing equal to TRUE
160            $format{ lc $key } = 1;
161        }
162    }
163
164    # Note: Treating flags and things with rvalues the same way is problematic--
165    # how do you know whether a given format token has a count of 1, or if it
166    # was merely present, and that's why it has a value of one.  One possible
167    # way to make this more robust is to store flags in $format{'flags'},
168    # e.g. $format{'flags'} = ['tokens', 'respectcase'];
169
170    $self->set_format( \%format );
171    return;
172}
173
174=begin comment
175
176 Title   : _validate_format
177 Usage   : $self->_validate_format($format_hashref); (private)
178 Function: Assigns defaults and sorts through formatting subcommands per the NEXUS standard
179 Returns : hash reference (the validated formatting)
180 Args    : hash reference with format keys (the subcommands) and their values
181
182=end comment
183
184=cut
185
186sub _validate_format {
187    my ( $self, $format ) = @_;
188    my $block_type = $self->get_type();
189
190    # Currently, only Characters and Unaligned blocks are handled here--other
191    # matrix-type blocks are treated as though their formatting is valid
192    if ( $block_type !~ qr/^(?:characters|unaligned)$/i ) {
193    	return $format;
194    }
195
196    $format->{'datatype'} ||= 'standard';   # 'standard' is the default datatype
197
198	# tokens always true for continuous data (p. 601 of Maddison, et al, 1997)
199    if ( $format->{'datatype'} =~ /^continuous$/i ) {
200        if ( $format->{'notokens'} ) {
201        	$logger->warn(
202        		"notokens subcommand is incompatible with"
203        		. "datatype=continuous subcommand in format statement"
204        	);
205        }
206        $format->{'tokens'} = 1;
207    }
208
209    if ( $format->{'datatype'} =~ /^(?:dna|rna|nucleotide|protein|continuous)$/i ) {
210        delete $format->{'respectcase'};
211    }
212    elsif ( $format->{'datatype'} eq 'standard' ) {
213        if ( !$format->{'respectcase'} ) {
214            for my $sub_cmd (qw/symbols missing gap matchar/) {
215                $format->{$sub_cmd} = lc $format->{$sub_cmd}
216                    if defined $format->{$sub_cmd};
217            }
218        }
219    }
220    else {
221        $logger->warn(
222        	"Unfamiliar datatype encountered in $block_type block: "
223        	. "'$format->{'datatype'}' (continuing anyway)"
224        );
225    }
226
227    return $format;
228}
229
230=head2 set_format
231
232 Title   : set_format
233 Usage   : $block->set_format(\%format);
234 Function: set the format of the characters
235 Returns : none
236 Args    : hash of format values
237
238=cut
239
240sub set_format {
241    my ( $self, $format_hashref ) = @_;
242    $self->{'format'} = $self->_validate_format($format_hashref);
243}
244
245=head2 get_format
246
247 Title   : get_format
248 Usage   : $block->get_format($attribute);
249 Function: Returns the format of the characters
250 Returns : hash of format values, or if $attribute (a string) is supplied, the value of that attribute in the hash
251 Args    : none
252
253=cut
254
255sub get_format {
256    my ( $self, $attribute ) = @_;
257    $attribute
258        ? return $self->{'format'}->{$attribute}
259        : return $self->{'format'} || {};
260}
261
262=head2 add_taxlabels
263
264 Title   : add_taxlabels
265 Usage   : $block->add_taxlabels($new_taxlabels);
266 Function: Adds new taxa to taxlabels if they aren't already there
267 Returns : none
268 Args    : taxa to be added
269
270=cut
271
272sub add_taxlabels {
273    my ( $self, $new_taxlabels ) = @_;
274    my $current_taxlabels = $self->get_taxlabels();
275
276    for my $new_label (@$new_taxlabels) {
277
278        # Check to see if new_label is already in current_taxlabels
279        if ( !defined first {/$new_label/} @$current_taxlabels ) {
280            push @$current_taxlabels, $new_label;
281        }
282    }
283    return;
284}
285
286=begin comment
287
288 Title   : _write_dimensions
289 Usage   : $block->_write_dimensions();
290 Function: writes out the dimensions command
291 Returns : none
292 Args    : filehandle to write to, a verbose flag
293
294=end comment
295
296=cut
297
298sub _write_dimensions {
299    my ( $self, $fh, $verbose ) = @_;
300    $fh ||= \*STDOUT;
301
302	# Arlin took out all ntax stuff, ntax only used in taxa block according to standard
303	#
304	# my $ntax  = $self->get_ntax();
305    my $nchar = $self->get_nchar();
306
307    return if !defined $nchar;# && !defined $ntax;
308
309	# my $ntax_text  = $ntax  ? " ntax=$ntax"   : q{};
310    my $nchar_text = $nchar ? " nchar=$nchar" : q{};
311
312	# Tom: this code cannot be reached due to return above on !$nchar, right? -Arlin
313	#
314	if ( $self->get_type() eq 'characters' && !$nchar ) {
315		Bio::NEXUS::Util::Exceptions::BadFormat->throw(
316			'error' => "Characters blocks require that Dimensions:nchar be defined"
317		);
318	}
319
320	# print $fh "\tDIMENSIONS$ntax_text$nchar_text;\n";
321    print $fh "\tDIMENSIONS$nchar_text;\n";
322    return;
323}
324
325=begin comment
326
327 Title   : _write_format
328 Usage   : $block->_write_format();
329 Function: writes out the format command
330 Returns : none
331 Args    : filehandle to write to, a verbose flag
332
333=end comment
334
335=cut
336
337sub _write_format {
338    my ( $self, $fh, $verbose ) = @_;
339    $fh ||= \*STDOUT;
340
341    my %format_of = %{ $self->get_format() };
342    if ( scalar keys %format_of ) {
343        print $fh "\tFORMAT";
344
345        print $fh " datatype=$format_of{'datatype'}"
346            if defined $format_of{'datatype'};
347        print $fh ' respectcase' if $format_of{'respectcase'};
348
349        while ( my ( $key, $val ) = each %format_of ) {
350            next if ( lc($key) eq 'interleave' );
351            if ( !$val || ( $key =~ /(?:datatype|respectcase)/i ) ) { next; }
352            elsif ( $val eq '1' ) {
353                print $fh " $key";
354            }
355            else {
356                print $fh " $key=$val";
357            }
358        }
359        print $fh ";\n";
360    }
361    return;
362}
363
364sub AUTOLOAD {
365    return if $AUTOLOAD =~ /DESTROY$/;
366    my $package_name = __PACKAGE__ . '::';
367
368    # The following methods are deprecated and are temporarily supported
369    # via a warning and a redirection
370    my %synonym_for = (
371
372#        "${package_name}parse"      => "${package_name}_parse_tree",  # example
373    );
374
375    if ( defined $synonym_for{$AUTOLOAD} ) {
376        $logger->warn("$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead");
377        goto &{ $synonym_for{$AUTOLOAD} };
378    }
379    else {
380        Bio::NEXUS::Util::Exceptions::UnknownMethod->throw(
381        	'error' => "ERROR: Unknown method $AUTOLOAD called"
382        );
383    }
384}
385
3861;
387