1# Copyright 2008-2010 Tim Rayner 2# 3# This file is part of Bio::MAGETAB. 4# 5# Bio::MAGETAB is free software: you can redistribute it and/or modify 6# it under the terms of the GNU General Public License as published by 7# the Free Software Foundation, either version 2 of the License, or 8# (at your option) any later version. 9# 10# Bio::MAGETAB is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13# GNU General Public License for more details. 14# 15# You should have received a copy of the GNU General Public License 16# along with Bio::MAGETAB. If not, see <http://www.gnu.org/licenses/>. 17# 18# $Id: ADF.pm 340 2010-07-23 13:19:27Z tfrayner $ 19 20package Bio::MAGETAB::Util::Writer::ADF; 21 22use Moose; 23use MooseX::FollowPBP; 24 25use Carp; 26 27use MooseX::Types::Moose qw( Bool ); 28 29BEGIN { extends 'Bio::MAGETAB::Util::Writer::Tabfile' }; 30 31has 'magetab_object' => ( is => 'ro', 32 isa => 'Bio::MAGETAB::ArrayDesign', 33 required => 1 ); 34 35has '_cached_mapping_flag' => ( is => 'rw', 36 isa => Bool, 37 predicate => '_has_cached_mapping_flag', 38 required => 0 ); 39 40sub _write_header { 41 42 my ( $self ) = @_; 43 44 my $array = $self->get_magetab_object(); 45 46 # Term Sources are a bit ugly, because they're normally attached 47 # to Investigation. We currently cheat and go via any Bio::MAGETAB 48 # container that's available (this means that *all* in-memory term 49 # sources are dumped into the ADF): 50 my ( @termsources, $num_cols ); 51 if ( my $magetab = $array->get_ClassContainer() ) { 52 @termsources = $magetab->get_termSources(); 53 if ( my $num_ts = scalar @termsources ) { 54 $num_cols = $num_ts + 1; 55 } 56 } 57 58 # Just two columns is standard for the header section if there are 59 # no Term Sources; main and mapping sections will differ (FIXME 60 # check this against the spec; is this valid?). 61 $num_cols ||= 2; 62 $self->set_num_columns( $num_cols ); 63 $self->_write_line( '[header]' ); 64 65 my %single = ( 66 'Array Design Name' => 'name', 67 'Version' => 'version', 68 'Provider' => 'provider', 69 'Printing Protocol' => 'printingProtocol', 70 ); 71 72 # Single elements are straightforward. 73 while ( my ( $field, $value ) = each %single ) { 74 my $getter = "get_$value"; 75 $self->_write_line( $field, $array->$getter ); 76 } 77 78 # Elements pointing to objects need a bit more work. 79 my %multi = ( 80 81 'technologyType' => [ 82 sub { return ( [ 'Technology Type', 83 map { $_->get_value() } @_ ] ) }, 84 sub { return ( [ 'Technology Type Term Accession Number', 85 map { $self->_get_thing_accession($_) } @_ ] ) }, 86 sub { return ( [ 'Technology Type Term Source REF', 87 map { $self->_get_type_termsource_name($_) } @_ ] ) }, 88 ], 89 'surfaceType' => [ 90 sub { return ( [ 'Surface Type', 91 map { $_->get_value() } @_ ] ) }, 92 sub { return ( [ 'Surface Type Term Accession Number', 93 map { $self->_get_thing_accession($_) } @_ ] ) }, 94 sub { return ( [ 'Surface Type Term Source REF', 95 map { $self->_get_type_termsource_name($_) } @_ ] ) }, 96 ], 97 'substrateType' => [ 98 sub { return ( [ 'Substrate Type', 99 map { $_->get_value() } @_ ] ) }, 100 sub { return ( [ 'Substrate Type Term Accession Number', 101 map { $self->_get_thing_accession($_) } @_ ] ) }, 102 sub { return ( [ 'Substrate Type Term Source REF', 103 map { $self->_get_type_termsource_name($_) } @_ ] ) }, 104 ], 105 'sequencePolymerType' => [ 106 sub { return ( [ 'Sequence Polymer Type', 107 map { $_->get_value() } @_ ] ) }, 108 sub { return ( [ 'Sequence Polymer Type Term Accession Number', 109 map { $self->_get_thing_accession($_) } @_ ] ) }, 110 sub { return ( [ 'Sequence Polymer Type Term Source REF', 111 map { $self->_get_type_termsource_name($_) } @_ ] ) }, 112 ], 113 ); 114 115 # All the complicated stuff gets handled by the dispatch methods 116 # in %multi. 117 ATTR: 118 while ( my ( $field, $subs ) = each %multi ) { 119 my $getter = "get_$field"; 120 my @attrs = $array->$getter; 121 next ATTR if ( scalar @attrs == 1 && ! defined $attrs[0] ); 122 foreach my $sub ( @$subs ) { 123 foreach my $lineref ( $sub->( @attrs ) ) { 124 125 # Don't write the line if there's nothing to write but the tag. 126 if ( scalar grep { defined $_ && $_ ne q{} } @{ $lineref }[1..$#$lineref] ) { 127 $self->_write_line( @{ $lineref } ); 128 } 129 } 130 } 131 } 132 133 # Dump out our Term Source info. 134 if ( scalar @termsources ) { 135 $self->_write_line( 'Term Source Name', 136 map { $_->get_name() } @termsources ); 137 $self->_write_line( 'Term Source Version', 138 map { $_->get_version() } @termsources ); 139 $self->_write_line( 'Term Source File', 140 map { $_->get_uri() } @termsources ); 141 } 142 143 # Attach all comments to the ArrayDesign. 144 foreach my $comment ( $array->get_comments() ) { 145 my $field = sprintf("Comment[%s]", $comment->get_name()); 146 $self->_write_line( $field, $comment->get_value() ); 147 } 148 149 return; 150} 151 152sub _get_reporter_tag_lists { 153 154 my ( $self ) = @_; 155 156 my $array = $self->get_magetab_object(); 157 158 my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) } 159 $array->get_designElements(); 160 161 my (%db_name, %group_name); 162 foreach my $rep ( @reporters ) { 163 foreach my $db_entry ( $rep->get_databaseEntries() ) { 164 my $ts = $db_entry->get_termSource(); 165 $db_name{ $ts->get_name() }++ if $ts; 166 } 167 foreach my $group ( $rep->get_groups() ) { 168 $group_name{ $group->get_category() }++; 169 } 170 } 171 my @dbs = sort keys %db_name; 172 my @groups = sort keys %group_name; 173 174 return \@dbs, \@groups; 175} 176 177sub _get_composite_tag_lists { 178 179 my ( $self ) = @_; 180 181 my $array = $self->get_magetab_object(); 182 183 my @composites = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) } 184 $array->get_designElements(); 185 186 my %db_name; 187 foreach my $elem ( @composites ) { 188 foreach my $db_entry ( $elem->get_databaseEntries() ) { 189 my $ts = $db_entry->get_termSource(); 190 $db_name{ $ts->get_name() }++ if $ts; 191 } 192 } 193 my @dbs = sort keys %db_name; 194 195 return \@dbs; 196} 197 198sub _generate_main_header_line { 199 200 my ( $self, $reporter_dbs, $groups, $composite_dbs ) = @_; 201 202 my @header = ( 203 'Block Column', 204 'Block Row', 205 'Column', 206 'Row', 207 'Reporter Name', 208 'Reporter Sequence', 209 ( map { "Reporter Database Entry [$_]" } @$reporter_dbs ), 210 ( map { "Reporter Group [$_]" } @$groups ), 211 ); 212 if ( scalar @$groups ) { 213 push @header, 'Reporter Group Term Source REF'; 214 215 if ( $self->get_export_version ne '1.0' ) { 216 push @header, 'Reporter Group Term Accession Number'; 217 } 218 } 219 push @header, ( 220 'Control Type', 221 'Control Type Term Source REF', 222 ); 223 if ( $self->get_export_version ne '1.0' ) { 224 push @header, 'Control Type Term Accession Number'; 225 } 226 227 # CompositeElement. 228 unless ( $self->_must_generate_mapping() ) { 229 push @header, 230 'Composite Element Name', 231 ( map { "Composite Element Database Entry [$_]" } @$composite_dbs ), 232 'Composite Element Comment'; 233 } 234 235 return \@header; 236} 237 238sub _get_feature_coords { 239 240 my ( $self, $feature ) = @_; 241 242 my @coords = map { $feature->$_ } 243 qw( get_blockCol get_blockRow get_col get_row ); 244 245 return @coords; 246} 247 248sub _get_element_dbentries { 249 250 my ( $self, $element, $dbs ) = @_; 251 252 my @accessions; 253 254 my %accession = map { 255 my $ts = $_->get_termSource(); 256 ( $ts ? $ts->get_name() : q{} ) => $_->get_accession(); 257 } $element->get_databaseEntries(); 258 foreach my $db ( @$dbs ) { 259 my $acc = $accession{ $db }; 260 push @accessions, ( defined $acc ? $acc : q{} ); 261 } 262 263 return @accessions; 264} 265 266sub _get_reporter_groups { 267 268 my ( $self, $reporter, $groups ) = @_; 269 270 my @groups; 271 my %group = map { 272 $_->get_category() => $_->get_value() 273 } $reporter->get_groups(); 274 foreach my $name ( @$groups ) { 275 my $gr = $group{ $name }; 276 push @groups, ( defined $gr ? $gr : q{} ); 277 } 278 279 return @groups; 280} 281 282sub _get_reporter_group_source { 283 284 my ( $self, $reporter, $groups ) = @_; 285 286 my @sources; 287 288 # Group Term Source and Accession, where needed. 289 if ( scalar @$groups ) { 290 my @rep_groups = $reporter->get_groups(); 291 if ( scalar @rep_groups > 1 ) { 292 carp(qq{Warning: Multiple Reporter Group Term Sources/Accessions not supported. } 293 . qq{ADF output only contains these values for "} 294 . $rep_groups[0]->get_category() . qq{"\n}) 295 } 296 push @sources, $self->_get_type_termsource_name( $rep_groups[0] ); 297 298 if ( $self->get_export_version() ne '1.0' ) { 299 my $acc = $rep_groups[0]->get_accession(); 300 push @sources, ( defined $acc ? $acc : q{} ); 301 } 302 } 303 304 return @sources; 305} 306 307sub _get_reporter_control_type { 308 309 my ( $self, $reporter ) = @_; 310 311 my @typeinfo; 312 if ( my $ctype = $reporter->get_controlType() ) { 313 push @typeinfo, $ctype->get_value(); 314 push @typeinfo, $self->_get_type_termsource_name( $ctype ); 315 316 if ( $self->get_export_version() ne '1.0' ) { 317 my $acc = $ctype->get_accession(); 318 push @typeinfo, ( defined $acc ? $acc : q{} ); 319 } 320 } 321 else { 322 push @typeinfo, (q{}) x 2; 323 if ( $self->get_export_version() ne '1.0' ) { 324 push @typeinfo, q{}; 325 } 326 } 327 328 return @typeinfo; 329} 330 331sub _generate_reporter_data { 332 333 my ( $self, $reporter, $dbs, $groups ) = @_; 334 335 my @data; 336 push @data, $reporter->get_name(), $reporter->get_sequence(); 337 338 # Get the database entries, in order. 339 push @data, $self->_get_element_dbentries( $reporter, $dbs ); 340 341 # Get the group names, in order. 342 push @data, $self->_get_reporter_groups( $reporter, $groups ); 343 push @data, $self->_get_reporter_group_source( $reporter, $groups ); 344 345 # Control Type. 346 push @data, $self->_get_reporter_control_type( $reporter ); 347 348 return @data; 349} 350 351sub _generate_composite_data { 352 353 my ( $self, $composite, $dbs ) = @_; 354 355 my @data = $composite->get_name(); 356 357 # Get the database entries, in order. 358 push @data, $self->_get_element_dbentries( $composite, $dbs ); 359 360 if ( my $comm = $composite->get_comment() ) { 361 push @data, $comm->get_value(); 362 } 363 else { 364 push @data, q{}; 365 } 366 367 return @data; 368} 369 370sub _must_generate_mapping { 371 372 my ( $self ) = @_; 373 374 unless ( $self->_has_cached_mapping_flag() ) { 375 376 # Check all reporters; if any map to more than one CE, we need 377 # a mapping section. The result is cached so we only check 378 # this once. 379 my $array = $self->get_magetab_object(); 380 my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) } 381 $array->get_designElements(); 382 383 REPORTER: 384 foreach my $rep ( @reporters ) { 385 if ( scalar @{ [ $rep->get_compositeElements() ] } > 1 ) { 386 $self->_set_cached_mapping_flag(1); 387 last REPORTER; 388 } 389 } 390 391 $self->_set_cached_mapping_flag(0) 392 unless $self->_has_cached_mapping_flag(); 393 } 394 395 return $self->_get_cached_mapping_flag(); 396} 397 398sub _write_main { 399 400 my ( $self ) = @_; 401 402 my $array = $self->get_magetab_object(); 403 404 # Figure out which databases are represented. 405 my ( $reporter_dbs, $groups ) = $self->_get_reporter_tag_lists(); 406 my $composite_dbs = $self->_get_composite_tag_lists(); 407 408 # FIXME beware memory issues here; consider creating an iterator 409 # to access some of these objects? This would probably need to be 410 # in the actual Bio::MAGETAB model, possibly with a file- or 411 # db-based backend. 412 my @features = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Feature' ) } 413 $array->get_designElements(); 414 415 # Print out the column headings. 416 my $header = $self->_generate_main_header_line( $reporter_dbs, 417 $groups, 418 $composite_dbs ); 419 $self->set_num_columns( scalar @$header ); 420 $self->_write_line( '[main]' ); 421 $self->_write_line( @$header ); 422 423 # Loop through all the features, writing out the info. 424 foreach my $feature ( @features ) { 425 426 # Sort out the basics; 427 my @line = $self->_get_feature_coords( $feature ); 428 429 # Simple reporter info. 430 my $reporter = $feature->get_reporter(); 431 push @line, $self->_generate_reporter_data( $reporter, $reporter_dbs, $groups ); 432 433 unless ( $self->_must_generate_mapping() ) { 434 435 # There will be only one (or zero) CompositeElements in 436 # such cases. 437 my $composite = $reporter->get_compositeElements(); 438 push @line, $self->_generate_composite_data( $composite, $composite_dbs ) if $composite; 439 } 440 441 # Write out the line. 442 $self->_write_line( @line ); 443 } 444 445 # These may be needed for the mapping section. 446 return $composite_dbs; 447} 448 449sub _write_mapping { 450 451 my ( $self, $dbs ) = @_; 452 453 my $array = $self->get_magetab_object(); 454 my @header = ( 455 'Composite Element Name', 456 'Map2Reporters', 457 ( map { "Composite Element Database Entry [$_]" } @$dbs ), 458 'Composite Element Comment', 459 ); 460 461 # Print out the column headings. 462 $self->set_num_columns( scalar @header ); 463 $self->_write_line( '[mapping]' ); 464 $self->_write_line( @header ); 465 466 # Build a compositeElement to reporter mapping. 467 my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) } 468 $array->get_designElements(); 469 my %map2reporters; 470 foreach my $rep ( @reporters ) { 471 foreach my $comp ( $rep->get_compositeElements() ) { 472 push @{ $map2reporters{ $comp->get_name() } }, $rep->get_name(); 473 } 474 } 475 476 # Build our mapping lines and write them out. 477 my @compelems = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) } 478 $array->get_designElements(); 479 foreach my $element ( @compelems ) { 480 my $name = $element->get_name(); 481 my @line = ( 482 $name, 483 join(';', @{ $map2reporters{ $name } } ), 484 $self->_get_element_dbentries( $element, $dbs ), 485 join('; ', map { $_->get_value() } $element->get_comments()), 486 ); 487 $self->_write_line( @line ); 488 } 489 490 return; 491} 492 493sub write { 494 495 my ( $self ) = @_; 496 497 # First, the header section. 498 $self->_write_header(); 499 500 $self->_write_line( q{} ); # spacer line 501 502 # The main body of the ADF. 503 my $comp_dbs = $self->_write_main(); 504 505 $self->_write_line( q{} ); # spacer line 506 507 # Where necessary, the ADF mapping section. 508 if ( $self->_must_generate_mapping() ) { 509 $self->_write_mapping( $comp_dbs ); 510 } 511 512 return; 513} 514 515# Make the classes immutable. In theory this speeds up object 516# instantiation for a small compilation time cost. 517__PACKAGE__->meta->make_immutable(); 518 519no Moose; 520 521=head1 NAME 522 523Bio::MAGETAB::Util::Writer::ADF - Export of MAGE-TAB ArrayDesign 524objects. 525 526=head1 SYNOPSIS 527 528 use Bio::MAGETAB::Util::Writer::ADF; 529 my $writer = Bio::MAGETAB::Util::Writer::ADF->new({ 530 magetab_object => $array_design, 531 filehandle => $adf_fh, 532 }); 533 534 $writer->write(); 535 536=head1 DESCRIPTION 537 538Export of ArrayDesigns to ADF files. 539 540=head1 ATTRIBUTES 541 542See the L<Tabfile|Bio::MAGETAB::Util::Writer::Tabfile> class for superclass attributes. 543 544=over 2 545 546=item magetab_object 547 548The Bio::MAGETAB::ArrayDesign to export. This is a required 549attribute. 550 551=back 552 553=head1 METHODS 554 555=over 2 556 557=item write 558 559Exports the ArrayDesign to ADF. 560 561=back 562 563=head1 SEE ALSO 564 565L<Bio::MAGETAB::Util::Writer> 566L<Bio::MAGETAB::Util::Writer::Tabfile> 567 568=head1 AUTHOR 569 570Tim F. Rayner <tfrayner@gmail.com> 571 572=head1 LICENSE 573 574This library is released under version 2 of the GNU General Public 575License (GPL). 576 577=cut 578 5791; 580