1# Copyright 2008-2010 Tim Rayner
2#
3# This file is part of Bio::MAGETAB.
4#
5# Bio::MAGETAB is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 2 of the License, or
8# (at your option) any later version.
9#
10# Bio::MAGETAB is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with Bio::MAGETAB.  If not, see <http://www.gnu.org/licenses/>.
17#
18# $Id: ADF.pm 340 2010-07-23 13:19:27Z tfrayner $
19
20package Bio::MAGETAB::Util::Writer::ADF;
21
22use Moose;
23use MooseX::FollowPBP;
24
25use Carp;
26
27use MooseX::Types::Moose qw( Bool );
28
29BEGIN { extends 'Bio::MAGETAB::Util::Writer::Tabfile' };
30
31has 'magetab_object'       => ( is         => 'ro',
32                                isa        => 'Bio::MAGETAB::ArrayDesign',
33                                required   => 1 );
34
35has '_cached_mapping_flag' => ( is         => 'rw',
36                                isa        => Bool,
37                                predicate  => '_has_cached_mapping_flag',
38                                required   => 0 );
39
40sub _write_header {
41
42    my ( $self ) = @_;
43
44    my $array = $self->get_magetab_object();
45
46    # Term Sources are a bit ugly, because they're normally attached
47    # to Investigation. We currently cheat and go via any Bio::MAGETAB
48    # container that's available (this means that *all* in-memory term
49    # sources are dumped into the ADF):
50    my ( @termsources, $num_cols );
51    if ( my $magetab = $array->get_ClassContainer() ) {
52        @termsources = $magetab->get_termSources();
53        if ( my $num_ts = scalar @termsources ) {
54            $num_cols = $num_ts + 1;
55        }
56    }
57
58    # Just two columns is standard for the header section if there are
59    # no Term Sources; main and mapping sections will differ (FIXME
60    # check this against the spec; is this valid?).
61    $num_cols ||= 2;
62    $self->set_num_columns( $num_cols );
63    $self->_write_line( '[header]' );
64
65    my %single = (
66        'Array Design Name'   => 'name',
67        'Version'             => 'version',
68        'Provider'            => 'provider',
69        'Printing Protocol'   => 'printingProtocol',
70    );
71
72    # Single elements are straightforward.
73    while ( my ( $field, $value ) = each %single ) {
74        my $getter = "get_$value";
75        $self->_write_line( $field, $array->$getter );
76    }
77
78    # Elements pointing to objects need a bit more work.
79    my %multi = (
80
81        'technologyType' => [
82            sub { return ( [ 'Technology Type',
83                             map { $_->get_value()     } @_ ] ) },
84            sub { return ( [ 'Technology Type Term Accession Number',
85                             map { $self->_get_thing_accession($_) } @_ ] ) },
86            sub { return ( [ 'Technology Type Term Source REF',
87                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
88        ],
89        'surfaceType' => [
90            sub { return ( [ 'Surface Type',
91                             map { $_->get_value()     } @_ ] ) },
92            sub { return ( [ 'Surface Type Term Accession Number',
93                             map { $self->_get_thing_accession($_) } @_ ] ) },
94            sub { return ( [ 'Surface Type Term Source REF',
95                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
96        ],
97        'substrateType' => [
98            sub { return ( [ 'Substrate Type',
99                             map { $_->get_value()     } @_ ] ) },
100            sub { return ( [ 'Substrate Type Term Accession Number',
101                             map { $self->_get_thing_accession($_) } @_ ] ) },
102            sub { return ( [ 'Substrate Type Term Source REF',
103                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
104        ],
105        'sequencePolymerType' => [
106            sub { return ( [ 'Sequence Polymer Type',
107                             map { $_->get_value()     } @_ ] ) },
108            sub { return ( [ 'Sequence Polymer Type Term Accession Number',
109                             map { $self->_get_thing_accession($_) } @_ ] ) },
110            sub { return ( [ 'Sequence Polymer Type Term Source REF',
111                             map { $self->_get_type_termsource_name($_) } @_ ] ) },
112        ],
113    );
114
115    # All the complicated stuff gets handled by the dispatch methods
116    # in %multi.
117    ATTR:
118    while ( my ( $field, $subs ) = each %multi ) {
119        my $getter = "get_$field";
120        my @attrs = $array->$getter;
121        next ATTR if ( scalar @attrs == 1 && ! defined $attrs[0] );
122        foreach my $sub ( @$subs ) {
123            foreach my $lineref ( $sub->( @attrs ) ) {
124
125                # Don't write the line if there's nothing to write but the tag.
126                if ( scalar grep { defined $_ && $_ ne q{} } @{ $lineref }[1..$#$lineref] ) {
127                    $self->_write_line( @{ $lineref } );
128                }
129            }
130        }
131    }
132
133    # Dump out our Term Source info.
134    if ( scalar @termsources ) {
135        $self->_write_line( 'Term Source Name',
136                            map { $_->get_name() } @termsources );
137        $self->_write_line( 'Term Source Version',
138                            map { $_->get_version() } @termsources );
139        $self->_write_line( 'Term Source File',
140                            map { $_->get_uri() } @termsources );
141    }
142
143    # Attach all comments to the ArrayDesign.
144    foreach my $comment ( $array->get_comments() ) {
145        my $field = sprintf("Comment[%s]", $comment->get_name());
146        $self->_write_line( $field, $comment->get_value() );
147    }
148
149    return;
150}
151
152sub _get_reporter_tag_lists {
153
154    my ( $self ) = @_;
155
156    my $array = $self->get_magetab_object();
157
158    my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
159                       $array->get_designElements();
160
161    my (%db_name, %group_name);
162    foreach my $rep ( @reporters ) {
163        foreach my $db_entry ( $rep->get_databaseEntries() ) {
164            my $ts = $db_entry->get_termSource();
165            $db_name{ $ts->get_name() }++ if $ts;
166        }
167        foreach my $group ( $rep->get_groups() ) {
168            $group_name{ $group->get_category() }++;
169        }
170    }
171    my @dbs    = sort keys %db_name;
172    my @groups = sort keys %group_name;
173
174    return \@dbs, \@groups;
175}
176
177sub _get_composite_tag_lists {
178
179    my ( $self ) = @_;
180
181    my $array = $self->get_magetab_object();
182
183    my @composites = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) }
184                       $array->get_designElements();
185
186    my %db_name;
187    foreach my $elem ( @composites ) {
188        foreach my $db_entry ( $elem->get_databaseEntries() ) {
189            my $ts = $db_entry->get_termSource();
190            $db_name{ $ts->get_name() }++ if $ts;
191        }
192    }
193    my @dbs = sort keys %db_name;
194
195    return \@dbs;
196}
197
198sub _generate_main_header_line {
199
200    my ( $self, $reporter_dbs, $groups, $composite_dbs ) = @_;
201
202    my @header = (
203        'Block Column',
204        'Block Row',
205        'Column',
206        'Row',
207        'Reporter Name',
208        'Reporter Sequence',
209        ( map { "Reporter Database Entry [$_]" } @$reporter_dbs    ),
210        ( map { "Reporter Group [$_]" }          @$groups ),
211    );
212    if ( scalar @$groups ) {
213        push @header, 'Reporter Group Term Source REF';
214
215        if ( $self->get_export_version ne '1.0' ) {
216            push @header, 'Reporter Group Term Accession Number';
217        }
218    }
219    push @header, (
220        'Control Type',
221        'Control Type Term Source REF',
222    );
223    if ( $self->get_export_version ne '1.0' ) {
224        push @header, 'Control Type Term Accession Number';
225    }
226
227    # CompositeElement.
228    unless ( $self->_must_generate_mapping() ) {
229        push @header,
230            'Composite Element Name',
231            ( map { "Composite Element Database Entry [$_]" } @$composite_dbs ),
232            'Composite Element Comment';
233    }
234
235    return \@header;
236}
237
238sub _get_feature_coords {
239
240    my ( $self, $feature ) = @_;
241
242    my @coords = map { $feature->$_ }
243        qw( get_blockCol get_blockRow get_col get_row );
244
245    return @coords;
246}
247
248sub _get_element_dbentries {
249
250    my ( $self, $element, $dbs ) = @_;
251
252    my @accessions;
253
254    my %accession = map {
255        my $ts = $_->get_termSource();
256        ( $ts ? $ts->get_name() : q{} ) => $_->get_accession();
257    } $element->get_databaseEntries();
258    foreach my $db ( @$dbs ) {
259        my $acc = $accession{ $db };
260        push @accessions, ( defined $acc ? $acc : q{} );
261    }
262
263    return @accessions;
264}
265
266sub _get_reporter_groups {
267
268    my ( $self, $reporter, $groups ) = @_;
269
270    my @groups;
271    my %group = map {
272        $_->get_category() => $_->get_value()
273    } $reporter->get_groups();
274    foreach my $name ( @$groups ) {
275        my $gr = $group{ $name };
276        push @groups, ( defined $gr ? $gr : q{} );
277    }
278
279    return @groups;
280}
281
282sub _get_reporter_group_source {
283
284    my ( $self, $reporter, $groups ) = @_;
285
286    my @sources;
287
288    # Group Term Source and Accession, where needed.
289    if ( scalar @$groups ) {
290        my @rep_groups = $reporter->get_groups();
291        if ( scalar @rep_groups > 1 ) {
292            carp(qq{Warning: Multiple Reporter Group Term Sources/Accessions not supported. }
293               . qq{ADF output only contains these values for "}
294               . $rep_groups[0]->get_category() . qq{"\n})
295        }
296        push @sources, $self->_get_type_termsource_name( $rep_groups[0] );
297
298        if ( $self->get_export_version() ne '1.0' ) {
299            my $acc = $rep_groups[0]->get_accession();
300            push @sources, ( defined $acc ? $acc : q{} );
301        }
302    }
303
304    return @sources;
305}
306
307sub _get_reporter_control_type {
308
309    my ( $self, $reporter ) = @_;
310
311    my @typeinfo;
312    if ( my $ctype = $reporter->get_controlType() ) {
313        push @typeinfo, $ctype->get_value();
314        push @typeinfo, $self->_get_type_termsource_name( $ctype );
315
316        if ( $self->get_export_version() ne '1.0' ) {
317            my $acc = $ctype->get_accession();
318            push @typeinfo, ( defined $acc ? $acc : q{} );
319        }
320    }
321    else {
322        push @typeinfo, (q{}) x 2;
323        if ( $self->get_export_version() ne '1.0' ) {
324            push @typeinfo, q{};
325        }
326    }
327
328    return @typeinfo;
329}
330
331sub _generate_reporter_data {
332
333    my ( $self, $reporter, $dbs, $groups ) = @_;
334
335    my @data;
336    push @data, $reporter->get_name(), $reporter->get_sequence();
337
338    # Get the database entries, in order.
339    push @data, $self->_get_element_dbentries( $reporter, $dbs );
340
341    # Get the group names, in order.
342    push @data, $self->_get_reporter_groups(       $reporter, $groups );
343    push @data, $self->_get_reporter_group_source( $reporter, $groups );
344
345    # Control Type.
346    push @data, $self->_get_reporter_control_type( $reporter );
347
348    return @data;
349}
350
351sub _generate_composite_data {
352
353    my ( $self, $composite, $dbs ) = @_;
354
355    my @data = $composite->get_name();
356
357    # Get the database entries, in order.
358    push @data, $self->_get_element_dbentries( $composite, $dbs );
359
360    if ( my $comm = $composite->get_comment() ) {
361        push @data, $comm->get_value();
362    }
363    else {
364        push @data, q{};
365    }
366
367    return @data;
368}
369
370sub _must_generate_mapping {
371
372    my ( $self ) = @_;
373
374    unless ( $self->_has_cached_mapping_flag() ) {
375
376        # Check all reporters; if any map to more than one CE, we need
377        # a mapping section. The result is cached so we only check
378        # this once.
379        my $array = $self->get_magetab_object();
380        my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
381            $array->get_designElements();
382
383        REPORTER:
384        foreach my $rep ( @reporters ) {
385            if ( scalar @{ [ $rep->get_compositeElements() ] } > 1 ) {
386                $self->_set_cached_mapping_flag(1);
387                last REPORTER;
388            }
389        }
390
391        $self->_set_cached_mapping_flag(0)
392            unless $self->_has_cached_mapping_flag();
393    }
394
395    return $self->_get_cached_mapping_flag();
396}
397
398sub _write_main {
399
400    my ( $self ) = @_;
401
402    my $array = $self->get_magetab_object();
403
404    # Figure out which databases are represented.
405    my ( $reporter_dbs, $groups ) = $self->_get_reporter_tag_lists();
406    my $composite_dbs             = $self->_get_composite_tag_lists();
407
408    # FIXME beware memory issues here; consider creating an iterator
409    # to access some of these objects? This would probably need to be
410    # in the actual Bio::MAGETAB model, possibly with a file- or
411    # db-based backend.
412    my @features = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Feature' ) }
413                       $array->get_designElements();
414
415    # Print out the column headings.
416    my $header = $self->_generate_main_header_line( $reporter_dbs,
417                                                    $groups,
418                                                    $composite_dbs );
419    $self->set_num_columns( scalar @$header );
420    $self->_write_line( '[main]' );
421    $self->_write_line( @$header );
422
423    # Loop through all the features, writing out the info.
424    foreach my $feature ( @features ) {
425
426        # Sort out the basics;
427        my @line = $self->_get_feature_coords( $feature );
428
429        # Simple reporter info.
430        my $reporter = $feature->get_reporter();
431        push @line, $self->_generate_reporter_data( $reporter, $reporter_dbs, $groups );
432
433        unless ( $self->_must_generate_mapping() ) {
434
435            # There will be only one (or zero) CompositeElements in
436            # such cases.
437            my $composite = $reporter->get_compositeElements();
438            push @line, $self->_generate_composite_data( $composite, $composite_dbs ) if $composite;
439        }
440
441        # Write out the line.
442        $self->_write_line( @line );
443    }
444
445    # These may be needed for the mapping section.
446    return $composite_dbs;
447}
448
449sub _write_mapping {
450
451    my ( $self, $dbs ) = @_;
452
453    my $array = $self->get_magetab_object();
454    my @header = (
455        'Composite Element Name',
456        'Map2Reporters',
457        ( map { "Composite Element Database Entry [$_]" } @$dbs ),
458        'Composite Element Comment',
459    );
460
461    # Print out the column headings.
462    $self->set_num_columns( scalar @header );
463    $self->_write_line( '[mapping]' );
464    $self->_write_line( @header );
465
466    # Build a compositeElement to reporter mapping.
467    my @reporters = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::Reporter' ) }
468                       $array->get_designElements();
469    my %map2reporters;
470    foreach my $rep ( @reporters ) {
471        foreach my $comp ( $rep->get_compositeElements() ) {
472            push @{ $map2reporters{ $comp->get_name() } }, $rep->get_name();
473        }
474    }
475
476    # Build our mapping lines and write them out.
477    my @compelems = grep { UNIVERSAL::isa( $_, 'Bio::MAGETAB::CompositeElement' ) }
478                       $array->get_designElements();
479    foreach my $element ( @compelems ) {
480        my $name = $element->get_name();
481        my @line = (
482            $name,
483            join(';', @{ $map2reporters{ $name } } ),
484            $self->_get_element_dbentries( $element, $dbs ),
485            join('; ', map { $_->get_value() } $element->get_comments()),
486        );
487        $self->_write_line( @line );
488    }
489
490    return;
491}
492
493sub write {
494
495    my ( $self ) = @_;
496
497    # First, the header section.
498    $self->_write_header();
499
500    $self->_write_line( q{} );    # spacer line
501
502    # The main body of the ADF.
503    my $comp_dbs = $self->_write_main();
504
505    $self->_write_line( q{} );    # spacer line
506
507    # Where necessary, the ADF mapping section.
508    if ( $self->_must_generate_mapping() ) {
509        $self->_write_mapping( $comp_dbs );
510    }
511
512    return;
513}
514
515# Make the classes immutable. In theory this speeds up object
516# instantiation for a small compilation time cost.
517__PACKAGE__->meta->make_immutable();
518
519no Moose;
520
521=head1 NAME
522
523Bio::MAGETAB::Util::Writer::ADF - Export of MAGE-TAB ArrayDesign
524objects.
525
526=head1 SYNOPSIS
527
528 use Bio::MAGETAB::Util::Writer::ADF;
529 my $writer = Bio::MAGETAB::Util::Writer::ADF->new({
530    magetab_object => $array_design,
531    filehandle     => $adf_fh,
532 });
533
534 $writer->write();
535
536=head1 DESCRIPTION
537
538Export of ArrayDesigns to ADF files.
539
540=head1 ATTRIBUTES
541
542See the L<Tabfile|Bio::MAGETAB::Util::Writer::Tabfile> class for superclass attributes.
543
544=over 2
545
546=item magetab_object
547
548The Bio::MAGETAB::ArrayDesign to export. This is a required
549attribute.
550
551=back
552
553=head1 METHODS
554
555=over 2
556
557=item write
558
559Exports the ArrayDesign to ADF.
560
561=back
562
563=head1 SEE ALSO
564
565L<Bio::MAGETAB::Util::Writer>
566L<Bio::MAGETAB::Util::Writer::Tabfile>
567
568=head1 AUTHOR
569
570Tim F. Rayner <tfrayner@gmail.com>
571
572=head1 LICENSE
573
574This library is released under version 2 of the GNU General Public
575License (GPL).
576
577=cut
578
5791;
580