1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5
6use FindBin;
7use lib ("/usr/local/lib/perl5/site_perl/transdecoder");
8use Gene_obj;
9
10my $usage = "usage: $0 cufflinks.gtf\n\n";
11
12my $cufflinks_gtf = $ARGV[0] or die $usage;
13
14
15main: {
16
17	my %genome_trans_to_coords;
18
19	open (my $fh, $cufflinks_gtf) or die "Error, cannot open file $cufflinks_gtf";
20	while (<$fh>) {
21		chomp;
22
23        if (/^\#/) { next; }
24		unless (/\w/) { next; }
25
26		my @x = split(/\t/);
27
28		my $scaff = $x[0];
29		my $type = $x[2];
30		my $lend = $x[3];
31		my $rend = $x[4];
32
33		my $orient = $x[6];
34
35		my $info = $x[8];
36
37		unless ($type eq 'exon') { next; }
38
39		my @parts = split(/;/, $info);
40		my %atts;
41		foreach my $part (@parts) {
42			$part =~ s/^\s+|\s+$//;
43			$part =~ s/\"//g;
44			my ($att, $val) = split(/\s+/, $part);
45			unless (defined $att) { next; }
46
47			if (exists $atts{$att}) {
48				die "Error, already defined attribute $att in $_";
49			}
50
51			$atts{$att} = $val;
52		}
53
54		my $gene_id = $atts{gene_id} or die "Error, no gene_id at $_";
55		my $trans_id = $atts{transcript_id} or die "Error, no trans_id at $_";
56
57		my ($end5, $end3) = ($orient eq '+') ? ($lend, $rend) : ($rend, $lend);
58
59		$genome_trans_to_coords{$scaff}->{$gene_id}->{$trans_id}->{$end5} = $end3;
60
61	}
62
63
64	## Output genes in gff3 format:
65
66	foreach my $scaff (sort keys %genome_trans_to_coords) {
67
68		my $genes_href = $genome_trans_to_coords{$scaff};
69
70		foreach my $gene_id (keys %$genes_href) {
71
72			my $trans_href = $genes_href->{$gene_id};
73
74			foreach my $trans_id (keys %$trans_href) {
75
76				my $coords_href = $trans_href->{$trans_id};
77
78				my $gene_obj = new Gene_obj();
79
80				$gene_obj->{TU_feat_name} = $gene_id;
81				$gene_obj->{Model_feat_name} = "$trans_id";
82				$gene_obj->{com_name} = "cufflinks $gene_id $trans_id";
83
84				$gene_obj->{asmbl_id} = $scaff;
85
86				$gene_obj->populate_gene_object($coords_href, $coords_href);
87
88
89                ## encode gene and trans id in the ID and target fields for later extraction.  (probably a better way to do this!!)
90				print $gene_obj->to_alignment_GFF3_format("GENE^$gene_id,TRANS^$trans_id", "GENE^$gene_id,TRANS^$trans_id", "Cufflinks");
91
92				print "\n";
93			}
94		}
95	}
96
97
98	exit(0);
99}
100
101