1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5
6use File::Basename;
7use FindBin;
8use lib ("/usr/local/lib/perl5/site_perl/transdecoder");
9use Gene_obj;
10
# Command-line handling: the first positional argument is the path to the
# Cufflinks GTF file. If it is missing (or false), the script dies with the
# usage text; any further arguments are ignored.
my $usage = "usage: $0 cufflinks.gtf\n\n";

my ($cufflinks_gtf) = @ARGV;
die $usage unless $cufflinks_gtf;
14
15
# Main program.
#
# Reads the GTF file named by $cufflinks_gtf, collects the coordinates of
# every 'exon' record grouped by scaffold / gene_id / transcript_id, and then
# prints a track header followed by one BED-format record per transcript
# (as produced by Gene_obj::to_BED_format) on STDOUT.
#
# Dies if the file cannot be opened, if an attribute appears twice on one
# line, or if an exon record lacks a gene_id or transcript_id.
main: {

	# scaffold -> gene_id -> transcript_id -> { end5 => end3 }, one entry per exon
	my %genome_trans_to_coords;

	# Three-arg open: the filename is never interpreted as a mode or a pipe,
	# and $! tells the user why the open failed.
	open (my $fh, '<', $cufflinks_gtf) or die "Error, cannot open file $cufflinks_gtf: $!";
	while (my $line = <$fh>) {
		chomp $line;
		if ($line =~ /^\#/) { next; }       # comment line
		unless ($line =~ /\w/) { next; }    # blank line

		my @x = split(/\t/, $line);

		my $scaff = $x[0];
		my $type = $x[2];
		my $lend = $x[3];
		my $rend = $x[4];

		my $orient = $x[6];

		my $info = $x[8];

		# Only exon records contribute coordinates.
		unless ($type eq 'exon') { next; }

		# Parse the attribute column: 'key "value"; key "value"; ...'
		$info =~ s/^\s+|\s+$//g;
		my @parts = split(/;/, $info);
		my %atts;
		foreach my $part (@parts) {
			$part =~ s/^\s+|\s+$//g;
			$part =~ s/\"//g;

			# An empty segment (e.g. from ';;') carries no attribute.
			if ($part eq '') { next; }

			# Limit of 2 keeps values that themselves contain whitespace intact.
			my ($att, $val) = split(/\s+/, $part, 2);

			if (exists $atts{$att}) {
				die "Error, already defined attribute $att in $line";
			}

			$atts{$att} = $val;
		}

		# Test for presence rather than truth so that an id of "0" is accepted.
		my $gene_id = $atts{gene_id};
		unless (defined $gene_id && $gene_id ne '') {
			die "Error, no gene_id at $line";
		}
		my $trans_id = $atts{transcript_id};
		unless (defined $trans_id && $trans_id ne '') {
			die "Error, no trans_id at $line";
		}

		# Orientation is encoded by coordinate order: end5 < end3 for '+',
		# reversed otherwise. Note that any strand other than '+' (including
		# '.') is treated as minus here.
		my ($end5, $end3) = ($orient eq '+') ? ($lend, $rend) : ($rend, $lend);

		$genome_trans_to_coords{$scaff}->{$gene_id}->{$trans_id}->{$end5} = $end3;

	}
	close $fh;


	## Output transcripts in BED format:

	print "track name=\'" . basename($cufflinks_gtf) . "\'\n";

	foreach my $scaff (sort keys %genome_trans_to_coords) {

		my $genes_href = $genome_trans_to_coords{$scaff};

		foreach my $gene_id (sort keys %$genes_href) {

			my $trans_href = $genes_href->{$gene_id};

			foreach my $trans_id (sort keys %$trans_href) {

				my $coords_href = $trans_href->{$trans_id};

				my $gene_obj = Gene_obj->new();

				$gene_obj->{TU_feat_name} = $gene_id;
				$gene_obj->{Model_feat_name} = $trans_id;
				$gene_obj->{com_name} = "$gene_id $trans_id";

				$gene_obj->{asmbl_id} = $scaff;

				# The same coordinate hash is passed for both arguments.
				# NOTE(review): presumably these are the exon and CDS
				# coordinate sets, making every exon fully "coding" --
				# confirm against Gene_obj::populate_gene_object.
				$gene_obj->populate_gene_object($coords_href, $coords_href);

				print $gene_obj->to_BED_format();

			}
		}
	}


	exit(0);
}
99
100