1 /*
2  *  genes.cpp
3  *  cufflinks
4  *
5  *  Created by Cole Trapnell on 8/23/09.
6  *  Copyright 2009 Cole Trapnell. All rights reserved.
7  *
8  */
9 #include <boost/thread.hpp>
10 #include "genes.h"
11 
12 using namespace boost;
13 
14 #if ENABLE_THREADS
15 boost::mutex gene_id_lock;
16 #endif
17 
18 int next_isoform_id = 1;
19 
get_next_isoform_id()20 int get_next_isoform_id()
21 {
22 #if ENABLE_THREADS
23 	gene_id_lock.lock();
24 #endif
25 	int next = next_isoform_id++;
26 #if ENABLE_THREADS
27 	gene_id_lock.unlock();
28 #endif
29 	return next;
30 }
31 
get_gtf(vector<string> & gff_recs,const RefSequenceTable & rt,set<AugmentedCuffOp> * hit_introns) const32 void Isoform::get_gtf(vector<string>& gff_recs,
33 					  const RefSequenceTable& rt,
34                       set<AugmentedCuffOp>* hit_introns) const
35 {
36 	const char* ref_name = rt.get_name(_scaffold.ref_id());
37 
38     assert (ref_name != NULL);
39 
40 	const char* strand_str = NULL;
41 	if (_scaffold.strand() == CUFF_STRAND_UNKNOWN)
42 		strand_str = ".";
43 	else if (_scaffold.strand() == CUFF_FWD)
44 		strand_str = "+";
45 	else
46 		strand_str = "-";
47 
48 	int score = (int)(_FMI * 1000);
49 	score = min(1000, score);
50 	if (score == 0)
51 		score = 1;
52 
53 	char buf[2048];
54 
55 	if (hit_introns != NULL)
56     {
57         sprintf(buf,
58 			"%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\"; full_read_support \"%s\";\n",
59 			ref_name,
60 			_scaffold.left() + 1,
61 			_scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open
62 			score,
63 			strand_str,
64 			gene_id().c_str(),
65 			trans_id().c_str(),
66 			_FPKM,
67 			_fraction,
68 			_confidence.low,
69 			_confidence.high,
70 			_coverage,
71             (_scaffold.has_struct_support(*hit_introns)) ? "yes":"no");
72     }
73     else
74     {
75         sprintf(buf,
76                 "%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n",
77                 ref_name,
78                 _scaffold.left() + 1,
79                 _scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open
80                 score,
81                 strand_str,
82                 gene_id().c_str(),
83                 trans_id().c_str(),
84                 _FPKM,
85                 _fraction,
86                 _confidence.low,
87                 _confidence.high,
88                 _coverage);
89     }
90 
91 
92 	gff_recs.push_back(buf);
93 
94 	int exon_num = 1;
95 	for (size_t op_id = 0; op_id < _scaffold.augmented_ops().size(); ++op_id)
96 	{
97 		const AugmentedCuffOp& op = _scaffold.augmented_ops()[op_id];
98 		if (op.opcode == CUFF_MATCH || op.opcode == CUFF_UNKNOWN)
99 		{
100 			const char* type = op.opcode == CUFF_MATCH ? "exon" : "missing_data";
101 
102 			sprintf(buf,
103 					"%s\tCufflinks\t\%s\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; exon_number \"%d\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n",
104 					ref_name,
105 					type,
106 					op.g_left() + 1,
107 					op.g_right(), // GTF intervals are inclusive on both ends, but ours are half-open
108 					score,
109 					strand_str,
110 					gene_id().c_str(),
111 					trans_id().c_str(),
112 					exon_num,
113 					_FPKM,
114 					_fraction,
115 					_confidence.low,
116 					_confidence.high,
117 					_coverage);
118 			gff_recs.push_back(buf);
119 
120 			exon_num++;
121 		}
122 		//gff_recs.push_back(buf);
123 	}
124 
125 }
126 
127 
128 int next_gene_id = 1;
129 
get_next_gene_id()130 int get_next_gene_id()
131 {
132 #if ENABLE_THREADS
133 	gene_id_lock.lock();
134 #endif
135 	int next = next_gene_id++;
136 #if ENABLE_THREADS
137 	gene_id_lock.unlock();
138 #endif
139 	return next;
140 }
141 
142 
143 int next_skipped_region_id = 1;
144 
get_next_skipped_region_id()145 int get_next_skipped_region_id()
146 {
147 #if ENABLE_THREADS
148 	gene_id_lock.lock();
149 #endif
150 	int next = next_skipped_region_id++;
151 #if ENABLE_THREADS
152 	gene_id_lock.unlock();
153 #endif
154 	return next;
155 }
156