1 /*
2 * genes.cpp
3 * cufflinks
4 *
5 * Created by Cole Trapnell on 8/23/09.
6 * Copyright 2009 Cole Trapnell. All rights reserved.
7 *
8 */
9 #include <boost/thread.hpp>
10 #include "genes.h"
11
12 using namespace boost;
13
14 #if ENABLE_THREADS
15 boost::mutex gene_id_lock;
16 #endif
17
18 int next_isoform_id = 1;
19
get_next_isoform_id()20 int get_next_isoform_id()
21 {
22 #if ENABLE_THREADS
23 gene_id_lock.lock();
24 #endif
25 int next = next_isoform_id++;
26 #if ENABLE_THREADS
27 gene_id_lock.unlock();
28 #endif
29 return next;
30 }
31
get_gtf(vector<string> & gff_recs,const RefSequenceTable & rt,set<AugmentedCuffOp> * hit_introns) const32 void Isoform::get_gtf(vector<string>& gff_recs,
33 const RefSequenceTable& rt,
34 set<AugmentedCuffOp>* hit_introns) const
35 {
36 const char* ref_name = rt.get_name(_scaffold.ref_id());
37
38 assert (ref_name != NULL);
39
40 const char* strand_str = NULL;
41 if (_scaffold.strand() == CUFF_STRAND_UNKNOWN)
42 strand_str = ".";
43 else if (_scaffold.strand() == CUFF_FWD)
44 strand_str = "+";
45 else
46 strand_str = "-";
47
48 int score = (int)(_FMI * 1000);
49 score = min(1000, score);
50 if (score == 0)
51 score = 1;
52
53 char buf[2048];
54
55 if (hit_introns != NULL)
56 {
57 sprintf(buf,
58 "%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\"; full_read_support \"%s\";\n",
59 ref_name,
60 _scaffold.left() + 1,
61 _scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open
62 score,
63 strand_str,
64 gene_id().c_str(),
65 trans_id().c_str(),
66 _FPKM,
67 _fraction,
68 _confidence.low,
69 _confidence.high,
70 _coverage,
71 (_scaffold.has_struct_support(*hit_introns)) ? "yes":"no");
72 }
73 else
74 {
75 sprintf(buf,
76 "%s\tCufflinks\ttranscript\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n",
77 ref_name,
78 _scaffold.left() + 1,
79 _scaffold.right(), // GTF intervals are inclusive on both ends, but ours are half-open
80 score,
81 strand_str,
82 gene_id().c_str(),
83 trans_id().c_str(),
84 _FPKM,
85 _fraction,
86 _confidence.low,
87 _confidence.high,
88 _coverage);
89 }
90
91
92 gff_recs.push_back(buf);
93
94 int exon_num = 1;
95 for (size_t op_id = 0; op_id < _scaffold.augmented_ops().size(); ++op_id)
96 {
97 const AugmentedCuffOp& op = _scaffold.augmented_ops()[op_id];
98 if (op.opcode == CUFF_MATCH || op.opcode == CUFF_UNKNOWN)
99 {
100 const char* type = op.opcode == CUFF_MATCH ? "exon" : "missing_data";
101
102 sprintf(buf,
103 "%s\tCufflinks\t\%s\t%d\t%d\t%d\t%s\t.\tgene_id \"%s\"; transcript_id \"%s\"; exon_number \"%d\"; FPKM \"%10.10lf\"; frac \"%lf\"; conf_lo \"%lf\"; conf_hi \"%lf\"; cov \"%lf\";\n",
104 ref_name,
105 type,
106 op.g_left() + 1,
107 op.g_right(), // GTF intervals are inclusive on both ends, but ours are half-open
108 score,
109 strand_str,
110 gene_id().c_str(),
111 trans_id().c_str(),
112 exon_num,
113 _FPKM,
114 _fraction,
115 _confidence.low,
116 _confidence.high,
117 _coverage);
118 gff_recs.push_back(buf);
119
120 exon_num++;
121 }
122 //gff_recs.push_back(buf);
123 }
124
125 }
126
127
128 int next_gene_id = 1;
129
get_next_gene_id()130 int get_next_gene_id()
131 {
132 #if ENABLE_THREADS
133 gene_id_lock.lock();
134 #endif
135 int next = next_gene_id++;
136 #if ENABLE_THREADS
137 gene_id_lock.unlock();
138 #endif
139 return next;
140 }
141
142
143 int next_skipped_region_id = 1;
144
get_next_skipped_region_id()145 int get_next_skipped_region_id()
146 {
147 #if ENABLE_THREADS
148 gene_id_lock.lock();
149 #endif
150 int next = next_skipped_region_id++;
151 #if ENABLE_THREADS
152 gene_id_lock.unlock();
153 #endif
154 return next;
155 }
156