1 #ifndef ISOFORM_H
2 #define ISOFORM_H
3 
4 /*
5  *  genes.h
6  *  cufflinks
7  *
8  *  Created by Cole Trapnell on 8/23/09.
9  *  Copyright 2009 Cole Trapnell. All rights reserved.
10  *
11  */
12 
13 #ifdef HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16 
17 #include "scaffolds.h"
18 #include "abundances.h"
19 #include "common.h"
20 
21 extern int next_isoform_id;
22 
23 int get_next_isoform_id();
24 
25 extern int next_gene_id;
26 
27 int get_next_gene_id();
28 
29 extern int next_skipped_region_id;
30 
31 int get_next_skipped_region_id();
32 
33 class Isoform
34 {
35 public:
36 	Isoform(const Scaffold& s,
37 			int gid,
38 			int tid,
39 			double FPKM = 0.0,
40 			double eff_len = 0.0,
41 			double fraction = 0.0,
42 			ConfidenceInterval ci = ConfidenceInterval(),
43 			double cov = 0.0,
44             double est_frag_count = 0.0,
45 			double fmi = 0.0,
46 			AbundanceStatus status = NUMERIC_FAIL,
47 			string ref_gene_id = "") :
_scaffold(s)48 		_scaffold(s),
49 		_FPKM(FPKM),
50 		_eff_len(eff_len),
51 		_fraction(fraction),
52 		_confidence(ci),
53 		_coverage(cov),
54         _estimated_count(est_frag_count),
55 		_FMI(fmi),
56 		_status(status)
57 	{
58 		_id = get_next_isoform_id();
59 
60 		char trans_id_str[256];
61 		if (_scaffold.annotated_trans_id() != "")
62 			strncpy(trans_id_str, _scaffold.annotated_trans_id().c_str(), 255);
63 		else if (gid == -1)
64 			sprintf(trans_id_str, "%s.%s.%d", user_label.c_str(), ref_gene_id.c_str(), tid);
65 		else
66 			sprintf(trans_id_str, "%s.%d.%d", user_label.c_str(), gid, tid);
67 
68 		_trans_id = trans_id_str;
69 
70 		char gene_id_str[256];
71 		if(gid == -1)
72 			strncpy(gene_id_str, ref_gene_id.c_str(), 255);
73 		else
74 			sprintf(gene_id_str, "%s.%d", user_label.c_str(), gid);
75 		_gene_id = gene_id_str;
76 	}
77 
scaffold()78 	const Scaffold& scaffold() const { return _scaffold; }
FPKM()79 	double FPKM() const { return _FPKM; }
FPKM(double fpkm)80 	void   FPKM(double fpkm) { _FPKM = fpkm; }
81 
effective_length()82 	double effective_length() const { return _eff_len; }
effective_length(double eff_len)83 	void   effective_length(double eff_len) { _eff_len = eff_len; }
84 
status()85 	AbundanceStatus status() const { return _status; }
status(AbundanceStatus status)86 	void   status(AbundanceStatus status) { _status = status; }
87 
fraction()88 	double fraction() const {return _fraction; }
fraction(double f)89 	void fraction(double f) { _fraction = f; }
90 
confidence()91 	ConfidenceInterval confidence() const { return _confidence; }
confidence(ConfidenceInterval c)92 	void   confidence(ConfidenceInterval c) { _confidence = c; }
93 
coverage()94 	double coverage() const { return _coverage; }
coverage(double cov)95 	void   coverage(double cov) { _coverage = cov; }
96 
97 	// fraction of major isoform expression
FMI()98 	double FMI() const { return _FMI; }
FMI(double fmi)99 	void   FMI(double fmi) { _FMI = fmi; }
100 
ID()101 	int ID() const { return _id; }
102 
103 	void get_gtf(vector<string>& gtf_recs,
104 				 const RefSequenceTable& rt,
105                  set<AugmentedCuffOp>* hit_introns=NULL) const;
106 
gene_id(string & gid)107 	void gene_id(string& gid) { _gene_id = gid; }
gene_id()108 	const string& gene_id() const { return _gene_id; }
trans_id()109 	const string& trans_id() const {return _trans_id; }
110 
is_ref_trans()111 	bool is_ref_trans() const { return _scaffold.is_ref(); }
112 
estimated_count()113     double estimated_count() const { return _estimated_count; }
estimated_count(double est)114     void estimated_count(double est) { _estimated_count = est; }
115 private:
116 
117 	Scaffold _scaffold;
118 	double _FPKM;
119 	double _eff_len;
120 	double _fraction;
121 	ConfidenceInterval _confidence;
122 	double _coverage;
123     double _estimated_count;
124 	double _FMI;
125 	int _id;
126 	string _gene_id;
127 	string _trans_id;
128 	AbundanceStatus _status;
129 };
130 
131 class Gene
132 {
133 public:
134 	Gene(const vector<Isoform>& isoforms,
135 		 double FPKM = 0.0,
136 		 const ConfidenceInterval& ci = ConfidenceInterval(),
137 		 AbundanceStatus status=NUMERIC_FAIL) :
_isoforms(isoforms)138 		_isoforms(isoforms),
139 		_FPKM(FPKM),
140 		_confidence(ci),
141 		_status(status)
142 	{
143 		vector<Scaffold> scaffolds;
144 		for (size_t i = 0; i < isoforms.size(); ++i)
145 			scaffolds.push_back(isoforms[i].scaffold());
146 
147 		// Now compute FPKM for the whole gene
148 		Scaffold smashed_gene;
149 		Scaffold::merge(scaffolds, smashed_gene, false);
150 		_left = smashed_gene.left();
151 		_right = smashed_gene.right();
152 
153 		_gene_id = _isoforms.front().gene_id();
154 	}
155 
isoforms()156 	const vector<Isoform>& isoforms() const { return _isoforms; }
FPKM()157 	double FPKM() const { return _FPKM; }
158 
confidence()159 	ConfidenceInterval confidence() const { return _confidence; }
confidence(ConfidenceInterval c)160 	void   confidence(ConfidenceInterval c) { _confidence = c; }
161 
status()162 	AbundanceStatus status() const { return _status; }
status(AbundanceStatus status)163 	void   status(AbundanceStatus status) { _status = status; }
164 
left()165 	int left() const { return _left; }
right()166 	int right() const { return _right; }
167 
gene_id()168 	const string& gene_id() const { return _gene_id; }
169 
has_ref_trans()170 	bool has_ref_trans() const
171 	{
172 		BOOST_FOREACH (const Isoform& iso, _isoforms)
173 		{
174 			if (iso.is_ref_trans())
175 				return true;
176 		}
177 		return false;
178 	}
179 
estimated_count()180     double estimated_count() const
181     {
182         double est = 0.0;
183         BOOST_FOREACH (const Isoform& iso, _isoforms)
184 		{
185 			est += iso.estimated_count();
186 		}
187 		return est;
188     }
189 
effective_length()190     double effective_length() const
191     {
192         double eff = 0.0;
193         double total_fpkm = 0;
194         BOOST_FOREACH (const Isoform& iso, _isoforms)
195 		{
196 			eff += iso.FPKM() * iso.effective_length();
197             total_fpkm += iso.FPKM();
198 		}
199         if (total_fpkm)
200             return eff / total_fpkm;
201 		else
202             return 0;
203     }
204 
205 private:
206 
207 	vector<Isoform> _isoforms;
208 	double _FPKM;
209 	ConfidenceInterval _confidence;
210 	int _id;
211 	int _left;
212 	int _right;
213 	string _gene_id;
214 	AbundanceStatus _status;
215 };
216 
217 #endif
218