1 #ifndef ISOFORM_H 2 #define ISOFORM_H 3 4 /* 5 * genes.h 6 * cufflinks 7 * 8 * Created by Cole Trapnell on 8/23/09. 9 * Copyright 2009 Cole Trapnell. All rights reserved. 10 * 11 */ 12 13 #ifdef HAVE_CONFIG_H 14 #include <config.h> 15 #endif 16 17 #include "scaffolds.h" 18 #include "abundances.h" 19 #include "common.h" 20 21 extern int next_isoform_id; 22 23 int get_next_isoform_id(); 24 25 extern int next_gene_id; 26 27 int get_next_gene_id(); 28 29 extern int next_skipped_region_id; 30 31 int get_next_skipped_region_id(); 32 33 class Isoform 34 { 35 public: 36 Isoform(const Scaffold& s, 37 int gid, 38 int tid, 39 double FPKM = 0.0, 40 double eff_len = 0.0, 41 double fraction = 0.0, 42 ConfidenceInterval ci = ConfidenceInterval(), 43 double cov = 0.0, 44 double est_frag_count = 0.0, 45 double fmi = 0.0, 46 AbundanceStatus status = NUMERIC_FAIL, 47 string ref_gene_id = "") : _scaffold(s)48 _scaffold(s), 49 _FPKM(FPKM), 50 _eff_len(eff_len), 51 _fraction(fraction), 52 _confidence(ci), 53 _coverage(cov), 54 _estimated_count(est_frag_count), 55 _FMI(fmi), 56 _status(status) 57 { 58 _id = get_next_isoform_id(); 59 60 char trans_id_str[256]; 61 if (_scaffold.annotated_trans_id() != "") 62 strncpy(trans_id_str, _scaffold.annotated_trans_id().c_str(), 255); 63 else if (gid == -1) 64 sprintf(trans_id_str, "%s.%s.%d", user_label.c_str(), ref_gene_id.c_str(), tid); 65 else 66 sprintf(trans_id_str, "%s.%d.%d", user_label.c_str(), gid, tid); 67 68 _trans_id = trans_id_str; 69 70 char gene_id_str[256]; 71 if(gid == -1) 72 strncpy(gene_id_str, ref_gene_id.c_str(), 255); 73 else 74 sprintf(gene_id_str, "%s.%d", user_label.c_str(), gid); 75 _gene_id = gene_id_str; 76 } 77 scaffold()78 const Scaffold& scaffold() const { return _scaffold; } FPKM()79 double FPKM() const { return _FPKM; } FPKM(double fpkm)80 void FPKM(double fpkm) { _FPKM = fpkm; } 81 effective_length()82 double effective_length() const { return _eff_len; } effective_length(double eff_len)83 void effective_length(double eff_len) { _eff_len = eff_len; } 84 status()85 AbundanceStatus status() const { return _status; } status(AbundanceStatus status)86 void status(AbundanceStatus status) { _status = status; } 87 fraction()88 double fraction() const {return _fraction; } fraction(double f)89 void fraction(double f) { _fraction = f; } 90 confidence()91 ConfidenceInterval confidence() const { return _confidence; } confidence(ConfidenceInterval c)92 void confidence(ConfidenceInterval c) { _confidence = c; } 93 coverage()94 double coverage() const { return _coverage; } coverage(double cov)95 void coverage(double cov) { _coverage = cov; } 96 97 // fraction of major isoform expression FMI()98 double FMI() const { return _FMI; } FMI(double fmi)99 void FMI(double fmi) { _FMI = fmi; } 100 ID()101 int ID() const { return _id; } 102 103 void get_gtf(vector<string>& gtf_recs, 104 const RefSequenceTable& rt, 105 set<AugmentedCuffOp>* hit_introns=NULL) const; 106 gene_id(string & gid)107 void gene_id(string& gid) { _gene_id = gid; } gene_id()108 const string& gene_id() const { return _gene_id; } trans_id()109 const string& trans_id() const {return _trans_id; } 110 is_ref_trans()111 bool is_ref_trans() const { return _scaffold.is_ref(); } 112 estimated_count()113 double estimated_count() const { return _estimated_count; } estimated_count(double est)114 void estimated_count(double est) { _estimated_count = est; } 115 private: 116 117 Scaffold _scaffold; 118 double _FPKM; 119 double _eff_len; 120 double _fraction; 121 ConfidenceInterval _confidence; 122 double _coverage; 123 double _estimated_count; 124 double _FMI; 125 int _id; 126 string _gene_id; 127 string _trans_id; 128 AbundanceStatus _status; 129 }; 130 131 class Gene 132 { 133 public: 134 Gene(const vector<Isoform>& isoforms, 135 double FPKM = 0.0, 136 const ConfidenceInterval& ci = ConfidenceInterval(), 137 AbundanceStatus status=NUMERIC_FAIL) : _isoforms(isoforms)138 _isoforms(isoforms), 139 _FPKM(FPKM), 140 _confidence(ci), 141 _status(status) 142 { 143 vector<Scaffold> scaffolds; 144 for (size_t i = 0; i < isoforms.size(); ++i) 145 scaffolds.push_back(isoforms[i].scaffold()); 146 147 // Now compute FPKM for the whole gene 148 Scaffold smashed_gene; 149 Scaffold::merge(scaffolds, smashed_gene, false); 150 _left = smashed_gene.left(); 151 _right = smashed_gene.right(); 152 153 _gene_id = _isoforms.front().gene_id(); 154 } 155 isoforms()156 const vector<Isoform>& isoforms() const { return _isoforms; } FPKM()157 double FPKM() const { return _FPKM; } 158 confidence()159 ConfidenceInterval confidence() const { return _confidence; } confidence(ConfidenceInterval c)160 void confidence(ConfidenceInterval c) { _confidence = c; } 161 status()162 AbundanceStatus status() const { return _status; } status(AbundanceStatus status)163 void status(AbundanceStatus status) { _status = status; } 164 left()165 int left() const { return _left; } right()166 int right() const { return _right; } 167 gene_id()168 const string& gene_id() const { return _gene_id; } 169 has_ref_trans()170 bool has_ref_trans() const 171 { 172 BOOST_FOREACH (const Isoform& iso, _isoforms) 173 { 174 if (iso.is_ref_trans()) 175 return true; 176 } 177 return false; 178 } 179 estimated_count()180 double estimated_count() const 181 { 182 double est = 0.0; 183 BOOST_FOREACH (const Isoform& iso, _isoforms) 184 { 185 est += iso.estimated_count(); 186 } 187 return est; 188 } 189 effective_length()190 double effective_length() const 191 { 192 double eff = 0.0; 193 double total_fpkm = 0; 194 BOOST_FOREACH (const Isoform& iso, _isoforms) 195 { 196 eff += iso.FPKM() * iso.effective_length(); 197 total_fpkm += iso.FPKM(); 198 } 199 if (total_fpkm) 200 return eff / total_fpkm; 201 else 202 return 0; 203 } 204 205 private: 206 207 vector<Isoform> _isoforms; 208 double _FPKM; 209 ConfidenceInterval _confidence; 210 int _id; 211 int _left; 212 int _right; 213 string _gene_id; 214 AbundanceStatus _status; 215 }; 216 217 #endif 218