1 #ifndef ABUNDANCES_H
2 #define ABUNDANCES_H
3 /*
4  *  abundances.h
5  *  cufflinks
6  *
7  *  Created by Cole Trapnell on 4/27/09.
8  *  Copyright 2009 Cole Trapnell. All rights reserved.
9  *
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15 
16 #include <stdint.h>
17 #include <vector>
18 #include <boost/numeric/ublas/matrix.hpp>
19 #include <boost/numeric/ublas/vector.hpp>
20 
21 #include <Eigen/Dense>
22 
23 #include "hits.h"
24 #include "scaffolds.h"
25 #include "bundles.h"
26 #include "biascorrection.h"
27 
28 namespace ublas = boost::numeric::ublas;
29 
30 struct ConfidenceInterval
31 {
32 	ConfidenceInterval(double Low = 0.0, double High = 0.0)
lowConfidenceInterval33 	: low(Low), high(High) {}
34 	double low;
35 	double high;
36 
37 private:
38     friend std::ostream & operator<<(std::ostream &os, const ConfidenceInterval &gp);
39     friend class boost::serialization::access;
40     template<class Archive>
serializeConfidenceInterval41     void serialize(Archive & ar, const unsigned int /* file_version */){
42         ar & low & high;
43     }
44 };
45 
46 enum AbundanceStatus { NUMERIC_OK, NUMERIC_FAIL, NUMERIC_LOW_DATA, NUMERIC_HI_DATA };
47 
48 typedef map<boost::shared_ptr<ReadGroupProperties const>, double> CountPerReplicateTable;
49 typedef map<boost::shared_ptr<ReadGroupProperties const>, double> FPKMPerReplicateTable;
50 typedef map<boost::shared_ptr<ReadGroupProperties const>, AbundanceStatus> StatusPerReplicateTable;
51 
52 bool fit_negbin_dist(const vector<double> samples, double& r, double& p);
53 long double negbin_log_likelihood(const vector<double>& samples, long double r, long double p);
54 long double poisson_log_likelihood(const vector<double>& samples, long double lambda);
55 
56 namespace bser = boost::serialization;
57 
58 class Abundance
59 {
60 public:
~Abundance()61 	virtual ~Abundance() {}
62 
63 	// Status of the numerical calculation performed on this object.  Safe to
64 	// do testing only if status == NUMERIC_OK
65 	virtual AbundanceStatus status() const = 0;
66 	virtual void status(AbundanceStatus s) = 0;
67 
68 	// Fragments Per Kilbase of transcript per Million fragments mapped
69 	virtual double				FPKM() const = 0;
70     virtual void				FPKM(double fpkm) = 0;
71 	virtual double				FPKM_variance() const = 0;
72 	virtual void				FPKM_variance(double v) = 0;
73 
74 	virtual ConfidenceInterval	FPKM_conf() const = 0;
75 	virtual void FPKM_conf(const ConfidenceInterval& cf) = 0;
76 
77 	// gamma is a fixed property of each transcript or transcript group.  It's
78 	// the probability that one would draw a fragment from this object, scaled
79 	// to an arbitrary locus' worth of fragments.
80 	virtual double			gamma() const = 0;
81 	virtual void			gamma(double g) = 0;
82 
83 	// Kappa is only really meaningful when this Abundance record is part of a
84 	// group - it's the relative abundance of this object within the larger
85 	// group.
86 	virtual double			kappa() const	= 0;
87 	virtual void			kappa(double k)	= 0;
88 
89     // This tracks the final modeled variance in the assigned counts.
90 	virtual double			num_fragments() const = 0;
91 	virtual void			num_fragments(double nf) = 0;
92 
93     // This tracks the final modeled variance in the assigned counts.
94     virtual double num_fragment_var() const	= 0;
95 	virtual void num_fragment_var(double nfv) = 0;
96 
97     virtual CountPerReplicateTable num_fragments_by_replicate() const = 0;
98     virtual void            num_fragments_by_replicate(CountPerReplicateTable& cpr) = 0;
99 
100     virtual FPKMPerReplicateTable FPKM_by_replicate() const = 0;
101     virtual void            FPKM_by_replicate(CountPerReplicateTable& cpr) = 0;
102 
103     virtual StatusPerReplicateTable status_by_replicate() const = 0;
104     virtual void status_by_replicate(StatusPerReplicateTable& fpr) = 0;
105 
106     // This tracks the fitted variance from the overdispersion model,
107     // and does not include the fragment assignment uncertainty.
108     virtual double          mass_variance() const = 0;
109 	virtual void            mass_variance(double mv) = 0;
110 
111     // This tracks the fragment assignment uncertainty,
112     // and does not include the overdispersion.
113     virtual double          num_fragment_uncertainty_var() const = 0;
114 	virtual void            num_fragment_uncertainty_var(double mv) = 0;
115 
116 	virtual double			effective_length() const= 0;
117 	virtual void			effective_length(double el) = 0;
118 
cond_probs()119 	virtual const vector<double>*	cond_probs() const		{ return NULL; }
120 	virtual void					cond_probs(vector<double>* cp) = 0;
121 
122 	// The structural information for the object, if defined.
transfrag()123 	virtual boost::shared_ptr<Scaffold> transfrag() const		{ return boost::shared_ptr<Scaffold>(); }
124 
125     virtual const vector<double>& fpkm_samples() const = 0;
126     virtual void  fpkm_samples(const vector<double>& s) = 0;
127 
128 
129 	virtual set<string>		gene_id() const = 0;
130 	virtual set<string>		gene_name() const = 0;
131 	virtual set<string>		tss_id() const = 0;
132 	virtual set<string>		protein_id() const = 0;
133 
134 	virtual const string&	description() const = 0;
135 	virtual void			description(const string& d) = 0;
136 
137 	virtual const string&	locus_tag() const = 0;
138 	virtual void			locus_tag(const string& L) = 0;
139 
140 	virtual const string&	reference_tag() const = 0;
141 	virtual void			reference_tag(const string& r) = 0;
142 
143     template<class Archive>
serialize(Archive & ar,const unsigned int file_version)144     void serialize(Archive & ar, const unsigned int file_version)
145     {
146 
147     }
148 
149     virtual void clear_non_serialized_data() = 0;
150 };
151 
152 BOOST_SERIALIZATION_ASSUME_ABSTRACT(Abundance);
153 
154 class TranscriptAbundance : public Abundance
155 {
156 public:
157 
TranscriptAbundance()158 	TranscriptAbundance() :
159 		_status(NUMERIC_OK),
160 		_transfrag(boost::shared_ptr<Scaffold>()),
161 		_FPKM(0),
162 		_FPKM_variance(0),
163 		_gamma(0),
164 		_kappa(1.0),
165 		_num_fragments(0),
166         _num_fragment_var(0),
167         _num_fragment_uncertainty_var(0),
168 		_eff_len(0),
169 		_cond_probs(NULL),
170         _sample_mass_variance(0.0){}
171 
~TranscriptAbundance()172 	~TranscriptAbundance()
173 	{
174 		if (_cond_probs != NULL)
175 		{
176 			delete _cond_probs;
177 			_cond_probs = NULL;
178 		}
179 	}
180 
status()181 	AbundanceStatus status() const			{ return _status; }
status(AbundanceStatus s)182 	void status(AbundanceStatus s)			{ _status = s; }
183 
FPKM()184 	double FPKM() const						{ return _FPKM; }
FPKM(double fpkm)185     void FPKM(double fpkm)
186 	{
187 		_FPKM = fpkm;
188         if (_transfrag)
189             _transfrag->fpkm(fpkm);
190 	}
FPKM_variance()191 	double FPKM_variance() const			{ return _FPKM_variance; }
192 	void   FPKM_variance(double v);
193 
FPKM_conf()194 	ConfidenceInterval	FPKM_conf() const   { return _FPKM_conf; }
FPKM_conf(const ConfidenceInterval & cf)195 	void FPKM_conf(const ConfidenceInterval& cf) { _FPKM_conf = cf; }
196 
gamma()197 	double gamma() const					{ return _gamma; }
gamma(double g)198 	void gamma(double g)					{ assert(!isnan(g)); _gamma = g; };
199 
kappa()200 	double kappa() const					{ return _kappa; }
kappa(double k)201 	void kappa(double k)					{ _kappa = k; }
202 
203     // This returns the estimated number of fragments
num_fragments()204 	double num_fragments() const			{ return _num_fragments; }
num_fragments(double nf)205 	void num_fragments(double nf)
206     {
207         assert (!isnan(nf));
208         _num_fragments = nf;
209         if (_transfrag)
210             _transfrag->num_fragments(nf);
211     }
212 
213     // This tracks the final modeled variance in the assigned counts.
num_fragment_var()214     double num_fragment_var() const			{ return _num_fragment_var; }
num_fragment_var(double nfv)215 	void num_fragment_var(double nfv)		{ assert (!isnan(nfv)); _num_fragment_var = nfv; }
216 
217     // This tracks the fragment assignment uncertainty,
218     // and does not include the overdispersion.
num_fragment_uncertainty_var()219     double          num_fragment_uncertainty_var() const { return _num_fragment_uncertainty_var; }
num_fragment_uncertainty_var(double nfv)220 	void            num_fragment_uncertainty_var(double nfv) { assert (!isnan(nfv)); _num_fragment_uncertainty_var = nfv; }
221 
num_fragments_by_replicate()222     CountPerReplicateTable num_fragments_by_replicate() const { return _num_fragments_per_replicate; }
num_fragments_by_replicate(CountPerReplicateTable & cpr)223     void num_fragments_by_replicate(CountPerReplicateTable& cpr) { _num_fragments_per_replicate = cpr; }
224 
FPKM_by_replicate()225     FPKMPerReplicateTable FPKM_by_replicate() const { return _fpkm_per_replicate; }
FPKM_by_replicate(FPKMPerReplicateTable & fpr)226     void FPKM_by_replicate(FPKMPerReplicateTable& fpr) { _fpkm_per_replicate = fpr; }
227 
status_by_replicate()228     StatusPerReplicateTable status_by_replicate() const { return _status_per_replicate; }
status_by_replicate(StatusPerReplicateTable & fpr)229     void status_by_replicate(StatusPerReplicateTable& fpr) { _status_per_replicate = fpr; }
230 
mass_variance()231     double mass_variance() const			{ return _sample_mass_variance; }
mass_variance(double mv)232 	void mass_variance(double mv)			{ _sample_mass_variance = mv; }
233 
transfrag(boost::shared_ptr<Scaffold> tf)234 	void transfrag(boost::shared_ptr<Scaffold> tf)		{ _transfrag = tf; }
transfrag()235 	boost::shared_ptr<Scaffold> transfrag() const		{ return _transfrag; }
236 
effective_length()237 	double effective_length() const			{ return _eff_len; }
effective_length(double el)238 	void effective_length(double el)		{ _eff_len = el; }
239 
cond_probs()240 	const vector<double>* cond_probs() const	{ return _cond_probs; }
cond_probs(vector<double> * cp)241 	void cond_probs(vector<double>* cp)
242 	{
243 		if(_cond_probs != NULL) { delete _cond_probs; };
244 		_cond_probs = cp;
245 	}
246 
fpkm_samples()247     const vector<double>& fpkm_samples() const { return _fpkm_samples; }
fpkm_samples(const vector<double> & s)248     void  fpkm_samples(const vector<double>& s) { _fpkm_samples = s; }
249 
gene_id()250 	set<string> gene_id() const
251 	{
252 		if (_transfrag)
253 		{
254 			set<string> s;
255 			s.insert(_transfrag->annotated_gene_id());
256 			return s;
257 		}
258 		else
259 		{
260 			assert (false);
261 			return set<string>();
262 		}
263 	}
264 
gene_name()265 	set<string> gene_name() const
266 	{
267 		if (_transfrag)
268 		{
269 			set<string> s;
270 			s.insert(_transfrag->annotated_gene_name());
271 			return s;
272 		}
273 		else
274 		{
275 			assert (false);
276 			return set<string>();
277 		}
278 	}
279 
tss_id()280 	set<string> tss_id() const
281 	{
282 		if (_transfrag)
283 		{
284 			set<string> s;
285 			s.insert(_transfrag->annotated_tss_id());
286 			return s;
287 		}
288 		else
289 		{
290 			assert (false);
291 			return set<string>();
292 		}
293 	}
294 
protein_id()295 	set<string> protein_id() const
296 	{
297 		if (_transfrag)
298 		{
299 			set<string> s;
300 			s.insert(_transfrag->annotated_protein_id());
301 			return s;
302 		}
303 		else
304 		{
305 			assert (false);
306 			return set<string>();
307 		}
308 	}
309 
description()310 	virtual const string&	description() const	{ return _description; }
description(const string & d)311 	virtual void			description(const string& d) { _description = d; }
312 
locus_tag()313 	virtual const string&	locus_tag() const { return _locus_tag; }
locus_tag(const string & L)314 	virtual void			locus_tag(const string& L) { _locus_tag = L; }
315 
reference_tag()316 	virtual const string&	reference_tag() const { return _ref_tag; }
reference_tag(const string & r)317 	virtual void			reference_tag(const string& r) { _ref_tag = r; }
318 
319 	void clear_non_serialized_data();
320 private:
321 
322     friend std::ostream & operator<<(std::ostream &os, const TranscriptAbundance &gp);
323     friend class boost::serialization::access;
324 
325     template<class Archive>
serialize(Archive & ar,const unsigned int)326     void serialize(Archive & ar, const unsigned int /* file_version */){
327 
328         ar & _status;
329         //ar & _transfrag;
330         ar & _FPKM;
331         ar & _FPKM_conf;
332         ar & _gamma;
333         ar & _kappa;
334         ar & _num_fragments;
335         ar & _num_fragment_var;
336         ar & _num_fragment_uncertainty_var;
337         ar & _eff_len;
338         //ar & _cond_probs;
339         ar & _description;
340         ar & _locus_tag;
341         ar & _ref_tag;
342         ar & _sample_mass_variance;
343         ar & _num_fragments_per_replicate;
344         ar & _fpkm_per_replicate;
345         ar & _status_per_replicate;
346     }
347 
348 	void calculate_FPKM_err_bar(double variance);
349 
350 	AbundanceStatus _status;
351 	boost::shared_ptr<Scaffold> _transfrag;
352 	double _FPKM;
353 	double _FPKM_variance;
354 	ConfidenceInterval _FPKM_conf;
355 	double _gamma;
356 	double _kappa;
357 	double _num_fragments;
358     double _num_fragment_var;
359     double _num_fragment_uncertainty_var;
360 	double _eff_len;
361 	vector<double>* _cond_probs;
362 
363     vector<double> _fpkm_samples;
364 
365 	string _description;
366 	string _locus_tag;
367 	string _ref_tag;
368 
369     long double _sample_mass_variance;
370 
371     CountPerReplicateTable _num_fragments_per_replicate;
372     FPKMPerReplicateTable _fpkm_per_replicate;
373     StatusPerReplicateTable _status_per_replicate;
374 };
375 
376 //BOOST_CLASS_EXPORT_GUID(TranscriptAbundance, "TranscriptAbundance");
377 //BOOST_SERIALIZATION_boost::shared_ptr(TranscriptAbundance)
378 
379 class AbundanceGroup : public Abundance
380 {
381 public:
AbundanceGroup()382 	AbundanceGroup() :
383         _kappa(1.0),
384         _FPKM_variance(0.0),
385         _salient_frags(0.0),
386         _total_frags(0.0) {}
387 
AbundanceGroup(const vector<boost::shared_ptr<Abundance>> & abundances)388 	AbundanceGroup(const vector<boost::shared_ptr<Abundance> >& abundances) :
389 		_abundances(abundances),
390         _iterated_exp_count_covariance(ublas::zero_matrix<double>(abundances.size(), abundances.size())),
391         _count_covariance(ublas::zero_matrix<double>(abundances.size(), abundances.size())),
392         _fpkm_covariance(ublas::zero_matrix<double>(abundances.size(), abundances.size())),
393 		_gamma_covariance(ublas::zero_matrix<double>(abundances.size(), abundances.size())),
394 		_kappa_covariance(ublas::zero_matrix<double>(abundances.size(), abundances.size())),
395 		_kappa(1.0),
396 		_FPKM_variance(0.0),
397         _salient_frags(0.0),
398         _total_frags(0.0) {}
399 
400 	AbundanceGroup(const vector<boost::shared_ptr<Abundance> >& abundances,
401 				   const ublas::matrix<double>& gamma_covariance,
402                    const ublas::matrix<double>& iterated_exp_count_covariance,
403                    const ublas::matrix<double>& count_covariance,
404                    const ublas::matrix<double>& fpkm_covariance,
405                    const std::set<boost::shared_ptr<ReadGroupProperties const > >& rg_props);
406 
407 	AbundanceStatus status() const;
status(AbundanceStatus s)408 	void status(AbundanceStatus s)			{ }
409 	bool has_member_with_status(AbundanceStatus member_status) const;
410 
411 	double FPKM() const;
FPKM(double fpkm)412     void   FPKM(double fpkm)                { }
413 
FPKM_variance()414 	double FPKM_variance() const			{ return _FPKM_variance; }
FPKM_variance(double v)415 	void   FPKM_variance(double v)			{ }
416 
FPKM_conf()417 	ConfidenceInterval	FPKM_conf() const   { return _FPKM_conf; }
418 
419 
420 	double gamma() const;
gamma(double g)421 	void gamma(double g)					{  };
422 
kappa()423 	double kappa() const					{ return _kappa; }
kappa(double k)424 	void kappa(double k)					{ _kappa = k; }
425 
426 	double num_fragments() const;
num_fragments(double nf)427 	void num_fragments(double nf)			{  }
428 
429     // This tracks the final modeled variance in the assigned counts.
430     double num_fragment_var() const;
num_fragment_var(double nfv)431 	void num_fragment_var(double nfv)		{  }
432 
433     // This tracks the uncertainty in the assigned counts
434     double num_fragment_uncertainty_var() const;
num_fragment_uncertainty_var(double nfv)435 	void num_fragment_uncertainty_var(double nfv)		{  }
436 
437     CountPerReplicateTable num_fragments_by_replicate() const;
num_fragments_by_replicate(CountPerReplicateTable & cpr)438     void num_fragments_by_replicate(CountPerReplicateTable& cpr) { }
439 
440     FPKMPerReplicateTable FPKM_by_replicate() const;
FPKM_by_replicate(FPKMPerReplicateTable & fpr)441     void FPKM_by_replicate(FPKMPerReplicateTable& fpr) { }
442 
443     StatusPerReplicateTable status_by_replicate() const;
status_by_replicate(StatusPerReplicateTable & fpr)444     void status_by_replicate(StatusPerReplicateTable& fpr) { }
445 
446     double mass_variance() const;
mass_variance(double mf)447 	void mass_variance(double mf)	{  }
448 
449 	set<string> gene_id() const;
450 	set<string> gene_name() const;
451 	set<string> tss_id() const;
452 	set<string> protein_id() const;
453 
description()454 	virtual const string&	description() const	{ return _description; }
description(const string & d)455 	virtual void			description(const string& d) { _description = d; }
456 
457 	virtual const string&	locus_tag() const;
locus_tag(const string & L)458 	virtual void			locus_tag(const string& L) { }
459 
460 	virtual const string&	reference_tag() const;
reference_tag(const string & r)461 	virtual void			reference_tag(const string& r) { }
462 
463 	double effective_length() const;
464 
465 	//DUMMY FUNCTIONS
effective_length(double ef)466 	void effective_length(double ef) {}
cond_probs(vector<double> * cp)467 	void cond_probs(vector<double>* cp) {}
468 
469 	void filter_group(const vector<bool>& to_keep,
470 					  AbundanceGroup& filtered_group) const;
471 
472 	void get_transfrags(vector<boost::shared_ptr<Abundance> >& transfrags) const;
473 
abundances()474 	vector<boost::shared_ptr<Abundance> >& abundances() { return _abundances; }
abundances()475 	const vector<boost::shared_ptr<Abundance> >& abundances() const { return _abundances; }
476 
gamma_cov()477 	const ublas::matrix<double>& gamma_cov() const { return _gamma_covariance; }
478 
iterated_count_cov()479     const ublas::matrix<double>& iterated_count_cov() const { return _iterated_exp_count_covariance; }
480 
count_cov()481     const ublas::matrix<double>& count_cov() const { return _count_covariance; }
482 
kappa_cov()483 	const ublas::matrix<double>& kappa_cov() const { return _kappa_covariance; }
484 
fpkm_cov()485     const ublas::matrix<double>& fpkm_cov() const { return _fpkm_covariance; }
486 
assigned_counts()487 	const vector<Eigen::VectorXd>& assigned_counts() const { return _assigned_count_samples; }
488 
fpkm_samples()489     const vector<double>& fpkm_samples() const { return _fpkm_samples; }
fpkm_samples(const vector<double> & s)490     void  fpkm_samples(const vector<double>& s) { _fpkm_samples = s; }
clear_fpkm_samples()491     void clear_fpkm_samples() {
492         _fpkm_samples.clear();
493         std::vector<double>().swap(_fpkm_samples);
494         for (size_t i = 0; i < _member_fpkm_samples.size(); ++i)
495         {
496             _member_fpkm_samples.clear();
497             std::vector<Eigen::VectorXd>().swap(_member_fpkm_samples);
498         }
499     }
500 
member_fpkm_samples()501     const vector<Eigen::VectorXd>& member_fpkm_samples() const { return _member_fpkm_samples; }
member_fpkm_samples(const vector<Eigen::VectorXd> & s)502     void  member_fpkm_samples(const vector<Eigen::VectorXd>& s) { _member_fpkm_samples = s; }
503 
504 	void calculate_abundance(const vector<MateHit>& alignments,
505                              bool perform_collapse = true,
506                              bool calculate_variance = true);
507 
salient_frags()508     double salient_frags() const { return _salient_frags; }
salient_frags(double nf)509     void salient_frags(double nf) { _salient_frags = nf; }
510 
total_frags()511     double total_frags() const { return _total_frags; }
total_frags(double nf)512     void total_frags(double nf) { _total_frags = nf; }
513 
rg_props()514     const std::set<boost::shared_ptr<ReadGroupProperties const> >& rg_props() const { return _read_group_props; }
515 
init_rg_props(const std::set<boost::shared_ptr<ReadGroupProperties const>> & rg)516     void init_rg_props(const std::set<boost::shared_ptr<ReadGroupProperties const> >& rg)
517     {
518         _read_group_props = rg;
519         _count_per_replicate.clear();
520         for ( std::set<boost::shared_ptr<ReadGroupProperties const> >::const_iterator itr = rg.begin();
521              itr != rg.end();
522              ++itr)
523         {
524             _count_per_replicate[*itr] = 0;
525         }
526     }
527 
528     void fit_gamma_distributions();
529 
530     void clear_non_serialized_data();
531 
532     void aggregate_replicate_abundances(const std::map<boost::shared_ptr<ReadGroupProperties const >, boost::shared_ptr<const AbundanceGroup> >& ab_group_per_replicate);
533 
534     void calculate_abundance_group_variance(const vector<boost::shared_ptr<Abundance> >& transcripts,
535                                             const std::map<boost::shared_ptr<ReadGroupProperties const >, boost::shared_ptr<const AbundanceGroup> >& ab_group_per_replicate);
536 
collect_per_replicate_mass(std::map<boost::shared_ptr<ReadGroupProperties const>,boost::shared_ptr<const AbundanceGroup>> & ab_group_per_replicate)537     void collect_per_replicate_mass(std::map<boost::shared_ptr<ReadGroupProperties const >, boost::shared_ptr<const AbundanceGroup> >& ab_group_per_replicate)
538     {
539         _count_per_replicate.clear();
540         for (std::map<boost::shared_ptr<ReadGroupProperties const >, boost::shared_ptr<const AbundanceGroup> >::const_iterator itr = ab_group_per_replicate.begin();
541              itr != ab_group_per_replicate.end(); ++itr)
542         {
543             _count_per_replicate[itr->first] = itr->second->num_fragments();
544         }
545     }
546 
547     static void apply_normalization_to_abundances(const map<boost::shared_ptr<const ReadGroupProperties>, boost::shared_ptr<const AbundanceGroup> >& unnormalized_ab_group_per_replicate,
548                                                   map<boost::shared_ptr<const ReadGroupProperties>, boost::shared_ptr<AbundanceGroup> >& normalized_ab_group_per_replicate);
549 
550 private:
551 
552     void calculate_abundance_for_replicate(const vector<MateHit>& alignments, bool perform_collapse);
553 
554 
555 
FPKM_conf(const ConfidenceInterval & cf)556 	void FPKM_conf(const ConfidenceInterval& cf)  { _FPKM_conf = cf; }
557 
558 	bool calculate_gammas(const vector<MateHit>& nr_alignments,
559                           const vector<double>& log_conv_factors,
560 						  const vector<boost::shared_ptr<Abundance> >& transcripts,
561 						  const vector<boost::shared_ptr<Abundance> >& mapped_transcripts);
562 	void calculate_FPKM_covariance();
563 
564     void calculate_conf_intervals();
565 	void calculate_locus_scaled_mass_and_variance(const vector<boost::shared_ptr<Abundance> >& transcripts);
566 
567 
568     AbundanceStatus calculate_per_replicate_abundances(vector<boost::shared_ptr<Abundance> >& transcripts,
569                                                        const std::map<boost::shared_ptr<ReadGroupProperties const >, vector<MateHit> >& alignments_per_read_group,
570                                                        std::map<boost::shared_ptr<ReadGroupProperties const >, boost::shared_ptr<AbundanceGroup> >& ab_group_per_replicate,
571                                                        bool perform_collapse = true);
572 
573 
574 	void calculate_kappas();
575 
576 
577 	void update_multi_reads(const vector<MateHit>& alignments, vector<boost::shared_ptr<Abundance> > transcripts);
578 
579     void collect_per_replicate_mass(const vector<MateHit>& alignments,
580                                     vector<boost::shared_ptr<Abundance> >& transcripts);
581 
582     void generate_fpkm_samples();
583 
584     friend std::ostream & operator<<(std::ostream &os, const AbundanceGroup &gp);
585     friend class boost::serialization::access;
586 
587     template<class Archive>
save(Archive & ar,const unsigned int version)588     void save(Archive & ar, const unsigned int version) const
589     {
590         //ar & _abundances;
591         vector<TranscriptAbundance*> tmp;
592         BOOST_FOREACH(boost::shared_ptr<Abundance> ab, _abundances)
593         {
594             tmp.push_back((TranscriptAbundance*)&(*ab));
595         }
596         ar & tmp;
597         ar & _iterated_exp_count_covariance;
598         ar & _count_covariance;
599         ar & _fpkm_covariance;
600         ar & _gamma_covariance;
601 
602         //ar & _FPKM_conf;
603         //ar & _kappa_covariance;
604         //ar & _assign_probs;
605 
606         ar & _kappa;
607         ar & _FPKM_variance;
608         ar & _description;
609         //ar & _salient_frags;
610         ar & _total_frags;
611         //ar & _fpkm_samples; // don't save the samples
612         ar & _read_group_props;
613         //ar & _member_fpkm_samples // don't save the member samples either
614         ar & _count_per_replicate;
615 
616     }
617     template<class Archive>
load(Archive & ar,const unsigned int version)618     void load(Archive & ar, const unsigned int version)
619     {
620 
621         vector<TranscriptAbundance*> tmp;
622         ar & tmp;
623         BOOST_FOREACH(TranscriptAbundance* ab, tmp)
624         {
625             _abundances.push_back(boost::shared_ptr<Abundance>(ab));
626         }
627 
628         //ar & _abundances;
629 
630         ar & _iterated_exp_count_covariance;
631         ar & _count_covariance;
632         ar & _fpkm_covariance;
633         ar & _gamma_covariance;
634 
635         //ar & _FPKM_conf;
636         //ar & _kappa_covariance;
637         //ar & _assign_probs;
638 
639         ar & _kappa;
640         ar & _FPKM_variance;
641         ar & _description;
642         //ar & _salient_frags;
643         ar & _total_frags;
644         //ar & _fpkm_samples; // don't save the samples
645         ar & _read_group_props;
646         //ar & _member_fpkm_samples // don't save the member samples either
647         ar & _count_per_replicate;
648     }
649     BOOST_SERIALIZATION_SPLIT_MEMBER()
650 
651     //void collect_read_group_props();
652 
653 	vector<boost::shared_ptr<Abundance> > _abundances;
654 
655     // _iterated_exp_count_covariance is the ITERATED EXPECTATION count covariance matrix.  It's not the
656     // estimated count covariance matrix (i.e. it doesn't include biological variability from
657     // the fitted model.
658     ublas::matrix<double> _iterated_exp_count_covariance;
659 
660     // _count_covariance is the final count covariance matrix.  It's includes our estimates
661     // of transcript-level biological variability on counts
662     ublas::matrix<double> _count_covariance;
663 
664     ublas::matrix<double> _fpkm_covariance;
665 	ublas::matrix<double> _gamma_covariance;
666 
667 	ConfidenceInterval _FPKM_conf;
668 
669 	ublas::matrix<double> _kappa_covariance;
670     ublas::matrix<double> _assign_probs;
671 
672 	double _kappa;
673 	double _FPKM_variance;
674 	string _description;
675     double _salient_frags;
676     double _total_frags;
677 
678     vector<double> _fpkm_samples;
679     vector<Eigen::VectorXd> _member_fpkm_samples;
680 
681     std::set<boost::shared_ptr<ReadGroupProperties const > > _read_group_props;
682     vector<Eigen::VectorXd> _assigned_count_samples;
683 
684     map<boost::shared_ptr<ReadGroupProperties const>, double> _count_per_replicate;
685     //std::map<boost::shared_ptr<ReadGroupProperties const >, ublas::vector<double> > _mles_for_read_groups;
686 };
687 
688 struct SampleAbundances
689 {
690     string locus_tag;
691 	AbundanceGroup transcripts;
692 	vector<AbundanceGroup> primary_transcripts;
693 	vector<AbundanceGroup> gene_primary_transcripts;
694 	vector<AbundanceGroup> cds;
695 	vector<AbundanceGroup> gene_cds;
696 	vector<AbundanceGroup> genes;
697 	double cluster_mass;
698 };
699 
700 void compute_cond_probs_and_effective_lengths(const vector<MateHit>& alignments,
701                                               vector<boost::shared_ptr<Abundance> >& transcripts,
702                                               vector<boost::shared_ptr<Abundance> >& mapped_transcripts);
703 
704 void compute_compatibilities(const vector<boost::shared_ptr<Abundance> >& transcripts,
705 							 const vector<MateHit>& alignments,
706 							 vector<vector<char> >& compatibilities);
707 
708 void get_alignments_from_scaffolds(const vector<boost::shared_ptr<Abundance> >& abundances,
709 								   vector<MateHit>& alignments);
710 
711 AbundanceStatus empirical_replicate_gammas(vector<boost::shared_ptr<Abundance> >& transcripts,
712                                            const vector<MateHit>& nr_alignments,
713                                            const vector<double>& log_conv_factors,
714                                            ublas::vector<double>& gamma_map_estimate,
715                                            ublas::matrix<double>& gamma_map_covariance,
716                                            std::map<boost::shared_ptr<ReadGroupProperties const >, ublas::vector<double> >& mles_for_read_groups);
717 
718 AbundanceStatus gamma_mle(const vector<boost::shared_ptr<Abundance> >& transcripts,
719                           const vector<MateHit>& nr_alignments,
720                           const vector<double>& log_conv_factors,
721                           vector<double>& gammas,
722                           bool check_identifiability = true);
723 
724 double compute_doc(int bundle_origin,
725 				   const vector<Scaffold>& scaffolds,
726 				   vector<float>& depth_of_coverage,
727 				   map<pair<int, int>, float>& intron_depth_of_coverage,
728 				   bool exclude_intra_intron=false,
729 				   vector<float>* intronic_cov=NULL,
730 				   vector<int>* scaff_intronic_status=NULL);
731 
732 double major_isoform_intron_doc(map<pair<int, int>, float>& intron_doc);
733 
734 void record_doc_for_scaffolds(int bundle_origin,
735 							  const std::vector<Scaffold>& hits,
736 							  const std::vector<float>& depth_of_coverage,
737 							  const std::map<std::pair<int, int>, float>& intron_depth_of_coverage,
738 							  std::vector<double>& scaff_doc);
739 
740 void record_doc_for_scaffolds(int bundle_origin,
741 							  const std::vector<Scaffold>& hits,
742 							  const std::vector<float>& depth_of_coverage,
743 							  std::vector<double>& scaff_doc);
744 
745 void record_min_doc_for_scaffolds(int bundle_origin,
746 								  const std::vector<Scaffold>& hits,
747 								  const std::vector<float>& depth_of_coverage,
748 								  const std::map<std::pair<int, int>, float>& intron_depth_of_coverage,
749 								  std::vector<double>& scaff_doc);
750 
751 
752 double get_intron_doc(const Scaffold& s,
753 					  const map<pair<int, int>, float>& intron_depth_of_coverage);
754 
755 double get_scaffold_doc(int bundle_origin,
756 						const Scaffold& s,
757 						const vector<float>& depth_of_coverage);
758 
759 double get_scaffold_min_doc(int bundle_origin,
760 							const Scaffold& s,
761 							const vector<float>& depth_of_coverage);
762 
763 AbundanceStatus calculate_inverse_fisher(const vector<boost::shared_ptr<Abundance> >& transcripts,
764                                          const vector<MateHit>& alignments,
765                                          const ublas::vector<double>& gamma_mean,
766                                          ublas::matrix<double>& inverse_fisher);
767 
768 void calculate_assignment_probs(const Eigen::VectorXd& alignment_multiplicities,
769                                 const Eigen::MatrixXd& transcript_cond_probs,
770                                 const Eigen::VectorXd& proposed_gammas,
771                                 Eigen::MatrixXd& assignment_probs);
772 
773 void calculate_average_assignment_probs(const Eigen::VectorXd& alignment_multiplicities,
774                                         const Eigen::MatrixXd& transcript_cond_probs,
775                                         const Eigen::VectorXd& proposed_gammas,
776                                         Eigen::MatrixXd& assignment_probs);
777 
778 void calculate_iterated_exp_count_covariance(const vector<double>& gammas,
779                                              const vector<MateHit>& nr_alignments,
780                                              const vector<boost::shared_ptr<Abundance> >& transcripts,
781                                              ublas::matrix<double>& count_covariance);
782 
783 bool simulate_count_covariance(const vector<double>& num_fragments,
784                                const vector<double>& frag_variances,
785                                const ublas::matrix<double>& iterated_exp_count_covariances,
786                                const vector<boost::shared_ptr<Abundance> >& transcripts,
787                                ublas::matrix<double>& count_covariances,
788                                vector<Eigen::VectorXd>& assigned_count_samples,
789                                vector<ublas::vector<double> >* gamma_samples);
790 
791 void sample_abundance_worker(const string& locus_tag,
792                              const set<boost::shared_ptr<ReadGroupProperties const> >& rg_props,
793                              SampleAbundances& sample,
794                              boost::shared_ptr<HitBundle> sample_bundle,
795                              bool perform_cds_analysis,
796                              bool perform_tss_analysis,
797                              bool calculate_variance);
798 
799 void merge_precomputed_expression_worker(const string& locus_tag,
800                                          const vector<boost::shared_ptr<PrecomputedExpressionBundleFactory> >& expression_factories,
801                                          SampleAbundances& sample,
802                                          boost::shared_ptr<HitBundle> sample_bundle,
803                                          bool perform_cds_analysis,
804                                          bool perform_tss_analysis,
805                                          bool calculate_variance);
806 #endif
807