1 #ifndef COMMON_H
2 #define COMMON_H
3 /*
4  *  common.h
5  *  Cufflinks
6  *
7  *  Created by Cole Trapnell on 11/26/08.
8  *  Copyright 2008 Cole Trapnell. All rights reserved.
9  *
10  */
11 
12 #include <boost/version.hpp>
13 
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #else
17 #define PACKAGE_VERSION "INTERNAL"
18 #define SVN_REVISION "XXX"
19 #define BOOST_VERSION 104700
20 #endif
21 
22 #include <stdint.h>
23 #include <cassert>
24 #include <string>
25 #include <utility>
26 
27 #include <boost/math/distributions/normal.hpp>
28 using boost::math::normal;
29 
30 #include <boost/archive/tmpdir.hpp>
31 
32 #include <boost/archive/binary_iarchive.hpp>
33 #include <boost/archive/binary_oarchive.hpp>
34 
35 #include <boost/serialization/base_object.hpp>
36 #include <boost/serialization/utility.hpp>
37 #include <boost/serialization/list.hpp>
38 #include <boost/serialization/map.hpp>
39 #include <boost/serialization/set.hpp>
40 #include <boost/serialization/vector.hpp>
41 #include <boost/serialization/assume_abstract.hpp>
42 #include <boost/serialization/shared_ptr.hpp>
43 #include <boost/serialization/export.hpp>
44 
45 #include <boost/foreach.hpp>
46 
47 #include <boost/thread.hpp>
48 #include <boost/shared_ptr.hpp>
49 
50 #include <boost/crc.hpp>
51 
52 #include <boost/filesystem.hpp>
53 
54 // Non-option globals
55 extern bool final_est_run;
56 extern bool allow_junk_filtering;
57 extern bool user_provided_fld;
58 extern int def_max_frag_len;
59 extern int max_frag_len;
60 extern int min_frag_len;
61 
62 // Behavior options
63 extern int num_threads;
64 extern bool no_update_check;
65 extern bool cuff_quiet;
66 extern bool cuff_verbose;
67 extern bool output_fld;
68 extern bool output_bias_params;
69 
70 // General options
71 extern int max_partner_dist;
72 extern uint32_t max_gene_length;
73 extern std::string ref_gtf_filename;
74 extern std::string mask_gtf_filename;
75 extern std::string contrast_filename;
76 extern std::string norm_standards_filename;
77 extern bool use_sample_sheet;
78 extern std::string output_dir;
79 extern std::string fasta_dir;
80 extern std::string library_type;
81 
82 // Abundance estimation options
83 extern bool corr_bias;
84 extern bool corr_multi;
85 
86 extern int def_frag_len_mean;
87 extern int def_frag_len_std_dev;
88 extern int max_mle_iterations;
89 extern int num_importance_samples;
90 extern float min_isoform_fraction;
91 extern bool cond_prob_collapse;
92 extern bool use_compat_mass;
93 extern bool use_total_mass;
94 extern bool model_mle_error;
95 
96 // Ref-guided assembly options
97 extern int overhang_3;
98 extern int ref_merge_overhang_tolerance;
99 extern int tile_len;
100 extern int tile_off;
101 extern bool enable_faux_reads;
102 extern bool enable_5_extend;
103 
104 // Assembly options
105 extern uint32_t min_intron_length;
106 extern uint32_t max_intron_length;
107 extern int olap_radius;
108 extern int bowtie_overhang_tolerance;
109 extern int min_frags_per_transfrag;
110 extern int microexon_length;
111 extern float pre_mrna_fraction;
112 extern float high_phred_err_prob;
113 extern double trim_3_dropoff_frac;
114 extern double trim_3_avgcov_thresh;
115 extern double small_anchor_fraction;
116 extern double binomial_junc_filter_alpha;
117 extern std::string user_label;
118 extern long random_seed;
119 extern bool emit_count_tables;
120 extern bool use_fisher_covariance;
121 extern bool split_variance;
122 
123 extern int max_frags_per_bundle;
124 //extern bool analytic_diff;
125 extern bool no_differential;
126 extern double num_frag_count_draws;
127 extern double num_frag_assignments;
128 extern double max_multiread_fraction;
129 extern double max_frag_multihits;
130 extern int min_reps_for_js_test;
131 extern bool no_effective_length_correction;
132 extern bool no_length_correction;
133 extern bool no_js_tests;
134 
135 extern bool no_scv_correction;
136 
137 extern double min_outlier_p;
138 
139 
140 extern std::string default_dispersion_method;
141 extern std::string default_lib_norm_method;
142 extern std::string default_cufflinks_lib_norm_method;
143 extern std::string default_output_format;
144 
145 // SECRET OPTIONS:
146 // These options are just for instrumentation and benchmarking code
147 
148 extern bool no_read_pairs;
149 extern float read_skip_fraction;
150 extern int trim_read_length;
151 extern double mle_accuracy;
152 
153 // END SECRET OPTIONS
154 
155 #define ASM_VERBOSE 0
156 #define ENABLE_THREADS 1
157 
158 #if ENABLE_THREADS
159 extern boost::thread_specific_ptr<std::string> bundle_label; // for consistent, traceable logging
160 #else
161 extern boost::shared_ptr<std::string> bundle_label;
162 #endif
163 
164 // Global switch to mark when we're in the middle of learning bias.
165 extern bool bias_run;
166 
167 // Hold the command line string used to run the program
168 extern std::string cmd_str;
169 
// NOTE(review): the identifier is spelled "gaurd" (sic). It is declared here
// only; renaming it would require touching the definition and every caller.
bool gaurd_assembly();

// Variadic helpers that take a format string `fmt` followed by its arguments.
// NOTE(review): presumably printf-style and gated on the verbosity settings
// declared above - confirm against the definitions.
void asm_verbose(const char* fmt,...);
void verbose_msg(const char* fmt,...);

// Declared here, defined elsewhere. Takes a lower bound, an error message and
// a callback that prints usage.
// NOTE(review): the value being parsed is not a parameter, so it presumably
// comes from global option-parsing state - confirm against the definition.
int parseInt(int lower,
			 const char *errmsg,
			 void (*print_usage)());

// Same shape as parseInt, with both a lower and an upper bound.
float parseFloat(float lower,
				 float upper,
				 const char *errmsg,
				 void (*print_usage)());

// NOTE(review): `seqStr` is taken by const value, so the string is copied on
// every call; switching to a const reference needs a matching change in the
// definition, which is outside this header.
void encode_seq(const std::string seqStr, char* seq, char* c_seq);
// NOTE(review): `mode_t` is normally provided by <sys/types.h>, which this
// header does not include directly.
int mkpath(const char *s, mode_t mode);
186 
187 
// Copies every element of [begin, end) for which `p` returns true to the
// output sequence starting at `destBegin`, preserving order.
// Returns the output iterator positioned one past the last element written.
template<typename InputIterator,
         typename OutputIterator,
         typename Predicate>
OutputIterator copy_if(InputIterator begin,
                       InputIterator end,
                       OutputIterator destBegin,
                       Predicate p)
{
    for (; begin != end; ++begin)
    {
        // Skip elements rejected by the predicate.
        if (!p(*begin))
            continue;

        *destBegin = *begin;
        ++destBegin;
    }
    return destBegin;
}
203 
// Type of the `bundle_mode` and `init_bundle_mode` globals.
// Enumerator values are spelled out; they equal the implicit numbering.
enum BundleMode
{
    HIT_DRIVEN = 0,
    REF_DRIVEN = 1,
    REF_GUIDED = 2
};
210 extern BundleMode bundle_mode;
211 extern BundleMode init_bundle_mode;
212 
// Type of the `bias_mode` global and of CheckedParameters::frag_bias_mode.
// Values are written to archives by CheckedParameters::serialize, so the
// numbering is spelled out explicitly; it equals the implicit numbering.
enum BiasMode
{
    SITE     = 0,
    VLMM     = 1,
    POS      = 2,
    POS_VLMM = 3,
    POS_SITE = 4
};
221 extern BiasMode bias_mode;
222 
// Strandedness setting held by ReadGroupProperties (see its strandedness()
// accessors). Enumerator values are spelled out; they equal the implicit
// numbering.
enum Strandedness
{
    UNKNOWN_STRANDEDNESS = 0,
    STRANDED_PROTOCOL    = 1,
    UNSTRANDED_PROTOCOL  = 2
};
229 
// Mate orientation setting held by ReadGroupProperties (see its
// std_mate_orientation() accessors).
// The enumerator list no longer ends with a trailing comma, which pre-C++11
// compilers reject in pedantic mode. Values are spelled out and equal the
// implicit numbering.
enum StandardMateOrientation
{
    UNKNOWN_MATE_ORIENTATION = 0,
    MATES_POINT_TOWARD       = 1,
    MATES_POINT_SAME         = 2,
    MATES_POINT_AWAY         = 3,
    UNPAIRED                 = 4
};
238 
// Mate strand mapping held by ReadGroupProperties (see its
// mate_strand_mapping() accessors). Enumerator values are spelled out; they
// equal the implicit numbering.
enum MateStrandMapping
{
    FF = 0,
    FR = 1,
    RF = 2, // This is really FR with first-strandedness
    RR = 3  // This is really FF with first-strandedness
};
246 
// Platform setting held by ReadGroupProperties (see its platform()
// accessors). Enumerator values are spelled out; they equal the implicit
// numbering.
enum Platform
{
    UNKNOWN_PLATFORM = 0,
    ILLUMINA         = 1,
    SOLID            = 2
};
253 
// Source tag stored in EmpDist (see EmpDist::source()). It is written to
// archives by EmpDist::serialize, so the numbering is spelled out explicitly;
// it equals the implicit numbering.
enum FLDSource
{
    LEARNED = 0,
    USER    = 1,
    DEFAULT = 2
};
260 
// Type of the `dispersion_method` global and mapped type of
// `dispersion_method_table`. Enumerator values are spelled out; they equal
// the implicit numbering.
enum DispersionMethod
{
    DISP_NOT_SET  = 0,
    BLIND         = 1,
    PER_CONDITION = 2,
    POOLED        = 3,
    POISSON       = 4
};
269 
// Type of the `lib_norm_method` global and mapped type of
// `lib_norm_method_table`. Enumerator values are spelled out; they equal the
// implicit numbering.
enum LibNormalizationMethod
{
    LIB_NORM_NOT_SET = 0,
    GEOMETRIC        = 1,
    CLASSIC_FPKM     = 2,
    TMM              = 3,
    QUARTILE         = 4,
    ABSOLUTE         = 5 // Requires spike-in controls, not yet implemented
};
279 
// Type of the `output_format` global and mapped type of
// `output_format_table`. Enumerator values are spelled out; they equal the
// implicit numbering.
enum OutputFormat
{
    OUTPUT_FMT_NOT_SET      = 0,
    CUFFDIFF_OUTPUT_FMT     = 1,
    SIMPLE_TABLE_OUTPUT_FMT = 2
};
286 
287 
288 class EmpDist
289 {
290 	//Vectors only valid between min and max!
291 	std::vector<double> _pdf;
292 	std::vector<double> _cdf;
293 	int _mode;
294 	double _mean;
295     double _std_dev;
296 	int _min;
297 	int _max;
298 	FLDSource _source;
299 
EmpDist()300     EmpDist() {}
301 
302     friend std::ostream & operator<<(std::ostream &os, const EmpDist &gp);
303     friend class boost::serialization::access;
304 
305     template<class Archive>
serialize(Archive & ar,const unsigned int)306     void serialize(Archive & ar, const unsigned int /* file_version */){
307         ar & _pdf;
308         ar & _cdf;
309         ar & _mode;
310         ar & _mean;
311         ar & _std_dev;
312         ar & _min;
313         ar & _max;
314         ar & _source;
315     }
316 
317 public:
EmpDist(std::vector<double> & pdf,std::vector<double> & cdf,int mode,double mean,double std_dev,int min,int max,FLDSource source)318 	EmpDist(std::vector<double>& pdf, std::vector<double>& cdf, int mode, double mean, double std_dev, int min, int max, FLDSource source)
319 	: _pdf(pdf), _cdf(cdf), _mode(mode), _mean(mean), _std_dev(std_dev), _min(min), _max(max), _source(source) {}
320 
pdf(std::vector<double> & pdf)321 	void pdf(std::vector<double>& pdf)	{ _pdf = pdf; }
pdf(int l)322 	double pdf(int l) const
323 	{
324 		if (!valid_len(l))
325 			return 0.0;
326 		return _pdf[l];
327 	}
328 
329 	// pdf renomalized over the lengths <= r
npdf(int l,int r)330 	double npdf(int l, int r) const
331  	{
332 		if (!valid_len(l))
333 			return 0.0;
334 
335 		if (r > _max || r == 0)
336 			return pdf(l);
337 
338 		return pdf(l)/cdf(r);
339 	}
340 
cdf(std::vector<double> & cdf)341 	void cdf(std::vector<double>& cdf)	{ _cdf = cdf; }
cdf(int l)342 	double cdf(int l) const
343 	{
344 		if (l > _max)
345 			return 1.0;
346         if (l < 0)
347             return 0.0;
348 		return _cdf[l];
349 	}
350 
valid_len(int l)351 	bool valid_len(int l) const { return (l >= _min && l <= _max); }
too_short(int l)352 	bool too_short(int l) const { return (l < _min); }
353 
mode(int mode)354 	void mode(int mode)				{ _mode = mode; }
mode()355 	int mode() const				{ return _mode; }
356 
max(int max)357 	void max(int max)				{ _max = max;  }
max()358 	int max() const					{ return _max; }
359 
min(int min)360 	void min(int min)				{ _min = min;  }
min()361 	int min() const					{ return _min; }
362 
mean(double mean)363     void mean(double mean)				{ _mean = mean;  }
mean()364 	double mean() const					{ return _mean; }
365 
std_dev(double std_dev)366     void std_dev(double std_dev)				{ _std_dev = std_dev;  }
std_dev()367 	double std_dev() const					{ return _std_dev; }
368 
source()369     FLDSource source() const        { return _source; }
source(FLDSource source)370     void source(FLDSource source)   { _source = source; }
371 };
372 
373 class BiasLearner;
374 class MultiReadTable;
375 
376 class MassDispersionModel;
377 class MleErrorModel;
378 
379 struct LocusCount
380 {
LocusCountLocusCount381     LocusCount(std::string ld, double c, int nt, const std::vector<std::string>& gids, const std::vector<std::string>& gnms) :
382         locus_desc(ld), count(c), num_transcripts(nt), gene_ids(gids), gene_short_names(gnms) {}
383     std::string locus_desc;
384     double count;
385     int num_transcripts;
386     std::vector<std::string> gene_ids;
387     std::vector<std::string> gene_short_names;
388 
389 private:
390 
LocusCountLocusCount391     LocusCount() {} //needs an empty constructor for serialization
392 
393     friend std::ostream & operator<<(std::ostream &os, const LocusCount &gp);
394     friend class boost::serialization::access;
395 
396 //    template<class Archive>
397 //    void serialize(Archive & ar, const unsigned int /* file_version */){
398 //        ar & locus_desc;
399 //        ar & count;
400 //        ar & num_transcripts;
401 //        ar & gene_ids;
402 //        ar & gene_short_names;
403 //    }
404     template<class Archive>
saveLocusCount405     void save(Archive & ar, const unsigned int version) const
406     {
407         ar & locus_desc;
408         ar & count;
409         ar & num_transcripts;
410         ar & gene_ids;
411         ar & gene_short_names;
412     }
413     template<class Archive>
loadLocusCount414     void load(Archive & ar, const unsigned int version)
415     {
416         // create some temporaries, because we don't want to load the whole LocusCount;
417         std::string dsc;
418         ar & dsc;
419         ar & count;
420         ar & num_transcripts;
421         std::vector<std::string> gids;
422         ar & gids;
423         std::vector<std::string> gsns;
424         ar & gsns;
425     }
426     BOOST_SERIALIZATION_SPLIT_MEMBER()
427 
428 };
429 
430 // This class stores user-supplied options that affect quantification
431 // We'll serialize these into abundance files (i.e. CXB files)
432 // so we can ensure that they're consistent across all samples
433 // provided to cuffnorm and cuffdiff.
434 struct CheckedParameters
435 {
CheckedParametersCheckedParameters436     CheckedParameters() :
437         frag_len_mean(0.0),
438         frag_len_std_dev(0.0),
439         corr_bias(0.0),
440         frag_bias_mode(VLMM),
441         corr_multireads(false),
442         max_mle_iterations(false),
443         min_mle_accuracy(0.0),
444         max_bundle_frags(0.0),
445         max_frags_multihits(0.0),
446         no_effective_length_correction(false),
447         no_length_correction(false),
448         ref_gtf_file_path(""),
449         ref_gtf_crc(0),
450         mask_gtf_file_path(""),
451         mask_gtf_crc(0)
452     {} //needs an empty constructor for serialization
453 
454     double frag_len_mean;
455     double frag_len_std_dev;
456 
457     // TODO: add CRCs for reference GTF, mask file
458     bool corr_bias;
459 
460     BiasMode frag_bias_mode;
461     bool corr_multireads;
462 
463     double max_mle_iterations;
464     double min_mle_accuracy;
465 
466     double max_bundle_frags;
467     double max_frags_multihits;
468 
469     bool no_effective_length_correction;
470     bool no_length_correction;
471 
472     std::string ref_gtf_file_path;
473     boost::crc_32_type::value_type ref_gtf_crc;
474 
475     std::string mask_gtf_file_path;
476     boost::crc_32_type::value_type mask_gtf_crc;
477 
478     friend std::ostream & operator<<(std::ostream &os, const CheckedParameters &gp);
479     friend class boost::serialization::access;
480 
481     template<class Archive>
serializeCheckedParameters482     void serialize(Archive & ar, const unsigned int /* file_version */){
483         ar & frag_len_mean;
484         ar & frag_len_std_dev;
485         ar & corr_bias;
486         ar & frag_bias_mode;
487         ar & corr_multireads;
488         ar & max_mle_iterations;
489         ar & min_mle_accuracy;
490         ar & max_bundle_frags;
491         ar & max_frags_multihits;
492         ar & no_effective_length_correction;
493         ar & no_length_correction;
494         ar & ref_gtf_file_path;
495         ar & ref_gtf_crc;
496         ar & mask_gtf_file_path;
497         ar & mask_gtf_crc;
498     }
499 
500     bool operator!=(const CheckedParameters& rhs) const {
501         return !(*this == rhs);
502     }
503 
504     bool operator==(const CheckedParameters& rhs) const
505     {
506         return (frag_len_mean == rhs.frag_len_mean &&
507                 frag_len_std_dev == rhs.frag_len_std_dev &&
508                 corr_bias == rhs.corr_bias &&
509                 frag_bias_mode  == rhs.frag_bias_mode &&
510                 corr_multireads == rhs.corr_multireads &&
511                 max_mle_iterations  == rhs.max_mle_iterations &&
512                 min_mle_accuracy == rhs.min_mle_accuracy &&
513                 max_bundle_frags == rhs.max_bundle_frags &&
514                 max_frags_multihits == rhs.max_frags_multihits &&
515                 no_effective_length_correction == rhs.no_effective_length_correction &&
516                 no_length_correction == rhs.no_length_correction &&
517                 ref_gtf_file_path == rhs.ref_gtf_file_path &&
518                 ref_gtf_crc == rhs.ref_gtf_crc &&
519                 mask_gtf_file_path == rhs.mask_gtf_file_path &&
520                 mask_gtf_crc == rhs.mask_gtf_crc);
521 
522     }
523 
524 };
525 
526 class ReadGroupProperties
527 {
528 public:
529 
530     ReadGroupProperties();
531 
strandedness()532     Strandedness strandedness() const { return _strandedness; }
strandedness(Strandedness s)533     void strandedness(Strandedness s) { _strandedness = s; }
534 
std_mate_orientation()535     StandardMateOrientation std_mate_orientation() const { return _std_mate_orient; }
std_mate_orientation(StandardMateOrientation so)536     void std_mate_orientation(StandardMateOrientation so)  { _std_mate_orient = so; }
537 
mate_strand_mapping()538 	MateStrandMapping mate_strand_mapping() const { return _mate_strand_mapping; }
mate_strand_mapping(MateStrandMapping msm)539 	void mate_strand_mapping(MateStrandMapping msm) { _mate_strand_mapping = msm; }
540 
platform()541     Platform platform() const { return _platform; }
platform(Platform p)542     void platform(Platform p)  { _platform = p; }
543 
total_map_mass()544     long double total_map_mass() const { return _total_map_mass; }
total_map_mass(long double p)545     void total_map_mass(long double p)  { _total_map_mass = p; }
546 
normalized_map_mass()547     long double normalized_map_mass() const { return _norm_map_mass; }
normalized_map_mass(long double p)548     void normalized_map_mass(long double p)  { _norm_map_mass = p; }
549 
frag_len_dist()550     boost::shared_ptr<EmpDist const> frag_len_dist() const { return _frag_len_dist; }
frag_len_dist(boost::shared_ptr<EmpDist const> p)551     void frag_len_dist(boost::shared_ptr<EmpDist const> p)  { _frag_len_dist = p; }
552 
bias_learner()553 	boost::shared_ptr<BiasLearner const> bias_learner() const { return _bias_learner; }
bias_learner(boost::shared_ptr<BiasLearner const> bl)554     void bias_learner(boost::shared_ptr<BiasLearner const> bl)  { _bias_learner = bl; }
555 
556     // The internal scaling factor relates replicates to each other, so
557     // that replicates with larger library sizes don't bias the isoform
558     // deconvolution over smaller libraries
internal_scale_factor(double sf)559     void internal_scale_factor(double sf) { _internal_scale_factor = sf; }
internal_scale_factor()560     double internal_scale_factor() const  { return _internal_scale_factor; }
561 
external_scale_factor(double sf)562     void external_scale_factor(double sf) { _external_scale_factor = sf; }
external_scale_factor()563     double external_scale_factor() const  { return _external_scale_factor; }
564 
complete_fragments(bool c)565     void complete_fragments(bool c)  { _complete_fragments = c; }
complete_fragments()566     bool complete_fragments() const { return _complete_fragments; }
567 
internally_scale_mass(double unscaled_mass)568     double internally_scale_mass(double unscaled_mass) const
569     {
570         if (_internal_scale_factor == 0)
571             return unscaled_mass;
572 
573         return unscaled_mass * (1.0 / _internal_scale_factor);
574     }
575 
mass_dispersion_model()576     boost::shared_ptr<const MassDispersionModel> mass_dispersion_model() const
577     {
578         return _mass_dispersion_model;
579     };
580 
mass_dispersion_model(boost::shared_ptr<const MassDispersionModel> nm)581     void mass_dispersion_model(boost::shared_ptr<const MassDispersionModel> nm)
582     {
583         _mass_dispersion_model = nm;
584     }
585 
mle_error_model()586     boost::shared_ptr<const MleErrorModel> mle_error_model() const
587     {
588         return _mle_error_model;
589     };
590 
mle_error_model(boost::shared_ptr<const MleErrorModel> nm)591     void mle_error_model(boost::shared_ptr<const MleErrorModel> nm)
592     {
593         _mle_error_model = nm;
594     }
595 
common_scale_compatible_counts()596     const std::vector<LocusCount>& common_scale_compatible_counts() { return _common_scale_compatible_counts; }
common_scale_compatible_counts(const std::vector<LocusCount> & counts)597     void common_scale_compatible_counts(const std::vector<LocusCount>& counts) { _common_scale_compatible_counts = counts; }
598 
common_scale_total_counts()599     const std::vector<LocusCount>& common_scale_total_counts() { return _common_scale_total_counts; }
common_scale_total_counts(const std::vector<LocusCount> & counts)600     void common_scale_total_counts(const std::vector<LocusCount>& counts) { _common_scale_total_counts = counts; }
601 
raw_compatible_counts()602     const std::vector<LocusCount>& raw_compatible_counts() { return _raw_compatible_counts; }
raw_compatible_counts(const std::vector<LocusCount> & counts)603     void raw_compatible_counts(const std::vector<LocusCount>& counts) { _raw_compatible_counts = counts; }
604 
raw_total_counts()605     const std::vector<LocusCount>& raw_total_counts() { return _raw_total_counts; }
raw_total_counts(const std::vector<LocusCount> & counts)606     void raw_total_counts(const std::vector<LocusCount>& counts) { _raw_total_counts = counts; }
607 
clear_count_tables()608     void clear_count_tables() {
609         _common_scale_compatible_counts.clear();
610         std::vector<LocusCount>().swap(_common_scale_compatible_counts);
611 
612         _common_scale_total_counts.clear();
613         std::vector<LocusCount>().swap(_common_scale_total_counts);
614 
615         _raw_compatible_counts.clear();
616         std::vector<LocusCount>().swap(_raw_compatible_counts);
617 
618         _raw_total_counts.clear();
619         std::vector<LocusCount>().swap(_raw_total_counts);
620     }
621 
multi_read_table()622 	boost::shared_ptr<MultiReadTable> multi_read_table() const {return _multi_read_table; }
multi_read_table(boost::shared_ptr<MultiReadTable> mrt)623 	void multi_read_table(boost::shared_ptr<MultiReadTable> mrt) { _multi_read_table = mrt;	}
624 
625 //    const string& description() const { return _description; }
626 //    void description(const string& d) { _description = d; }
627 
condition_name()628     const std::string& condition_name() const { return _condition_name; }
condition_name(const std::string & cd)629     void condition_name(const std::string& cd) { _condition_name = cd; }
630 
file_path()631     const std::string& file_path() const { return _file_path; }
file_path(const std::string & fp)632     void file_path(const std::string& fp) { _file_path = fp; }
633 
replicate_num()634     int replicate_num() const { return _replicate_num; }
replicate_num(int rn)635     void replicate_num(int rn) { _replicate_num = rn; }
636 
ref_gtf(const std::string & file_path,const boost::crc_32_type & gtf_crc)637     void ref_gtf(const std::string& file_path, const boost::crc_32_type& gtf_crc )
638     {
639         _checked_params.ref_gtf_file_path = file_path;
640         _checked_params.ref_gtf_crc = gtf_crc();
641     }
642 
mask_gtf(const std::string & file_path,const boost::crc_32_type & gtf_crc)643     void mask_gtf(const std::string& file_path, const boost::crc_32_type& gtf_crc )
644     {
645         _checked_params.mask_gtf_file_path = file_path;
646         _checked_params.mask_gtf_crc = gtf_crc();
647     }
648 
649 
checked_parameters()650     const CheckedParameters& checked_parameters() const { return _checked_params; }
checked_parameters(const CheckedParameters & rhs)651     void checked_parameters(const CheckedParameters& rhs) { _checked_params = rhs; }
652 
653     // NOTE: this only picks up user-supplied options, not GTF files!
collect_checked_parameters()654     void collect_checked_parameters() {
655 
656         _checked_params.frag_len_mean = def_frag_len_mean;
657         _checked_params.frag_len_std_dev = def_frag_len_std_dev;
658 
659         // TODO: add CRCs for reference GTF, mask file, norm standards file if using.
660         _checked_params.corr_bias = corr_bias;
661 
662         _checked_params.frag_bias_mode = bias_mode;
663         _checked_params.corr_multireads = corr_multi;
664 
665         _checked_params.max_mle_iterations = max_mle_iterations;
666         _checked_params.min_mle_accuracy = mle_accuracy;
667 
668         _checked_params.max_bundle_frags = max_frags_per_bundle;
669         _checked_params.max_frags_multihits = max_frag_multihits;
670 
671         _checked_params.no_effective_length_correction = no_effective_length_correction;
672         _checked_params.no_length_correction = no_length_correction;
673     }
674 
675 
676 private:
677 
678     friend std::ostream & operator<<(std::ostream &os, const ReadGroupProperties &gp);
679     friend class boost::serialization::access;
680 
681     template<class Archive>
serialize(Archive & ar,const unsigned int)682     void serialize(Archive & ar, const unsigned int /* file_version */){
683         ar & _strandedness;
684         ar & _std_mate_orient;
685         ar & _mate_strand_mapping;
686         ar & _platform;
687         ar & _total_map_mass;
688         ar & _norm_map_mass;
689         ar & _frag_len_dist;
690         // TODO: probably should serialize the bias parameters somehow.
691         //ar & _bias_learner;
692         //ar & _multi_read_table; // we should never need this, I think.
693         ar & _internal_scale_factor;
694         ar & _external_scale_factor;
695         //ar & _mass_dispersion_model;
696         ar & _common_scale_compatible_counts;
697         ar & _common_scale_total_counts;
698         ar & _raw_compatible_counts;
699         ar & _raw_total_counts;
700         //ar & _mle_error_model;
701         ar & _complete_fragments;
702         ar & _condition_name;
703         ar & _file_path;
704         ar & _replicate_num;
705         ar & _checked_params;
706     }
707 
708     Strandedness _strandedness;
709     StandardMateOrientation _std_mate_orient;
710 	MateStrandMapping _mate_strand_mapping;
711     Platform _platform;
712     long double _total_map_mass;
713     long double _norm_map_mass;
714     boost::shared_ptr<EmpDist const> _frag_len_dist;
715 	boost::shared_ptr<BiasLearner const> _bias_learner;
716 	boost::shared_ptr<MultiReadTable> _multi_read_table;
717 
718     double _internal_scale_factor;
719     double _external_scale_factor;
720     boost::shared_ptr<const MassDispersionModel> _mass_dispersion_model;
721     std::vector<LocusCount> _common_scale_compatible_counts;
722     std::vector<LocusCount> _common_scale_total_counts;
723     std::vector<LocusCount> _raw_compatible_counts;
724     std::vector<LocusCount> _raw_total_counts;
725 
726     boost::shared_ptr<const MleErrorModel> _mle_error_model;
727 
728     bool _complete_fragments;
729 
730     std::string _condition_name;
731     std::string _file_path;
732     int _replicate_num;
733 
734     CheckedParameters _checked_params;
735 };
736 
737 BOOST_SERIALIZATION_SHARED_PTR(ReadGroupProperties)
738 
739 extern std::map<std::string, ReadGroupProperties> library_type_table;
740 
741 extern const ReadGroupProperties* global_read_properties;
742 
743 extern std::map<std::string, DispersionMethod> dispersion_method_table;
744 extern DispersionMethod dispersion_method;
745 
746 extern std::map<std::string, LibNormalizationMethod> lib_norm_method_table;
747 extern LibNormalizationMethod lib_norm_method;
748 
749 extern std::map<std::string, OutputFormat> output_format_table;
750 extern OutputFormat output_format;
751 
752 
753 void print_library_table();
754 void init_library_table();
755 
756 void print_dispersion_method_table();
757 void init_dispersion_method_table();
758 
759 void print_lib_norm_method_table();
760 void init_lib_norm_method_table();
761 void init_cufflinks_lib_norm_method_table();
762 
763 void print_output_format_table();
764 void init_output_format_table();
765 
766 
// Placeholder type: it currently declares no members. It is the mapped type
// of the `lib_norm_standards` map declared right after it.
struct LibNormStandards
{

};
771 
772 extern boost::shared_ptr<const std::map<std::string, LibNormStandards> > lib_norm_standards;
773 
// Joins the non-empty elements of `container` into a single string, inserting
// `delimiter` between consecutive kept elements. Empty elements are skipped,
// so no leading, trailing or doubled delimiters are produced. An empty
// container (or one holding only empty elements) yields an empty string.
template<typename T>
std::string cat_strings(const T& container, const char* delimiter=",")
{
    std::string joined;

    for (typename T::const_iterator cur = container.begin();
         cur != container.end();
         ++cur)
    {
        if ((*cur).empty())
            continue;

        // Only separate once something has already been emitted.
        if (!joined.empty())
            joined += delimiter;

        joined += *cur;
    }

    return joined;
}
797 
798 #define OPT_NUM_IMP_SAMPLES         260
799 #define OPT_MLE_MAX_ITER            261
800 #define OPT_FDR                     262
801 #define OPT_LIBRARY_TYPE            263
802 #define OPT_OVERHANG_TOLERANCE      264
803 #define OPT_MAX_BUNDLE_LENGTH       265
804 #define OPT_MIN_FRAGS_PER_TRANSFRAG 266
805 #define OPT_BIAS_MODE               267
806 #define OPT_MIN_INTRON_LENGTH       268
807 #define OPT_3_PRIME_AVGCOV_THRESH	269
808 #define OPT_3_PRIME_DROPOFF_FRAC    270
809 #define OPT_POISSON_DISPERSION      271
810 #define OPT_NO_UPDATE_CHECK         272
811 #define OPT_OUTPUT_FLD              273
812 #define OPT_OUTPUT_BIAS_PARAMS      274
813 #define OPT_USE_EM                  275
814 #define OPT_COLLAPSE_COND_PROB      276
815 #define OPT_RANDOM_SEED             277
816 #define OPT_NO_FAUX_READS           278
817 #define OPT_3_OVERHANG_TOLERANCE    279
818 #define OPT_INTRON_OVERHANG_TOLERANCE 280
819 #define OPT_EMIT_COUNT_TABLES       281
820 #define OPT_USE_COMPAT_MASS         282
821 #define OPT_USE_TOTAL_MASS          283
822 #define OPT_USE_FISHER_COVARIANCE   284
823 #define OPT_USE_EMPIRICAL_COVARIANCE   285
824 #define OPT_SPLIT_MASS              286
825 #define OPT_SPLIT_VARIANCE          287
826 #define OPT_TILE_LEN                291
827 #define OPT_TILE_SEP                292
828 #define OPT_NO_5_EXTEND             293
829 #define OPT_MAX_FRAGS_PER_BUNDLE    294
830 #define OPT_READ_SKIP_FRACTION      295
831 #define OPT_NO_READ_PAIRS           296
832 #define OPT_TRIM_READ_LENGTH        297
833 #define OPT_MAX_DELTA_GAP           298
834 #define OPT_MLE_MIN_ACC             299
835 //#define OPT_ANALYTIC_DIFF           300
836 #define OPT_NO_DIFF                 301
837 #define OPT_GEOMETRIC_NORM          302
838 #define OPT_RAW_MAPPED_NORM         303
839 #define OPT_NUM_FRAG_COUNT_DRAWS    304
840 #define OPT_NUM_FRAG_ASSIGN_DRAWS   305
841 #define OPT_MAX_MULTIREAD_FRACTION  306
842 #define OPT_LOCUS_COUNT_DISPERSION  307
843 #define OPT_MIN_OUTLIER_P           308
844 #define OPT_FRAG_MAX_MULTIHITS      309
845 #define OPT_MIN_REPS_FOR_JS_TEST    310
846 #define OPT_OLAP_RADIUS             311
847 #define OPT_NO_LENGTH_CORRECTION    312
848 #define OPT_NO_EFFECTIVE_LENGTH_CORRECTION    313
849 #define OPT_NO_JS_TESTS             314
850 #define OPT_DISPERSION_METHOD       315
851 #define OPT_LIB_NORM_METHOD         316
852 #define OPT_NO_SCV_CORRECTION       317
853 #define OPT_NORM_STANDARDS_FILE     318
854 #define OPT_USE_SAMPLE_SHEET        319
855 #define OPT_OUTPUT_FORMAT           320
856 #endif
857