1 #ifndef COMMON_H
2 #define COMMON_H
3 /*
4 * common.h
5 * Cufflinks
6 *
7 * Created by Cole Trapnell on 11/26/08.
8 * Copyright 2008 Cole Trapnell. All rights reserved.
9 *
10 */
11
12 #include <boost/version.hpp>
13
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #else
17 #define PACKAGE_VERSION "INTERNAL"
18 #define SVN_REVISION "XXX"
19 #define BOOST_VERSION 104700
20 #endif
21
22 #include <stdint.h>
23 #include <cassert>
24 #include <string>
25 #include <utility>
26
27 #include <boost/math/distributions/normal.hpp>
28 using boost::math::normal;
29
30 #include <boost/archive/tmpdir.hpp>
31
32 #include <boost/archive/binary_iarchive.hpp>
33 #include <boost/archive/binary_oarchive.hpp>
34
35 #include <boost/serialization/base_object.hpp>
36 #include <boost/serialization/utility.hpp>
37 #include <boost/serialization/list.hpp>
38 #include <boost/serialization/map.hpp>
39 #include <boost/serialization/set.hpp>
40 #include <boost/serialization/vector.hpp>
41 #include <boost/serialization/assume_abstract.hpp>
42 #include <boost/serialization/shared_ptr.hpp>
43 #include <boost/serialization/export.hpp>
44
45 #include <boost/foreach.hpp>
46
47 #include <boost/thread.hpp>
48 #include <boost/shared_ptr.hpp>
49
50 #include <boost/crc.hpp>
51
52 #include <boost/filesystem.hpp>
53
54 // Non-option globals
55 extern bool final_est_run;
56 extern bool allow_junk_filtering;
57 extern bool user_provided_fld;
58 extern int def_max_frag_len;
59 extern int max_frag_len;
60 extern int min_frag_len;
61
62 // Behavior options
63 extern int num_threads;
64 extern bool no_update_check;
65 extern bool cuff_quiet;
66 extern bool cuff_verbose;
67 extern bool output_fld;
68 extern bool output_bias_params;
69
70 // General options
71 extern int max_partner_dist;
72 extern uint32_t max_gene_length;
73 extern std::string ref_gtf_filename;
74 extern std::string mask_gtf_filename;
75 extern std::string contrast_filename;
76 extern std::string norm_standards_filename;
77 extern bool use_sample_sheet;
78 extern std::string output_dir;
79 extern std::string fasta_dir;
80 extern std::string library_type;
81
82 // Abundance estimation options
83 extern bool corr_bias;
84 extern bool corr_multi;
85
86 extern int def_frag_len_mean;
87 extern int def_frag_len_std_dev;
88 extern int max_mle_iterations;
89 extern int num_importance_samples;
90 extern float min_isoform_fraction;
91 extern bool cond_prob_collapse;
92 extern bool use_compat_mass;
93 extern bool use_total_mass;
94 extern bool model_mle_error;
95
96 // Ref-guided assembly options
97 extern int overhang_3;
98 extern int ref_merge_overhang_tolerance;
99 extern int tile_len;
100 extern int tile_off;
101 extern bool enable_faux_reads;
102 extern bool enable_5_extend;
103
104 // Assembly options
105 extern uint32_t min_intron_length;
106 extern uint32_t max_intron_length;
107 extern int olap_radius;
108 extern int bowtie_overhang_tolerance;
109 extern int min_frags_per_transfrag;
110 extern int microexon_length;
111 extern float pre_mrna_fraction;
112 extern float high_phred_err_prob;
113 extern double trim_3_dropoff_frac;
114 extern double trim_3_avgcov_thresh;
115 extern double small_anchor_fraction;
116 extern double binomial_junc_filter_alpha;
117 extern std::string user_label;
118 extern long random_seed;
119 extern bool emit_count_tables;
120 extern bool use_fisher_covariance;
121 extern bool split_variance;
122
123 extern int max_frags_per_bundle;
124 //extern bool analytic_diff;
125 extern bool no_differential;
126 extern double num_frag_count_draws;
127 extern double num_frag_assignments;
128 extern double max_multiread_fraction;
129 extern double max_frag_multihits;
130 extern int min_reps_for_js_test;
131 extern bool no_effective_length_correction;
132 extern bool no_length_correction;
133 extern bool no_js_tests;
134
135 extern bool no_scv_correction;
136
137 extern double min_outlier_p;
138
139
140 extern std::string default_dispersion_method;
141 extern std::string default_lib_norm_method;
142 extern std::string default_cufflinks_lib_norm_method;
143 extern std::string default_output_format;
144
145 // SECRET OPTIONS:
146 // These options are just for instrumentation and benchmarking code
147
148 extern bool no_read_pairs;
149 extern float read_skip_fraction;
150 extern int trim_read_length;
151 extern double mle_accuracy;
152
153 // END SECRET OPTIONS
154
155 #define ASM_VERBOSE 0
156 #define ENABLE_THREADS 1
157
158 #if ENABLE_THREADS
159 extern boost::thread_specific_ptr<std::string> bundle_label; // for consistent, traceable logging
160 #else
161 extern boost::shared_ptr<std::string> bundle_label;
162 #endif
163
164 // Global switch to mark when we're in the middle of learning bias.
165 extern bool bias_run;
166
167 // Hold the command line string used to run the program
168 extern std::string cmd_str;
169
170 bool gaurd_assembly();
171
172 void asm_verbose(const char* fmt,...);
173 void verbose_msg(const char* fmt,...);
174
175 int parseInt(int lower,
176 const char *errmsg,
177 void (*print_usage)());
178
179 float parseFloat(float lower,
180 float upper,
181 const char *errmsg,
182 void (*print_usage)());
183
184 void encode_seq(const std::string seqStr, char* seq, char* c_seq);
185 int mkpath(const char *s, mode_t mode);
186
187
/**
 * Copies every element of [begin, end) for which the predicate holds into the
 * range starting at destBegin, keeping the elements in their original order.
 *
 * @param begin     Start of the input range.
 * @param end       One past the last element of the input range.
 * @param destBegin Output iterator that receives the selected elements.
 * @param p         Unary predicate; an element is copied when p(element) is true.
 * @return The output iterator advanced past the last element written
 *         (equal to destBegin when nothing was copied).
 *
 * NOTE: this template lives in the global namespace and has the same name and
 * parameter shape as std::copy_if (C++11). When <algorithm> is visible, an
 * unqualified call whose arguments come from namespace std can be ambiguous
 * because of argument-dependent lookup; qualify the call (::copy_if) there.
 */
template<typename InputIterator,
         typename OutputIterator,
         typename Predicate>
OutputIterator copy_if(InputIterator begin,
                       InputIterator end,
                       OutputIterator destBegin,
                       Predicate p)
{
    for (; begin != end; ++begin)
    {
        if (p(*begin))
        {
            *destBegin = *begin;
            ++destBegin;
        }
    }
    return destBegin;
}
203
// Enumerates the three bundle modes. The two globals declared right after the
// enum (bundle_mode and init_bundle_mode, defined in another translation
// unit) hold a value of this type.
204 enum BundleMode
205 {
206 HIT_DRIVEN,
207 REF_DRIVEN,
208 REF_GUIDED
209 };
210 extern BundleMode bundle_mode;
211 extern BundleMode init_bundle_mode;
212
// Enumerates the bias modes. The global bias_mode declared below is of this
// type, and CheckedParameters::frag_bias_mode stores one of these values
// (its default constructor picks VLMM).
213 enum BiasMode
214 {
215 SITE,
216 VLMM,
217 POS,
218 POS_VLMM,
219 POS_SITE
220 };
221 extern BiasMode bias_mode;
222
// Strandedness classification stored per read group
// (see ReadGroupProperties::strandedness()).
// UNKNOWN_STRANDEDNESS is the first enumerator and therefore has value 0.
223 enum Strandedness
224 {
225 UNKNOWN_STRANDEDNESS,
226 STRANDED_PROTOCOL,
227 UNSTRANDED_PROTOCOL
228 };
229
// Mate orientation classification stored per read group
// (see ReadGroupProperties::std_mate_orientation()).
// UNKNOWN_MATE_ORIENTATION is the first enumerator and therefore has value 0.
230 enum StandardMateOrientation
231 {
232 UNKNOWN_MATE_ORIENTATION,
233 MATES_POINT_TOWARD,
234 MATES_POINT_SAME,
235 MATES_POINT_AWAY,
236 UNPAIRED,
237 };
238
// Mate-to-strand mapping stored per read group
// (see ReadGroupProperties::mate_strand_mapping()).
// NOTE(review): F/R presumably stand for forward/reverse -- confirm.
239 enum MateStrandMapping
240 {
241 FF,
242 FR,
243 RF, // This is really FR with first-strandedness
244 RR // This is really FF with first-strandedness
245 };
246
// Platform tag stored per read group (see ReadGroupProperties::platform()).
// NOTE(review): presumably the sequencing platform that produced the reads --
// confirm against the code that sets it.
247 enum Platform
248 {
249 UNKNOWN_PLATFORM,
250 ILLUMINA,
251 SOLID
252 };
253
// Source tag carried by EmpDist (EmpDist::source()) and written to the
// archive together with the distribution itself.
// NOTE(review): presumably records whether the fragment length distribution
// was learned, user-supplied, or a built-in default -- confirm.
254 enum FLDSource
255 {
256 LEARNED,
257 USER,
258 DEFAULT
259 };
260
// Dispersion methods. Values of this type are held by the global
// dispersion_method and looked up by name through dispersion_method_table
// (both declared later in this header). DISP_NOT_SET is the first enumerator
// and therefore has value 0.
261 enum DispersionMethod
262 {
263 DISP_NOT_SET,
264 BLIND,
265 PER_CONDITION,
266 POOLED,
267 POISSON
268 };
269
// Library normalization methods. Values of this type are held by the global
// lib_norm_method and looked up by name through lib_norm_method_table (both
// declared later in this header). LIB_NORM_NOT_SET is the first enumerator
// and therefore has value 0.
270 enum LibNormalizationMethod
271 {
272 LIB_NORM_NOT_SET,
273 GEOMETRIC,
274 CLASSIC_FPKM,
275 TMM,
276 QUARTILE,
277 ABSOLUTE // Requires spike-in controls, not yet implemented
278 };
279
// Output formats. Values of this type are held by the global output_format
// and looked up by name through output_format_table (both declared later in
// this header). OUTPUT_FMT_NOT_SET is the first enumerator and therefore has
// value 0.
280 enum OutputFormat
281 {
282 OUTPUT_FMT_NOT_SET,
283 CUFFDIFF_OUTPUT_FMT,
284 SIMPLE_TABLE_OUTPUT_FMT
285 };
286
287
288 class EmpDist
289 {
290 //Vectors only valid between min and max!
291 std::vector<double> _pdf;
292 std::vector<double> _cdf;
293 int _mode;
294 double _mean;
295 double _std_dev;
296 int _min;
297 int _max;
298 FLDSource _source;
299
EmpDist()300 EmpDist() {}
301
302 friend std::ostream & operator<<(std::ostream &os, const EmpDist &gp);
303 friend class boost::serialization::access;
304
305 template<class Archive>
serialize(Archive & ar,const unsigned int)306 void serialize(Archive & ar, const unsigned int /* file_version */){
307 ar & _pdf;
308 ar & _cdf;
309 ar & _mode;
310 ar & _mean;
311 ar & _std_dev;
312 ar & _min;
313 ar & _max;
314 ar & _source;
315 }
316
317 public:
EmpDist(std::vector<double> & pdf,std::vector<double> & cdf,int mode,double mean,double std_dev,int min,int max,FLDSource source)318 EmpDist(std::vector<double>& pdf, std::vector<double>& cdf, int mode, double mean, double std_dev, int min, int max, FLDSource source)
319 : _pdf(pdf), _cdf(cdf), _mode(mode), _mean(mean), _std_dev(std_dev), _min(min), _max(max), _source(source) {}
320
pdf(std::vector<double> & pdf)321 void pdf(std::vector<double>& pdf) { _pdf = pdf; }
pdf(int l)322 double pdf(int l) const
323 {
324 if (!valid_len(l))
325 return 0.0;
326 return _pdf[l];
327 }
328
329 // pdf renomalized over the lengths <= r
npdf(int l,int r)330 double npdf(int l, int r) const
331 {
332 if (!valid_len(l))
333 return 0.0;
334
335 if (r > _max || r == 0)
336 return pdf(l);
337
338 return pdf(l)/cdf(r);
339 }
340
cdf(std::vector<double> & cdf)341 void cdf(std::vector<double>& cdf) { _cdf = cdf; }
cdf(int l)342 double cdf(int l) const
343 {
344 if (l > _max)
345 return 1.0;
346 if (l < 0)
347 return 0.0;
348 return _cdf[l];
349 }
350
valid_len(int l)351 bool valid_len(int l) const { return (l >= _min && l <= _max); }
too_short(int l)352 bool too_short(int l) const { return (l < _min); }
353
mode(int mode)354 void mode(int mode) { _mode = mode; }
mode()355 int mode() const { return _mode; }
356
max(int max)357 void max(int max) { _max = max; }
max()358 int max() const { return _max; }
359
min(int min)360 void min(int min) { _min = min; }
min()361 int min() const { return _min; }
362
mean(double mean)363 void mean(double mean) { _mean = mean; }
mean()364 double mean() const { return _mean; }
365
std_dev(double std_dev)366 void std_dev(double std_dev) { _std_dev = std_dev; }
std_dev()367 double std_dev() const { return _std_dev; }
368
source()369 FLDSource source() const { return _source; }
source(FLDSource source)370 void source(FLDSource source) { _source = source; }
371 };
372
373 class BiasLearner;
374 class MultiReadTable;
375
376 class MassDispersionModel;
377 class MleErrorModel;
378
379 struct LocusCount
380 {
LocusCountLocusCount381 LocusCount(std::string ld, double c, int nt, const std::vector<std::string>& gids, const std::vector<std::string>& gnms) :
382 locus_desc(ld), count(c), num_transcripts(nt), gene_ids(gids), gene_short_names(gnms) {}
383 std::string locus_desc;
384 double count;
385 int num_transcripts;
386 std::vector<std::string> gene_ids;
387 std::vector<std::string> gene_short_names;
388
389 private:
390
LocusCountLocusCount391 LocusCount() {} //needs an empty constructor for serialization
392
393 friend std::ostream & operator<<(std::ostream &os, const LocusCount &gp);
394 friend class boost::serialization::access;
395
396 // template<class Archive>
397 // void serialize(Archive & ar, const unsigned int /* file_version */){
398 // ar & locus_desc;
399 // ar & count;
400 // ar & num_transcripts;
401 // ar & gene_ids;
402 // ar & gene_short_names;
403 // }
404 template<class Archive>
saveLocusCount405 void save(Archive & ar, const unsigned int version) const
406 {
407 ar & locus_desc;
408 ar & count;
409 ar & num_transcripts;
410 ar & gene_ids;
411 ar & gene_short_names;
412 }
413 template<class Archive>
loadLocusCount414 void load(Archive & ar, const unsigned int version)
415 {
416 // create some temporaries, because we don't want to load the whole LocusCount;
417 std::string dsc;
418 ar & dsc;
419 ar & count;
420 ar & num_transcripts;
421 std::vector<std::string> gids;
422 ar & gids;
423 std::vector<std::string> gsns;
424 ar & gsns;
425 }
426 BOOST_SERIALIZATION_SPLIT_MEMBER()
427
428 };
429
430 // This class stores user-supplied options that affect quantification
431 // We'll serialize these into abundance files (i.e. CXB files)
432 // so we can ensure that they're consistent across all samples
433 // provided to cuffnorm and cuffdiff.
434 struct CheckedParameters
435 {
CheckedParametersCheckedParameters436 CheckedParameters() :
437 frag_len_mean(0.0),
438 frag_len_std_dev(0.0),
439 corr_bias(0.0),
440 frag_bias_mode(VLMM),
441 corr_multireads(false),
442 max_mle_iterations(false),
443 min_mle_accuracy(0.0),
444 max_bundle_frags(0.0),
445 max_frags_multihits(0.0),
446 no_effective_length_correction(false),
447 no_length_correction(false),
448 ref_gtf_file_path(""),
449 ref_gtf_crc(0),
450 mask_gtf_file_path(""),
451 mask_gtf_crc(0)
452 {} //needs an empty constructor for serialization
453
454 double frag_len_mean;
455 double frag_len_std_dev;
456
457 // TODO: add CRCs for reference GTF, mask file
458 bool corr_bias;
459
460 BiasMode frag_bias_mode;
461 bool corr_multireads;
462
463 double max_mle_iterations;
464 double min_mle_accuracy;
465
466 double max_bundle_frags;
467 double max_frags_multihits;
468
469 bool no_effective_length_correction;
470 bool no_length_correction;
471
472 std::string ref_gtf_file_path;
473 boost::crc_32_type::value_type ref_gtf_crc;
474
475 std::string mask_gtf_file_path;
476 boost::crc_32_type::value_type mask_gtf_crc;
477
478 friend std::ostream & operator<<(std::ostream &os, const CheckedParameters &gp);
479 friend class boost::serialization::access;
480
481 template<class Archive>
serializeCheckedParameters482 void serialize(Archive & ar, const unsigned int /* file_version */){
483 ar & frag_len_mean;
484 ar & frag_len_std_dev;
485 ar & corr_bias;
486 ar & frag_bias_mode;
487 ar & corr_multireads;
488 ar & max_mle_iterations;
489 ar & min_mle_accuracy;
490 ar & max_bundle_frags;
491 ar & max_frags_multihits;
492 ar & no_effective_length_correction;
493 ar & no_length_correction;
494 ar & ref_gtf_file_path;
495 ar & ref_gtf_crc;
496 ar & mask_gtf_file_path;
497 ar & mask_gtf_crc;
498 }
499
500 bool operator!=(const CheckedParameters& rhs) const {
501 return !(*this == rhs);
502 }
503
504 bool operator==(const CheckedParameters& rhs) const
505 {
506 return (frag_len_mean == rhs.frag_len_mean &&
507 frag_len_std_dev == rhs.frag_len_std_dev &&
508 corr_bias == rhs.corr_bias &&
509 frag_bias_mode == rhs.frag_bias_mode &&
510 corr_multireads == rhs.corr_multireads &&
511 max_mle_iterations == rhs.max_mle_iterations &&
512 min_mle_accuracy == rhs.min_mle_accuracy &&
513 max_bundle_frags == rhs.max_bundle_frags &&
514 max_frags_multihits == rhs.max_frags_multihits &&
515 no_effective_length_correction == rhs.no_effective_length_correction &&
516 no_length_correction == rhs.no_length_correction &&
517 ref_gtf_file_path == rhs.ref_gtf_file_path &&
518 ref_gtf_crc == rhs.ref_gtf_crc &&
519 mask_gtf_file_path == rhs.mask_gtf_file_path &&
520 mask_gtf_crc == rhs.mask_gtf_crc);
521
522 }
523
524 };
525
526 class ReadGroupProperties
527 {
528 public:
529
530 ReadGroupProperties();
531
strandedness()532 Strandedness strandedness() const { return _strandedness; }
strandedness(Strandedness s)533 void strandedness(Strandedness s) { _strandedness = s; }
534
std_mate_orientation()535 StandardMateOrientation std_mate_orientation() const { return _std_mate_orient; }
std_mate_orientation(StandardMateOrientation so)536 void std_mate_orientation(StandardMateOrientation so) { _std_mate_orient = so; }
537
mate_strand_mapping()538 MateStrandMapping mate_strand_mapping() const { return _mate_strand_mapping; }
mate_strand_mapping(MateStrandMapping msm)539 void mate_strand_mapping(MateStrandMapping msm) { _mate_strand_mapping = msm; }
540
platform()541 Platform platform() const { return _platform; }
platform(Platform p)542 void platform(Platform p) { _platform = p; }
543
total_map_mass()544 long double total_map_mass() const { return _total_map_mass; }
total_map_mass(long double p)545 void total_map_mass(long double p) { _total_map_mass = p; }
546
normalized_map_mass()547 long double normalized_map_mass() const { return _norm_map_mass; }
normalized_map_mass(long double p)548 void normalized_map_mass(long double p) { _norm_map_mass = p; }
549
frag_len_dist()550 boost::shared_ptr<EmpDist const> frag_len_dist() const { return _frag_len_dist; }
frag_len_dist(boost::shared_ptr<EmpDist const> p)551 void frag_len_dist(boost::shared_ptr<EmpDist const> p) { _frag_len_dist = p; }
552
bias_learner()553 boost::shared_ptr<BiasLearner const> bias_learner() const { return _bias_learner; }
bias_learner(boost::shared_ptr<BiasLearner const> bl)554 void bias_learner(boost::shared_ptr<BiasLearner const> bl) { _bias_learner = bl; }
555
556 // The internal scaling factor relates replicates to each other, so
557 // that replicates with larger library sizes don't bias the isoform
558 // deconvolution over smaller libraries
internal_scale_factor(double sf)559 void internal_scale_factor(double sf) { _internal_scale_factor = sf; }
internal_scale_factor()560 double internal_scale_factor() const { return _internal_scale_factor; }
561
external_scale_factor(double sf)562 void external_scale_factor(double sf) { _external_scale_factor = sf; }
external_scale_factor()563 double external_scale_factor() const { return _external_scale_factor; }
564
complete_fragments(bool c)565 void complete_fragments(bool c) { _complete_fragments = c; }
complete_fragments()566 bool complete_fragments() const { return _complete_fragments; }
567
internally_scale_mass(double unscaled_mass)568 double internally_scale_mass(double unscaled_mass) const
569 {
570 if (_internal_scale_factor == 0)
571 return unscaled_mass;
572
573 return unscaled_mass * (1.0 / _internal_scale_factor);
574 }
575
mass_dispersion_model()576 boost::shared_ptr<const MassDispersionModel> mass_dispersion_model() const
577 {
578 return _mass_dispersion_model;
579 };
580
mass_dispersion_model(boost::shared_ptr<const MassDispersionModel> nm)581 void mass_dispersion_model(boost::shared_ptr<const MassDispersionModel> nm)
582 {
583 _mass_dispersion_model = nm;
584 }
585
mle_error_model()586 boost::shared_ptr<const MleErrorModel> mle_error_model() const
587 {
588 return _mle_error_model;
589 };
590
mle_error_model(boost::shared_ptr<const MleErrorModel> nm)591 void mle_error_model(boost::shared_ptr<const MleErrorModel> nm)
592 {
593 _mle_error_model = nm;
594 }
595
common_scale_compatible_counts()596 const std::vector<LocusCount>& common_scale_compatible_counts() { return _common_scale_compatible_counts; }
common_scale_compatible_counts(const std::vector<LocusCount> & counts)597 void common_scale_compatible_counts(const std::vector<LocusCount>& counts) { _common_scale_compatible_counts = counts; }
598
common_scale_total_counts()599 const std::vector<LocusCount>& common_scale_total_counts() { return _common_scale_total_counts; }
common_scale_total_counts(const std::vector<LocusCount> & counts)600 void common_scale_total_counts(const std::vector<LocusCount>& counts) { _common_scale_total_counts = counts; }
601
raw_compatible_counts()602 const std::vector<LocusCount>& raw_compatible_counts() { return _raw_compatible_counts; }
raw_compatible_counts(const std::vector<LocusCount> & counts)603 void raw_compatible_counts(const std::vector<LocusCount>& counts) { _raw_compatible_counts = counts; }
604
raw_total_counts()605 const std::vector<LocusCount>& raw_total_counts() { return _raw_total_counts; }
raw_total_counts(const std::vector<LocusCount> & counts)606 void raw_total_counts(const std::vector<LocusCount>& counts) { _raw_total_counts = counts; }
607
clear_count_tables()608 void clear_count_tables() {
609 _common_scale_compatible_counts.clear();
610 std::vector<LocusCount>().swap(_common_scale_compatible_counts);
611
612 _common_scale_total_counts.clear();
613 std::vector<LocusCount>().swap(_common_scale_total_counts);
614
615 _raw_compatible_counts.clear();
616 std::vector<LocusCount>().swap(_raw_compatible_counts);
617
618 _raw_total_counts.clear();
619 std::vector<LocusCount>().swap(_raw_total_counts);
620 }
621
multi_read_table()622 boost::shared_ptr<MultiReadTable> multi_read_table() const {return _multi_read_table; }
multi_read_table(boost::shared_ptr<MultiReadTable> mrt)623 void multi_read_table(boost::shared_ptr<MultiReadTable> mrt) { _multi_read_table = mrt; }
624
625 // const string& description() const { return _description; }
626 // void description(const string& d) { _description = d; }
627
condition_name()628 const std::string& condition_name() const { return _condition_name; }
condition_name(const std::string & cd)629 void condition_name(const std::string& cd) { _condition_name = cd; }
630
file_path()631 const std::string& file_path() const { return _file_path; }
file_path(const std::string & fp)632 void file_path(const std::string& fp) { _file_path = fp; }
633
replicate_num()634 int replicate_num() const { return _replicate_num; }
replicate_num(int rn)635 void replicate_num(int rn) { _replicate_num = rn; }
636
ref_gtf(const std::string & file_path,const boost::crc_32_type & gtf_crc)637 void ref_gtf(const std::string& file_path, const boost::crc_32_type& gtf_crc )
638 {
639 _checked_params.ref_gtf_file_path = file_path;
640 _checked_params.ref_gtf_crc = gtf_crc();
641 }
642
mask_gtf(const std::string & file_path,const boost::crc_32_type & gtf_crc)643 void mask_gtf(const std::string& file_path, const boost::crc_32_type& gtf_crc )
644 {
645 _checked_params.mask_gtf_file_path = file_path;
646 _checked_params.mask_gtf_crc = gtf_crc();
647 }
648
649
checked_parameters()650 const CheckedParameters& checked_parameters() const { return _checked_params; }
checked_parameters(const CheckedParameters & rhs)651 void checked_parameters(const CheckedParameters& rhs) { _checked_params = rhs; }
652
653 // NOTE: this only picks up user-supplied options, not GTF files!
collect_checked_parameters()654 void collect_checked_parameters() {
655
656 _checked_params.frag_len_mean = def_frag_len_mean;
657 _checked_params.frag_len_std_dev = def_frag_len_std_dev;
658
659 // TODO: add CRCs for reference GTF, mask file, norm standards file if using.
660 _checked_params.corr_bias = corr_bias;
661
662 _checked_params.frag_bias_mode = bias_mode;
663 _checked_params.corr_multireads = corr_multi;
664
665 _checked_params.max_mle_iterations = max_mle_iterations;
666 _checked_params.min_mle_accuracy = mle_accuracy;
667
668 _checked_params.max_bundle_frags = max_frags_per_bundle;
669 _checked_params.max_frags_multihits = max_frag_multihits;
670
671 _checked_params.no_effective_length_correction = no_effective_length_correction;
672 _checked_params.no_length_correction = no_length_correction;
673 }
674
675
676 private:
677
678 friend std::ostream & operator<<(std::ostream &os, const ReadGroupProperties &gp);
679 friend class boost::serialization::access;
680
681 template<class Archive>
serialize(Archive & ar,const unsigned int)682 void serialize(Archive & ar, const unsigned int /* file_version */){
683 ar & _strandedness;
684 ar & _std_mate_orient;
685 ar & _mate_strand_mapping;
686 ar & _platform;
687 ar & _total_map_mass;
688 ar & _norm_map_mass;
689 ar & _frag_len_dist;
690 // TODO: probably should serialize the bias parameters somehow.
691 //ar & _bias_learner;
692 //ar & _multi_read_table; // we should never need this, I think.
693 ar & _internal_scale_factor;
694 ar & _external_scale_factor;
695 //ar & _mass_dispersion_model;
696 ar & _common_scale_compatible_counts;
697 ar & _common_scale_total_counts;
698 ar & _raw_compatible_counts;
699 ar & _raw_total_counts;
700 //ar & _mle_error_model;
701 ar & _complete_fragments;
702 ar & _condition_name;
703 ar & _file_path;
704 ar & _replicate_num;
705 ar & _checked_params;
706 }
707
708 Strandedness _strandedness;
709 StandardMateOrientation _std_mate_orient;
710 MateStrandMapping _mate_strand_mapping;
711 Platform _platform;
712 long double _total_map_mass;
713 long double _norm_map_mass;
714 boost::shared_ptr<EmpDist const> _frag_len_dist;
715 boost::shared_ptr<BiasLearner const> _bias_learner;
716 boost::shared_ptr<MultiReadTable> _multi_read_table;
717
718 double _internal_scale_factor;
719 double _external_scale_factor;
720 boost::shared_ptr<const MassDispersionModel> _mass_dispersion_model;
721 std::vector<LocusCount> _common_scale_compatible_counts;
722 std::vector<LocusCount> _common_scale_total_counts;
723 std::vector<LocusCount> _raw_compatible_counts;
724 std::vector<LocusCount> _raw_total_counts;
725
726 boost::shared_ptr<const MleErrorModel> _mle_error_model;
727
728 bool _complete_fragments;
729
730 std::string _condition_name;
731 std::string _file_path;
732 int _replicate_num;
733
734 CheckedParameters _checked_params;
735 };
736
737 BOOST_SERIALIZATION_SHARED_PTR(ReadGroupProperties)
738
739 extern std::map<std::string, ReadGroupProperties> library_type_table;
740
741 extern const ReadGroupProperties* global_read_properties;
742
743 extern std::map<std::string, DispersionMethod> dispersion_method_table;
744 extern DispersionMethod dispersion_method;
745
746 extern std::map<std::string, LibNormalizationMethod> lib_norm_method_table;
747 extern LibNormalizationMethod lib_norm_method;
748
749 extern std::map<std::string, OutputFormat> output_format_table;
750 extern OutputFormat output_format;
751
752
753 void print_library_table();
754 void init_library_table();
755
756 void print_dispersion_method_table();
757 void init_dispersion_method_table();
758
759 void print_lib_norm_method_table();
760 void init_lib_norm_method_table();
761 void init_cufflinks_lib_norm_method_table();
762
763 void print_output_format_table();
764 void init_output_format_table();
765
766
// Placeholder type with no members yet. It is the mapped type of the
// string-keyed map that the global lib_norm_standards (declared right after
// this struct) points to.
767 struct LibNormStandards
768 {
769
770 };
771
772 extern boost::shared_ptr<const std::map<std::string, LibNormStandards> > lib_norm_standards;
773
/**
 * Joins the non-empty strings of a container into one string.
 *
 * Elements are visited in the container's iteration order. Empty elements
 * are skipped entirely, so they never produce a leading, trailing or doubled
 * delimiter.
 *
 * @param container Any container of strings exposing const_iterator,
 *                  begin() and end().
 * @param delimiter Text placed between consecutive non-empty elements
 *                  (defaults to ",").
 * @return The joined string; empty when the container is empty or holds
 *         only empty strings.
 */
template<typename T>
std::string cat_strings(const T& container, const char* delimiter=",")
{
    std::string joined;

    for (typename T::const_iterator it = container.begin();
         it != container.end();
         ++it)
    {
        if (it->empty())
            continue;

        // A delimiter is only needed once something has been written.
        if (!joined.empty())
            joined += delimiter;

        joined += *it;
    }

    return joined;
}
797
798 #define OPT_NUM_IMP_SAMPLES 260
799 #define OPT_MLE_MAX_ITER 261
800 #define OPT_FDR 262
801 #define OPT_LIBRARY_TYPE 263
802 #define OPT_OVERHANG_TOLERANCE 264
803 #define OPT_MAX_BUNDLE_LENGTH 265
804 #define OPT_MIN_FRAGS_PER_TRANSFRAG 266
805 #define OPT_BIAS_MODE 267
806 #define OPT_MIN_INTRON_LENGTH 268
807 #define OPT_3_PRIME_AVGCOV_THRESH 269
808 #define OPT_3_PRIME_DROPOFF_FRAC 270
809 #define OPT_POISSON_DISPERSION 271
810 #define OPT_NO_UPDATE_CHECK 272
811 #define OPT_OUTPUT_FLD 273
812 #define OPT_OUTPUT_BIAS_PARAMS 274
813 #define OPT_USE_EM 275
814 #define OPT_COLLAPSE_COND_PROB 276
815 #define OPT_RANDOM_SEED 277
816 #define OPT_NO_FAUX_READS 278
817 #define OPT_3_OVERHANG_TOLERANCE 279
818 #define OPT_INTRON_OVERHANG_TOLERANCE 280
819 #define OPT_EMIT_COUNT_TABLES 281
820 #define OPT_USE_COMPAT_MASS 282
821 #define OPT_USE_TOTAL_MASS 283
822 #define OPT_USE_FISHER_COVARIANCE 284
823 #define OPT_USE_EMPIRICAL_COVARIANCE 285
824 #define OPT_SPLIT_MASS 286
825 #define OPT_SPLIT_VARIANCE 287
826 #define OPT_TILE_LEN 291
827 #define OPT_TILE_SEP 292
828 #define OPT_NO_5_EXTEND 293
829 #define OPT_MAX_FRAGS_PER_BUNDLE 294
830 #define OPT_READ_SKIP_FRACTION 295
831 #define OPT_NO_READ_PAIRS 296
832 #define OPT_TRIM_READ_LENGTH 297
833 #define OPT_MAX_DELTA_GAP 298
834 #define OPT_MLE_MIN_ACC 299
835 //#define OPT_ANALYTIC_DIFF 300
836 #define OPT_NO_DIFF 301
837 #define OPT_GEOMETRIC_NORM 302
838 #define OPT_RAW_MAPPED_NORM 303
839 #define OPT_NUM_FRAG_COUNT_DRAWS 304
840 #define OPT_NUM_FRAG_ASSIGN_DRAWS 305
841 #define OPT_MAX_MULTIREAD_FRACTION 306
842 #define OPT_LOCUS_COUNT_DISPERSION 307
843 #define OPT_MIN_OUTLIER_P 308
844 #define OPT_FRAG_MAX_MULTIHITS 309
845 #define OPT_MIN_REPS_FOR_JS_TEST 310
846 #define OPT_OLAP_RADIUS 311
847 #define OPT_NO_LENGTH_CORRECTION 312
848 #define OPT_NO_EFFECTIVE_LENGTH_CORRECTION 313
849 #define OPT_NO_JS_TESTS 314
850 #define OPT_DISPERSION_METHOD 315
851 #define OPT_LIB_NORM_METHOD 316
852 #define OPT_NO_SCV_CORRECTION 317
853 #define OPT_NORM_STANDARDS_FILE 318
854 #define OPT_USE_SAMPLE_SHEET 319
855 #define OPT_OUTPUT_FORMAT 320
856 #endif
857