1 /*
2  *  cuffdiff.cpp
3  *  cufflinks
4  *
5  *  Created by Cole Trapnell on 10/21/09.
6  *  Copyright 2009 Cole Trapnell. All rights reserved.
7  *
8  */
9 
10 #include <stdlib.h>
11 #include <getopt.h>
12 #include <string>
13 #include <numeric>
14 #include <cfloat>
15 #include <iostream>
16 
17 #include "common.h"
18 #include "hits.h"
19 #include "bundles.h"
20 #include "abundances.h"
21 #include "tokenize.h"
22 #include "biascorrection.h"
23 #include "update_check.h"
24 
25 #include <boost/thread.hpp>
26 #include <boost/version.hpp>
27 #include <boost/graph/adjacency_list.hpp>
28 #include <boost/graph/graph_traits.hpp>
29 #include <boost/numeric/ublas/matrix.hpp>
30 #include <boost/numeric/ublas/matrix_proxy.hpp>
31 #include <boost/numeric/ublas/vector.hpp>
32 #include <boost/numeric/ublas/vector_proxy.hpp>
33 #include <boost/numeric/ublas/io.hpp>
34 #include <boost/algorithm/string.hpp>
35 
36 #include <boost/random/mersenne_twister.hpp>
37 
38 #include "differential.h"
39 
40 extern "C" {
41 #include "locfit/local.h"
42 }
43 
44 // Need at least this many reads in a locus to do any testing on it
45 
46 vector<string> sample_labels;
47 
48 double FDR = 0.05;
49 bool samples_are_time_series = false;
50 using namespace std;
51 using namespace boost;
52 
53 // We leave out the short codes for options that don't take an argument
54 #if ENABLE_THREADS
55 const char *short_options = "m:p:s:c:I:j:L:M:o:b:TNqvuF:C:";
56 #else
57 const char *short_options = "m:s:c:I:j:L:M:o:b:TNqvuF:C:";
58 #endif
59 
60 static struct option long_options[] = {
61 {"labels",					required_argument,		 0,			 'L'},
62 {"seed",                    required_argument,		 0,			 OPT_RANDOM_SEED},
63 {"norm-standards-file",     required_argument,		 0,			 OPT_NORM_STANDARDS_FILE},
64 {"use-sample-sheet",        no_argument,             0,			 OPT_USE_SAMPLE_SHEET},
65 {"output-dir",			    required_argument,		 0,			 'o'},
66 {"verbose",			    	no_argument,			 0,			 'v'},
67 {"quiet",			    	no_argument,			 0,			 'q'},
68 #if ENABLE_THREADS
69 {"num-threads",				required_argument,       0,          'p'},
70 #endif
71 {"library-type",		    required_argument,		 0,			 OPT_LIBRARY_TYPE},
72 {"no-update-check",         no_argument,             0,          OPT_NO_UPDATE_CHECK},
73 {"compatible-hits-norm",    no_argument,	 		 0,	         OPT_USE_COMPAT_MASS},
74 {"total-hits-norm",         no_argument,	 		 0,	         OPT_USE_TOTAL_MASS},
75 
76 // Some options for testing different stats policies
77 {"max-bundle-frags",        required_argument,       0,          OPT_MAX_FRAGS_PER_BUNDLE},
78 {"library-norm-method",     required_argument,       0,          OPT_LIB_NORM_METHOD},
79 {"output-format",           required_argument,       0,          OPT_OUTPUT_FORMAT},
80 {0, 0, 0, 0} // terminator
81 };
82 
print_usage()83 void print_usage()
84 {
85 	fprintf(stderr, "cuffnorm v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
86 	fprintf(stderr, "-----------------------------\n");
87 
88 	//NOTE: SPACES ONLY, bozo
89     fprintf(stderr, "Usage:   cuffnorm [options] <transcripts.gtf> <sample1_expr.cxb> <sample2_expr.cxb> [... sampleN_expr.cxb]\n");
90 	fprintf(stderr, "   Supply replicate CXB files as comma separated lists for each condition: sample1_rep1.cxb,sample1_rep2.cxb,...sample1_repM.cxb\n");
91     fprintf(stderr, "General Options:\n");
92     fprintf(stderr, "  -o/--output-dir              write all output files to this directory              [ default:     ./ ]\n");
93     fprintf(stderr, "  -L/--labels                  comma-separated list of condition labels\n");
94     fprintf(stderr, "  --norm-standards-file        Housekeeping/spike genes to normalize libraries       [ default:   NULL ]\n"); // NOT YET DOCUMENTED, keep secret for now
95 #if ENABLE_THREADS
96 	fprintf(stderr, "  -p/--num-threads             number of threads used during quantification          [ default:      1 ]\n");
97 #endif
98     fprintf(stderr, "  --library-type               Library prep used for input reads                     [ default:  below ]\n");
99     fprintf(stderr, "  --library-norm-method        Method used to normalize library sizes                [ default:  below ]\n");
100     fprintf(stderr, "  --output-format              Format for output tables                              [ default:  below ]\n");
101 
102     fprintf(stderr, "\nAdvanced Options:\n");
103     fprintf(stderr, "  --compatible-hits-norm       count hits compatible with reference RNAs only        [ default:   TRUE ]\n");
104     fprintf(stderr, "  --total-hits-norm            count all hits for normalization                      [ default:  FALSE ]\n");
105     fprintf(stderr, "  -v/--verbose                 log-friendly verbose processing (no progress bar)     [ default:  FALSE ]\n");
106 	fprintf(stderr, "  -q/--quiet                   log-friendly quiet processing (no progress bar)       [ default:  FALSE ]\n");
107     fprintf(stderr, "  --seed                       value of random number generator seed                 [ default:      0 ]\n");
108     fprintf(stderr, "  --no-update-check            do not contact server to check for update availability[ default:  FALSE ]\n");
109     print_library_table();
110     print_lib_norm_method_table();
111     print_output_format_table();
112 }
113 
parse_options(int argc,char ** argv)114 int parse_options(int argc, char** argv)
115 {
116     int option_index = 0;
117     int next_option;
118     string sample_label_list;
119     string dispersion_method_str;
120     string lib_norm_method_str;
121     string output_format_str;
122 
123     do {
124         next_option = getopt_long_only(argc, argv, short_options, long_options, &option_index);
125         if (next_option == -1)     /* Done with options. */
126             break;
127         switch (next_option) {
128             case 0:
129                 /* If this option set a flag, do nothing else now. */
130                 if (long_options[option_index].flag != 0)
131                     break;
132                 break;
133 
134 			case 'p':
135 				num_threads = (uint32_t)parseInt(1, "-p/--num-threads arg must be at least 1", print_usage);
136 				break;
137             case 'F':
138 				min_isoform_fraction = parseFloat(0, 1.0, "-F/--min-isoform-fraction must be between 0 and 1.0", print_usage);
139 				break;
140             case 'L':
141 				sample_label_list = optarg;
142 				break;
143 
144 
145 			case OPT_NORM_STANDARDS_FILE:
146 			{
147 				norm_standards_filename = optarg;
148 				break;
149 			}
150             case OPT_USE_SAMPLE_SHEET:
151 			{
152                 use_sample_sheet = true;
153 				break;
154 			}
155             case 'v':
156 			{
157 				if (cuff_quiet)
158 				{
159 					fprintf(stderr, "Warning: Can't be both verbose and quiet!  Setting verbose only.\n");
160 				}
161 				cuff_quiet = false;
162 				cuff_verbose = true;
163 				break;
164 			}
165 			case 'q':
166 			{
167 				if (cuff_verbose)
168 				{
169 					fprintf(stderr, "Warning: Can't be both verbose and quiet!  Setting quiet only.\n");
170 				}
171 				cuff_verbose = false;
172 				cuff_quiet = true;
173 				break;
174 			}
175             case 'o':
176 			{
177 				output_dir = optarg;
178 				break;
179 			}
180 
181 
182             case OPT_LIBRARY_TYPE:
183 			{
184 				library_type = optarg;
185 				break;
186 			}
187 
188             case OPT_NO_UPDATE_CHECK:
189             {
190                 no_update_check = true;
191                 break;
192             }
193             case OPT_RANDOM_SEED:
194             {
195                 random_seed = parseInt(0, "--seed must be at least 0", print_usage);
196                 break;
197             }
198             case OPT_USE_COMPAT_MASS:
199             {
200                 use_compat_mass = true;
201                 break;
202             }
203             case OPT_USE_TOTAL_MASS:
204             {
205                 use_total_mass = true;
206                 break;
207             }
208             case OPT_LIB_NORM_METHOD:
209 			{
210 				lib_norm_method_str = optarg;
211 				break;
212 			}
213             case OPT_OUTPUT_FORMAT:
214 			{
215 				output_format_str = optarg;
216 				break;
217 			}
218 			default:
219 				print_usage();
220 				return 1;
221         }
222     } while(next_option != -1);
223 
224 	if (library_type != "")
225     {
226         map<string, ReadGroupProperties>::iterator lib_itr =
227 		library_type_table.find(library_type);
228         if (lib_itr == library_type_table.end())
229         {
230             fprintf(stderr, "Error: Library type %s not supported\n", library_type.c_str());
231             exit(1);
232         }
233         else
234         {
235             if (library_type == "transfrags")
236             {
237                 allow_junk_filtering = false;
238             }
239             global_read_properties = &lib_itr->second;
240         }
241     }
242     else
243     {
244 
245     }
246 
247     // Set the count dispersion method to use
248     if (dispersion_method_str == "")
249     {
250         dispersion_method_str = default_dispersion_method;
251     }
252 
253     map<string, DispersionMethod>::iterator disp_itr =
254     dispersion_method_table.find(dispersion_method_str);
255     if (disp_itr == dispersion_method_table.end())
256     {
257         fprintf(stderr, "Error: Dispersion method %s not supported\n", dispersion_method_str.c_str());
258         exit(1);
259     }
260     else
261     {
262         dispersion_method = disp_itr->second;
263     }
264 
265     // Set the library size normalization method to use
266     if (lib_norm_method_str == "")
267     {
268         lib_norm_method_str = default_lib_norm_method;
269     }
270 
271     map<string, LibNormalizationMethod>::iterator lib_norm_itr =
272     lib_norm_method_table.find(lib_norm_method_str);
273     if (lib_norm_itr == lib_norm_method_table.end())
274     {
275         fprintf(stderr, "Error: Normalization method %s not supported\n", lib_norm_method_str.c_str());
276         exit(1);
277     }
278     else
279     {
280         lib_norm_method = lib_norm_itr->second;
281     }
282 
283     // Set the count dispersion method to use
284     if (output_format_str == "")
285     {
286         output_format_str = default_output_format;
287     }
288 
289     map<string, OutputFormat>::iterator output_itr =
290     output_format_table.find(output_format_str);
291     if (output_itr == output_format_table.end())
292     {
293         fprintf(stderr, "Error: Output format %s not supported\n", output_format_str.c_str());
294         exit(1);
295     }
296     else
297     {
298         output_format = output_itr->second;
299     }
300 
301     if (use_total_mass && use_compat_mass)
302     {
303         fprintf (stderr, "Error: please supply only one of --compatibile-hits-norm and --total-hits-norm\n");
304         exit(1);
305     }
306 
307     tokenize(sample_label_list, ",", sample_labels);
308 
309 	allow_junk_filtering = false;
310 
311 	return 0;
312 }
313 
314 #if ENABLE_THREADS
315 boost::mutex inspect_lock;
316 #endif
317 
inspect_map_worker(ReplicatedBundleFactory & fac,int & tmp_min_frag_len,int & tmp_max_frag_len,IdToLocusMap & id_to_locus_map)318 void inspect_map_worker(ReplicatedBundleFactory& fac,
319                         int& tmp_min_frag_len,
320                         int& tmp_max_frag_len,
321                         IdToLocusMap& id_to_locus_map)
322 {
323 #if ENABLE_THREADS
324 	boost::this_thread::at_thread_exit(decr_pool_count);
325 #endif
326 
327     int min_f = std::numeric_limits<int>::max();
328     int max_f = 0;
329 
330     fac.inspect_replicate_maps(min_f, max_f, id_to_locus_map);
331 
332 #if ENABLE_THREADS
333     inspect_lock.lock();
334 #endif
335     tmp_min_frag_len = min(min_f, tmp_min_frag_len);
336     tmp_max_frag_len = max(max_f, tmp_max_frag_len);
337 #if ENABLE_THREADS
338     inspect_lock.unlock();
339 #endif
340 }
341 
342 boost::shared_ptr<TrackingDataWriter> tracking_data_writer;
343 
quantitate_next_locus(const RefSequenceTable & rt,vector<boost::shared_ptr<ReplicatedBundleFactory>> & bundle_factories,boost::shared_ptr<TestLauncher> launcher)344 bool quantitate_next_locus(const RefSequenceTable& rt,
345                            vector<boost::shared_ptr<ReplicatedBundleFactory> >& bundle_factories,
346                            boost::shared_ptr<TestLauncher> launcher)
347 {
348     for (size_t i = 0; i < bundle_factories.size(); ++i)
349     {
350         boost::shared_ptr<SampleAbundances> s_ab = boost::shared_ptr<SampleAbundances>(new SampleAbundances);
351 
352 #if ENABLE_THREADS
353         while(1)
354         {
355             locus_thread_pool_lock.lock();
356             if (locus_curr_threads < locus_num_threads)
357             {
358                 break;
359             }
360 
361             locus_thread_pool_lock.unlock();
362 
363             boost::this_thread::sleep(boost::posix_time::milliseconds(5));
364 
365         }
366 
367         locus_curr_threads++;
368         locus_thread_pool_lock.unlock();
369 
370         boost::shared_ptr<HitBundle> pBundle = boost::shared_ptr<HitBundle>(new HitBundle());
371         bool non_empty = bundle_factories[i]->next_bundle(*pBundle, true);
372 
373         if (pBundle->compatible_mass() > 0)
374         {
375             boost::thread quantitate(sample_worker,
376                               non_empty,
377                               pBundle,
378                               boost::ref(rt),
379                               boost::ref(*(bundle_factories[i])),
380                               s_ab,
381                               i,
382                               launcher,
383                               false);
384         }
385         else
386         {
387             sample_worker(non_empty,
388                           pBundle,
389                           boost::ref(rt),
390                           boost::ref(*(bundle_factories[i])),
391                           s_ab,
392                           i,
393                           launcher,
394                           false);
395             locus_thread_pool_lock.lock();
396             locus_curr_threads--;
397             locus_thread_pool_lock.unlock();
398         }
399 #else
400         HitBundle bundle;
401         bool non_empty = sample_factory.next_bundle(bundle);
402 
403         sample_worker(non_emtpy,
404                       pBundle,
405                       boost::ref(rt),
406                       boost::ref(*(bundle_factories[i])),
407                       s_ab,
408                       i,
409                       launcher,
410                       false);
411 #endif
412     }
413     return true;
414 }
415 
parse_sample_sheet_file(FILE * sample_sheet_file,vector<string> & sample_labels,vector<string> & sam_hit_filename_lists)416 void parse_sample_sheet_file(FILE* sample_sheet_file,
417                              vector<string>& sample_labels,
418                              vector<string>& sam_hit_filename_lists)
419 {
420 
421     char pBuf[10 * 1024];
422     size_t non_blank_lines_read = 0;
423 
424     sample_labels.clear();
425 
426     map<string, vector<string> > sample_groups;
427 
428     while (fgets(pBuf, 10*1024, sample_sheet_file))
429     {
430         if (strlen(pBuf) > 0)
431         {
432             char* nl = strchr(pBuf, '\n');
433             if (nl)
434                 *nl = 0;
435 
436             string pBufstr = pBuf;
437             string trimmed = boost::trim_copy(pBufstr);
438 
439             if (trimmed.length() > 0 && trimmed[0] != '#')
440             {
441                 non_blank_lines_read++;
442                 vector<string> columns;
443                 tokenize(trimmed, "\t", columns);
444 
445                 if (non_blank_lines_read == 1)
446                     continue;
447 
448                 if (columns.size() < 2)
449                 {
450                     if (columns.size() > 0)
451                         fprintf(stderr, "Malformed record in sample sheet: \n   >  %s\n", pBuf);
452                     else
453                         continue;
454                 }
455 
456                 string sam_file = columns[0];
457                 string sample_group = columns[1];
458 
459                 pair<map<string, vector<string> >::iterator, bool> inserted = sample_groups.insert(make_pair(sample_group, vector<string>()));
460                 inserted.first->second.push_back(sam_file);
461             }
462         }
463     }
464 
465     for (map<string, vector<string> >::iterator itr = sample_groups.begin();
466          itr != sample_groups.end(); ++itr)
467     {
468         sample_labels.push_back(itr->first);
469         string sam_list = boost::join(itr->second, ",");
470         sam_hit_filename_lists.push_back(sam_list);
471     }
472 }
473 
parse_norm_standards_file(FILE * norm_standards_file)474 void parse_norm_standards_file(FILE* norm_standards_file)
475 {
476     char pBuf[10 * 1024];
477     size_t non_blank_lines_read = 0;
478 
479     boost::shared_ptr<map<string, LibNormStandards> > norm_standards(new map<string, LibNormStandards>);
480 
481     while (fgets(pBuf, 10*1024, norm_standards_file))
482     {
483         if (strlen(pBuf) > 0)
484         {
485             char* nl = strchr(pBuf, '\n');
486             if (nl)
487                 *nl = 0;
488 
489             string pBufstr = pBuf;
490             string trimmed = boost::trim_copy(pBufstr);
491 
492             if (trimmed.length() > 0 && trimmed[0] != '#')
493             {
494                 non_blank_lines_read++;
495                 vector<string> columns;
496                 tokenize(trimmed, "\t", columns);
497 
498                 if (non_blank_lines_read == 1)
499                     continue;
500 
501                 if (columns.size() < 1) //
502                 {
503                     continue;
504                 }
505 
506                 string gene_id = columns[0];
507                 LibNormStandards L;
508                 norm_standards->insert(make_pair(gene_id, L));
509             }
510         }
511     }
512     lib_norm_standards = norm_standards;
513 }
514 
515 
print_variability_models(FILE * var_model_out,const vector<boost::shared_ptr<ReplicatedBundleFactory>> & factories)516 void print_variability_models(FILE* var_model_out, const vector<boost::shared_ptr<ReplicatedBundleFactory> >& factories)
517 {
518 
519     fprintf(var_model_out, "condition\tlocus\tcompatible_count_mean\tcompatible_count_var\ttotal_count_mean\ttotal_count_var\tfitted_var\n");
520 
521     for (size_t i = 0; i < factories.size(); ++i)
522     {
523         string factor_name = factories[i]->condition_name();
524         boost::shared_ptr<ReadGroupProperties> rg = factories[i]->factories()[0]->read_group_properties();
525         boost::shared_ptr<MassDispersionModel const> model = rg->mass_dispersion_model();
526 //        const vector<double>& means = model->scaled_compatible_mass_means();
527 //        const vector<double>& raw_vars  = model->scaled_compatible_variances();
528 
529         const vector<LocusCount>& common_scale_compatible_counts = rg->common_scale_compatible_counts();
530         for (size_t j = 0; j < common_scale_compatible_counts.size(); ++j)
531         {
532             string locus_desc = common_scale_compatible_counts[j].locus_desc;
533             pair<double, double> compat_mean_and_var = model->get_compatible_mean_and_var(locus_desc);
534             pair<double, double> total_mean_and_var = model->get_total_mean_and_var(locus_desc);
535 //            double total_compat_count = 0;
536 //            if (itr != locus_to_total_count_table.end())
537 //                total_compat_count = itr->second.count;
538 
539 
540             fprintf(var_model_out, "%s\t%s\t%lg\t%lg\t%lg\t%lg\t%lg\n",
541                     factor_name.c_str(),
542                     locus_desc.c_str(),
543                     compat_mean_and_var.first,
544                     compat_mean_and_var.second,
545                     total_mean_and_var.first,
546                     total_mean_and_var.second,
547                     model->scale_mass_variance(compat_mean_and_var.first));
548         }
549     }
550     fclose(var_model_out);
551 
552 }
553 
driver(FILE * ref_gtf,FILE * mask_gtf,FILE * contrast_file,FILE * norm_standards_file,vector<string> & sam_hit_filename_lists,Outfiles & outfiles)554 void driver(FILE* ref_gtf, FILE* mask_gtf, FILE* contrast_file, FILE* norm_standards_file, vector<string>& sam_hit_filename_lists, Outfiles& outfiles)
555 {
556 
557 	ReadTable it;
558 	RefSequenceTable rt(true, false);
559 
560 	vector<boost::shared_ptr<Scaffold> > ref_mRNAs;
561 
562 	vector<boost::shared_ptr<ReplicatedBundleFactory> > bundle_factories;
563     vector<boost::shared_ptr<ReadGroupProperties> > all_read_groups;
564     set<boost::shared_ptr<ReadGroupProperties> > serialized_read_groups;
565 
566 	for (size_t i = 0; i < sam_hit_filename_lists.size(); ++i)
567 	{
568         vector<string> sam_hit_filenames;
569         tokenize(sam_hit_filename_lists[i], ",", sam_hit_filenames);
570 
571         vector<boost::shared_ptr<BundleFactory> > replicate_factories;
572 
573         string condition_name = sample_labels[i];
574 
575         for (size_t j = 0; j < sam_hit_filenames.size(); ++j)
576         {
577             boost::shared_ptr<HitFactory> hs;
578             boost::shared_ptr<BundleFactory> hf;
579             try
580             {
581                 hs = boost::shared_ptr<HitFactory>(new PrecomputedExpressionHitFactory(sam_hit_filenames[j], it, rt));
582                 hf = boost::shared_ptr<BundleFactory>(new PrecomputedExpressionBundleFactory(static_pointer_cast<PrecomputedExpressionHitFactory>(hs)));
583             }
584 
585             catch(boost::archive::archive_exception & e)
586             {
587 				hs = boost::shared_ptr<HitFactory>(createSamHitFactory(sam_hit_filenames[j], it, rt));
588                 hf = boost::shared_ptr<BundleFactory>(new BundleFactory(hs, REF_DRIVEN));
589             }
590 
591 
592             boost::shared_ptr<ReadGroupProperties> rg_props(new ReadGroupProperties);
593 
594             if (global_read_properties)
595             {
596                 *rg_props = *global_read_properties;
597             }
598             else
599             {
600                 *rg_props = hs->read_group_properties();
601             }
602 
603             rg_props->checked_parameters(hs->read_group_properties().checked_parameters());
604             rg_props->condition_name(condition_name);
605             rg_props->replicate_num(j);
606             rg_props->file_path(sam_hit_filenames[j]);
607 
608             all_read_groups.push_back(rg_props);
609 
610             hf->read_group_properties(rg_props);
611 
612             replicate_factories.push_back(hf);
613             //replicate_factories.back()->set_ref_rnas(ref_mRNAs);
614         }
615 
616         bundle_factories.push_back(boost::shared_ptr<ReplicatedBundleFactory>(new ReplicatedBundleFactory(replicate_factories, condition_name)));
617 	}
618 
619     boost::crc_32_type ref_gtf_crc_result;
620     ::load_ref_rnas(ref_gtf, rt, ref_mRNAs, ref_gtf_crc_result, corr_bias, false);
621     if (ref_mRNAs.empty())
622         return;
623 
624     vector<boost::shared_ptr<Scaffold> > mask_rnas;
625     if (mask_gtf)
626     {
627         boost::crc_32_type mask_gtf_crc_result;
628         ::load_ref_rnas(mask_gtf, rt, mask_rnas, mask_gtf_crc_result, false, false);
629     }
630 
631     BOOST_FOREACH (boost::shared_ptr<ReplicatedBundleFactory> fac, bundle_factories)
632     {
633         // NOTE: we aren't deep copying here to save memory.  We can pull this off
634         // because unless bias correction or multiread correction is enabled,
635         // we don't need to write into Scaffold objects.
636         fac->set_ref_rnas(ref_mRNAs, false);
637         if (mask_gtf)
638             fac->set_mask_rnas(mask_rnas);
639     }
640 
641     if (norm_standards_file != NULL)
642     {
643         parse_norm_standards_file(norm_standards_file);
644     }
645 
646     validate_cross_sample_parameters(all_read_groups);
647 
648     vector<pair<size_t, size_t > > contrasts;
649 
650 #if ENABLE_THREADS
651     locus_num_threads = num_threads;
652 #endif
653 
654 	int tmp_min_frag_len = numeric_limits<int>::max();
655 	int tmp_max_frag_len = 0;
656 
657     boost::shared_ptr<IdToLocusMap> id_to_locus_map(new IdToLocusMap(boost::shared_ptr<map<string, set<string> > >(new map<string, set<string> >())));
658 
659 	ProgressBar p_bar("Inspecting maps and determining fragment length distributions.",0);
660 	BOOST_FOREACH (boost::shared_ptr<ReplicatedBundleFactory> fac, bundle_factories)
661     {
662 #if ENABLE_THREADS
663         while(1)
664         {
665             locus_thread_pool_lock.lock();
666             if (locus_curr_threads < locus_num_threads)
667             {
668                 break;
669             }
670 
671             locus_thread_pool_lock.unlock();
672 
673             boost::this_thread::sleep(boost::posix_time::milliseconds(5));
674         }
675 
676         locus_curr_threads++;
677         locus_thread_pool_lock.unlock();
678 
679         boost::thread inspect(inspect_map_worker,
680                        boost::ref(*fac),
681                        boost::ref(tmp_min_frag_len),
682                        boost::ref(tmp_max_frag_len),
683                        boost::ref(*id_to_locus_map));
684 #else
685         inspect_map_worker(boost::ref(*fac),
686                            boost::ref(tmp_min_frag_len),
687                            boost::ref(tmp_max_frag_len),
688                            boost::ref(*id_to_locus_map));
689 #endif
690     }
691 
692     // wait for the workers to finish up before reporting everthing.
693 #if ENABLE_THREADS
694     while(1)
695     {
696         locus_thread_pool_lock.lock();
697         if (locus_curr_threads == 0)
698         {
699             locus_thread_pool_lock.unlock();
700             break;
701         }
702         locus_thread_pool_lock.unlock();
703 
704         boost::this_thread::sleep(boost::posix_time::milliseconds(5));
705     }
706 #endif
707 
708     normalize_counts(all_read_groups);
709 
710     long double total_norm_mass = 0.0;
711     long double total_mass = 0.0;
712     BOOST_FOREACH (boost::shared_ptr<ReadGroupProperties> rg_props, all_read_groups)
713     {
714         total_norm_mass += rg_props->normalized_map_mass();
715         total_mass += rg_props->total_map_mass();
716         rg_props->clear_count_tables();
717     }
718 
719 	min_frag_len = tmp_min_frag_len;
720     max_frag_len = tmp_max_frag_len;
721 
722 	final_est_run = false;
723 
724 	double num_bundles = (double)bundle_factories[0]->num_bundles();
725 
726     p_bar = ProgressBar("Calculating preliminary abundance estimates", num_bundles);
727 
728     Tracking tracking;
729 
730 	final_est_run = true;
731 	p_bar = ProgressBar("Normalizing expression levels for locus", num_bundles);
732 
733     tracking_data_writer = boost::shared_ptr<TrackingDataWriter>(new TrackingDataWriter(bundle_factories.size(), &outfiles, &tracking, all_read_groups, sample_labels, &p_bar, id_to_locus_map));
734 
735 	while (true)
736 	{
737         //boost::shared_ptr<vector<boost::shared_ptr<SampleAbundances> > > abundances(new vector<boost::shared_ptr<SampleAbundances> >());
738         quantitate_next_locus(rt, bundle_factories, tracking_data_writer);
739         bool more_loci_remain = false;
740         BOOST_FOREACH (boost::shared_ptr<ReplicatedBundleFactory> rep_fac, bundle_factories)
741         {
742             if (rep_fac->bundles_remain())
743             {
744                 more_loci_remain = true;
745                 break;
746             }
747         }
748         if (!more_loci_remain)
749         {
750             // wait for the workers to finish up before doing the cross-sample testing.
751 #if ENABLE_THREADS
752             while(1)
753             {
754                 locus_thread_pool_lock.lock();
755                 if (locus_curr_threads == 0)
756                 {
757                     locus_thread_pool_lock.unlock();
758                     break;
759                 }
760 
761                 locus_thread_pool_lock.unlock();
762 
763                 boost::this_thread::sleep(boost::posix_time::milliseconds(5));
764 
765             }
766 #endif
767             break;
768         }
769     }
770 
771 	p_bar.complete();
772 
773 }
774 
open_outfiles_for_writing_cuffdiff_format(Outfiles & outfiles)775 void open_outfiles_for_writing_cuffdiff_format(Outfiles& outfiles)
776 {
777 
778     static const int filename_buf_size = 2048;
779 
780 	char isoform_fpkm_tracking_name[filename_buf_size];
781 	sprintf(isoform_fpkm_tracking_name, "%s/isoforms.fpkm_tracking", output_dir.c_str());
782 	FILE* isoform_fpkm_out = fopen(isoform_fpkm_tracking_name, "w");
783 	if (!isoform_fpkm_out)
784 	{
785 		fprintf(stderr, "Error: cannot open isoform-level FPKM tracking file %s for writing\n",
786 				isoform_fpkm_tracking_name);
787 		exit(1);
788 	}
789 	outfiles.isoform_fpkm_tracking_out = isoform_fpkm_out;
790 
791 	char tss_group_fpkm_tracking_name[filename_buf_size];
792 	sprintf(tss_group_fpkm_tracking_name, "%s/tss_groups.fpkm_tracking", output_dir.c_str());
793 	FILE* tss_group_fpkm_out = fopen(tss_group_fpkm_tracking_name, "w");
794 	if (!tss_group_fpkm_out)
795 	{
796 		fprintf(stderr, "Error: cannot open TSS group-level FPKM tracking file %s for writing\n",
797 				tss_group_fpkm_tracking_name);
798 		exit(1);
799 	}
800 	outfiles.tss_group_fpkm_tracking_out = tss_group_fpkm_out;
801 
802 	char cds_fpkm_tracking_name[filename_buf_size];
803 	sprintf(cds_fpkm_tracking_name, "%s/cds.fpkm_tracking", output_dir.c_str());
804 	FILE* cds_fpkm_out = fopen(cds_fpkm_tracking_name, "w");
805 	if (!cds_fpkm_out)
806 	{
807 		fprintf(stderr, "Error: cannot open CDS level FPKM tracking file %s for writing\n",
808 				cds_fpkm_tracking_name);
809 		exit(1);
810 	}
811 	outfiles.cds_fpkm_tracking_out = cds_fpkm_out;
812 
813 	char gene_fpkm_tracking_name[filename_buf_size];
814 	sprintf(gene_fpkm_tracking_name, "%s/genes.fpkm_tracking", output_dir.c_str());
815 	FILE* gene_fpkm_out = fopen(gene_fpkm_tracking_name, "w");
816 	if (!gene_fpkm_out)
817 	{
818 		fprintf(stderr, "Error: cannot open gene-level FPKM tracking file %s for writing\n",
819 				gene_fpkm_tracking_name);
820 		exit(1);
821 	}
822 	outfiles.gene_fpkm_tracking_out = gene_fpkm_out;
823 
824     char isoform_count_tracking_name[filename_buf_size];
825 	sprintf(isoform_count_tracking_name, "%s/isoforms.count_tracking", output_dir.c_str());
826 	FILE* isoform_count_out = fopen(isoform_count_tracking_name, "w");
827 	if (!isoform_count_out)
828 	{
829 		fprintf(stderr, "Error: cannot open isoform-level count tracking file %s for writing\n",
830 				isoform_count_tracking_name);
831 		exit(1);
832 	}
833 	outfiles.isoform_count_tracking_out = isoform_count_out;
834 
835 	char tss_group_count_tracking_name[filename_buf_size];
836 	sprintf(tss_group_count_tracking_name, "%s/tss_groups.count_tracking", output_dir.c_str());
837 	FILE* tss_group_count_out = fopen(tss_group_count_tracking_name, "w");
838 	if (!tss_group_count_out)
839 	{
840 		fprintf(stderr, "Error: cannot open TSS group-level count tracking file %s for writing\n",
841 				tss_group_count_tracking_name);
842 		exit(1);
843 	}
844 	outfiles.tss_group_count_tracking_out = tss_group_count_out;
845 
846 	char cds_count_tracking_name[filename_buf_size];
847 	sprintf(cds_count_tracking_name, "%s/cds.count_tracking", output_dir.c_str());
848 	FILE* cds_count_out = fopen(cds_count_tracking_name, "w");
849 	if (!cds_count_out)
850 	{
851 		fprintf(stderr, "Error: cannot open CDS level count tracking file %s for writing\n",
852 				cds_count_tracking_name);
853 		exit(1);
854 	}
855 	outfiles.cds_count_tracking_out = cds_count_out;
856 
857 	char gene_count_tracking_name[filename_buf_size];
858 	sprintf(gene_count_tracking_name, "%s/genes.count_tracking", output_dir.c_str());
859 	FILE* gene_count_out = fopen(gene_count_tracking_name, "w");
860 	if (!gene_count_out)
861 	{
862 		fprintf(stderr, "Error: cannot open gene-level count tracking file %s for writing\n",
863 				gene_count_tracking_name);
864 		exit(1);
865 	}
866 	outfiles.gene_count_tracking_out = gene_count_out;
867 
868     char isoform_rep_tracking_name[filename_buf_size];
869 	sprintf(isoform_rep_tracking_name, "%s/isoforms.read_group_tracking", output_dir.c_str());
870 	FILE* isoform_rep_out = fopen(isoform_rep_tracking_name, "w");
871 	if (!isoform_rep_out)
872 	{
873 		fprintf(stderr, "Error: cannot open isoform-level read group tracking file %s for writing\n",
874 				isoform_rep_tracking_name);
875 		exit(1);
876 	}
877 	outfiles.isoform_rep_tracking_out = isoform_rep_out;
878 
879 	char tss_group_rep_tracking_name[filename_buf_size];
880 	sprintf(tss_group_rep_tracking_name, "%s/tss_groups.read_group_tracking", output_dir.c_str());
881 	FILE* tss_group_rep_out = fopen(tss_group_rep_tracking_name, "w");
882 	if (!tss_group_rep_out)
883 	{
884 		fprintf(stderr, "Error: cannot open TSS group-level read group tracking file %s for writing\n",
885 				tss_group_rep_tracking_name);
886 		exit(1);
887 	}
888 	outfiles.tss_group_rep_tracking_out = tss_group_rep_out;
889 
890 	char cds_rep_tracking_name[filename_buf_size];
891 	sprintf(cds_rep_tracking_name, "%s/cds.read_group_tracking", output_dir.c_str());
892 	FILE* cds_rep_out = fopen(cds_rep_tracking_name, "w");
893 	if (!cds_rep_out)
894 	{
895 		fprintf(stderr, "Error: cannot open CDS level read group tracking file %s for writing\n",
896 				cds_rep_tracking_name);
897 		exit(1);
898 	}
899 	outfiles.cds_rep_tracking_out = cds_rep_out;
900 
901 	char gene_rep_tracking_name[filename_buf_size];
902 	sprintf(gene_rep_tracking_name, "%s/genes.read_group_tracking", output_dir.c_str());
903 	FILE* gene_rep_out = fopen(gene_rep_tracking_name, "w");
904 	if (!gene_rep_out)
905 	{
906 		fprintf(stderr, "Error: cannot open gene-level read group tracking file %s for writing\n",
907 				gene_rep_tracking_name);
908 		exit(1);
909 	}
910 	outfiles.gene_rep_tracking_out = gene_rep_out;
911 
912     char read_group_info_name[filename_buf_size];
913 	sprintf(read_group_info_name, "%s/read_groups.info", output_dir.c_str());
914 	FILE* read_group_out = fopen(read_group_info_name, "w");
915 	if (!read_group_out)
916 	{
917 		fprintf(stderr, "Error: cannot open read group info file %s for writing\n",
918 				read_group_info_name);
919 		exit(1);
920 	}
921 	outfiles.read_group_info_out = read_group_out;
922 
923     char run_info_name[filename_buf_size];
924 	sprintf(run_info_name, "%s/run.info", output_dir.c_str());
925 	FILE* run_info_out = fopen(run_info_name, "w");
926 	if (!run_info_out)
927 	{
928 		fprintf(stderr, "Error: cannot open run info file %s for writing\n",
929 				run_info_name);
930 		exit(1);
931 	}
932 	outfiles.run_info_out = run_info_out;
933 
934 }
935 
open_outfiles_for_writing_simple_table_format(Outfiles & outfiles)936 void open_outfiles_for_writing_simple_table_format(Outfiles& outfiles)
937 {
938     static const int filename_buf_size = 2048;
939 
940 	char isoform_fpkm_tracking_name[filename_buf_size];
941 	sprintf(isoform_fpkm_tracking_name, "%s/isoforms.fpkm_table", output_dir.c_str());
942 	FILE* isoform_fpkm_out = fopen(isoform_fpkm_tracking_name, "w");
943 	if (!isoform_fpkm_out)
944 	{
945 		fprintf(stderr, "Error: cannot open isoform-level FPKM table %s for writing\n",
946 				isoform_fpkm_tracking_name);
947 		exit(1);
948 	}
949 	outfiles.isoform_fpkm_tracking_out = isoform_fpkm_out;
950 
951 	char tss_group_fpkm_tracking_name[filename_buf_size];
952 	sprintf(tss_group_fpkm_tracking_name, "%s/tss_groups.fpkm_table", output_dir.c_str());
953 	FILE* tss_group_fpkm_out = fopen(tss_group_fpkm_tracking_name, "w");
954 	if (!tss_group_fpkm_out)
955 	{
956 		fprintf(stderr, "Error: cannot open TSS group-level FPKM table %s for writing\n",
957 				tss_group_fpkm_tracking_name);
958 		exit(1);
959 	}
960 	outfiles.tss_group_fpkm_tracking_out = tss_group_fpkm_out;
961 
962 	char cds_fpkm_tracking_name[filename_buf_size];
963 	sprintf(cds_fpkm_tracking_name, "%s/cds.fpkm_table", output_dir.c_str());
964 	FILE* cds_fpkm_out = fopen(cds_fpkm_tracking_name, "w");
965 	if (!cds_fpkm_out)
966 	{
967 		fprintf(stderr, "Error: cannot open CDS level FPKM table %s for writing\n",
968 				cds_fpkm_tracking_name);
969 		exit(1);
970 	}
971 	outfiles.cds_fpkm_tracking_out = cds_fpkm_out;
972 
973 	char gene_fpkm_tracking_name[filename_buf_size];
974 	sprintf(gene_fpkm_tracking_name, "%s/genes.fpkm_table", output_dir.c_str());
975 	FILE* gene_fpkm_out = fopen(gene_fpkm_tracking_name, "w");
976 	if (!gene_fpkm_out)
977 	{
978 		fprintf(stderr, "Error: cannot open gene-level FPKM table %s for writing\n",
979 				gene_fpkm_tracking_name);
980 		exit(1);
981 	}
982 	outfiles.gene_fpkm_tracking_out = gene_fpkm_out;
983 
984     char isoform_count_tracking_name[filename_buf_size];
985 	sprintf(isoform_count_tracking_name, "%s/isoforms.count_table", output_dir.c_str());
986 	FILE* isoform_count_out = fopen(isoform_count_tracking_name, "w");
987 	if (!isoform_count_out)
988 	{
989 		fprintf(stderr, "Error: cannot open isoform-level count table %s for writing\n",
990 				isoform_count_tracking_name);
991 		exit(1);
992 	}
993 	outfiles.isoform_count_tracking_out = isoform_count_out;
994 
995 	char tss_group_count_tracking_name[filename_buf_size];
996 	sprintf(tss_group_count_tracking_name, "%s/tss_groups.count_table", output_dir.c_str());
997 	FILE* tss_group_count_out = fopen(tss_group_count_tracking_name, "w");
998 	if (!tss_group_count_out)
999 	{
1000 		fprintf(stderr, "Error: cannot open TSS group-level count table %s for writing\n",
1001 				tss_group_count_tracking_name);
1002 		exit(1);
1003 	}
1004 	outfiles.tss_group_count_tracking_out = tss_group_count_out;
1005 
1006 	char cds_count_tracking_name[filename_buf_size];
1007 	sprintf(cds_count_tracking_name, "%s/cds.count_table", output_dir.c_str());
1008 	FILE* cds_count_out = fopen(cds_count_tracking_name, "w");
1009 	if (!cds_count_out)
1010 	{
1011 		fprintf(stderr, "Error: cannot open CDS level count table %s for writing\n",
1012 				cds_count_tracking_name);
1013 		exit(1);
1014 	}
1015 	outfiles.cds_count_tracking_out = cds_count_out;
1016 
1017 	char gene_count_tracking_name[filename_buf_size];
1018 	sprintf(gene_count_tracking_name, "%s/genes.count_table", output_dir.c_str());
1019 	FILE* gene_count_out = fopen(gene_count_tracking_name, "w");
1020 	if (!gene_count_out)
1021 	{
1022 		fprintf(stderr, "Error: cannot open gene-level count table %s for writing\n",
1023 				gene_count_tracking_name);
1024 		exit(1);
1025 	}
1026 	outfiles.gene_count_tracking_out = gene_count_out;
1027 
1028     char isoform_attr_name[filename_buf_size];
1029 	sprintf(isoform_attr_name, "%s/isoforms.attr_table", output_dir.c_str());
1030 	FILE* isoform_attr_out = fopen(isoform_attr_name, "w");
1031 	if (!isoform_attr_out)
1032 	{
1033 		fprintf(stderr, "Error: cannot open isoform-level attribute table %s for writing\n",
1034 				isoform_attr_name);
1035 		exit(1);
1036 	}
1037 	outfiles.isoform_attr_out = isoform_attr_out;
1038 
1039 	char tss_group_attr_name[filename_buf_size];
1040 	sprintf(tss_group_attr_name, "%s/tss_groups.attr_table", output_dir.c_str());
1041 	FILE* tss_group_attr_out = fopen(tss_group_attr_name, "w");
1042 	if (!tss_group_attr_out)
1043 	{
1044 		fprintf(stderr, "Error: cannot open TSS group-level attribute table %s for writing\n",
1045 				tss_group_attr_name);
1046 		exit(1);
1047 	}
1048 	outfiles.tss_group_attr_out = tss_group_attr_out;
1049 
1050 	char cds_attr_name[filename_buf_size];
1051 	sprintf(cds_attr_name, "%s/cds.attr_table", output_dir.c_str());
1052 	FILE* cds_attr_out = fopen(cds_attr_name, "w");
1053 	if (!cds_attr_out)
1054 	{
1055 		fprintf(stderr, "Error: cannot open CDS level attribute table %s for writing\n",
1056 				cds_attr_name);
1057 		exit(1);
1058 	}
1059 	outfiles.cds_attr_out = cds_attr_out;
1060 
1061 	char gene_attr_name[filename_buf_size];
1062 	sprintf(gene_attr_name, "%s/genes.attr_table", output_dir.c_str());
1063 	FILE* gene_attr_out = fopen(gene_attr_name, "w");
1064 	if (!gene_attr_out)
1065 	{
1066 		fprintf(stderr, "Error: cannot open gene-level attribute table %s for writing\n",
1067 				gene_attr_name);
1068 		exit(1);
1069 	}
1070 	outfiles.gene_attr_out = gene_attr_out;
1071 
1072     char read_group_info_name[filename_buf_size];
1073 	sprintf(read_group_info_name, "%s/samples.table", output_dir.c_str());
1074 	FILE* read_group_out = fopen(read_group_info_name, "w");
1075 	if (!read_group_out)
1076 	{
1077 		fprintf(stderr, "Error: cannot open read group info file %s for writing\n",
1078 				read_group_info_name);
1079 		exit(1);
1080 	}
1081 	outfiles.read_group_info_out = read_group_out;
1082 
1083     char run_info_name[filename_buf_size];
1084 	sprintf(run_info_name, "%s/run.info", output_dir.c_str());
1085 	FILE* run_info_out = fopen(run_info_name, "w");
1086 	if (!run_info_out)
1087 	{
1088 		fprintf(stderr, "Error: cannot open run info file %s for writing\n",
1089 				run_info_name);
1090 		exit(1);
1091 	}
1092 	outfiles.run_info_out = run_info_out;
1093 
1094 }
1095 
open_outfiles_for_writing(Outfiles & outfiles)1096 void open_outfiles_for_writing(Outfiles& outfiles)
1097 {
1098     if (output_format == CUFFDIFF_OUTPUT_FMT)
1099     {
1100         open_outfiles_for_writing_cuffdiff_format(outfiles);
1101     }
1102     else if (output_format == SIMPLE_TABLE_OUTPUT_FMT)
1103     {
1104         open_outfiles_for_writing_simple_table_format(outfiles);
1105     }
1106     else{
1107         fprintf(stderr, "Error: unrecognized output format!\n");
1108         exit(1);
1109     }
1110 
1111 }
1112 
main(int argc,char ** argv)1113 int main(int argc, char** argv)
1114 {
1115     for (int i = 0; i < argc; ++i)
1116     {
1117         cmd_str += string(argv[i]) + " ";
1118     }
1119 
1120     init_library_table();
1121     init_dispersion_method_table();
1122     init_lib_norm_method_table();
1123     init_output_format_table();
1124 
1125     min_isoform_fraction = 1e-5;
1126 
1127 	int parse_ret = parse_options(argc,argv);
1128     if (parse_ret)
1129         return parse_ret;
1130 
1131     if (!use_total_mass && !use_compat_mass)
1132     {
1133         use_total_mass = false;
1134         use_compat_mass = true;
1135     }
1136 
1137 	if(optind >= argc)
1138     {
1139         print_usage();
1140         return 1;
1141     }
1142 
1143     if (!no_update_check)
1144         check_version(PACKAGE_VERSION);
1145 
1146     string ref_gtf_filename = argv[optind++];
1147     vector<string> sam_hit_filenames;
1148 
1149     if (use_sample_sheet)
1150     {
1151         if  (optind < argc)
1152         {
1153 
1154             string sample_sheet_filename = argv[optind++];
1155             FILE* sample_sheet_file = NULL;
1156             if (sample_sheet_filename != "")
1157             {
1158                 sample_sheet_file = fopen(sample_sheet_filename.c_str(), "r");
1159                 if (!sample_sheet_file)
1160                 {
1161                     fprintf(stderr, "Error: cannot open sample sheet file %s for reading\n",
1162                             sample_sheet_filename.c_str());
1163                     exit(1);
1164                 }
1165             }
1166             parse_sample_sheet_file(sample_sheet_file, sample_labels, sam_hit_filenames);
1167         }
1168         else
1169         {
1170             fprintf(stderr, "Error: option --use-sample-sheet requires a single sample sheet filename instead of a list of SAM/BAM files\n");
1171         }
1172     }
1173     else
1174     {
1175         while(optind < argc)
1176         {
1177             string sam_hits_file_name = argv[optind++];
1178             sam_hit_filenames.push_back(sam_hits_file_name);
1179         }
1180 
1181         if (sample_labels.size() == 0)
1182         {
1183             for (size_t i = 1; i < sam_hit_filenames.size() + 1; ++i)
1184             {
1185                 char buf[256];
1186                 sprintf(buf, "q%lu", i);
1187                 sample_labels.push_back(buf);
1188             }
1189         }
1190     }
1191 
1192 	while (sam_hit_filenames.size() < 2)
1193     {
1194         fprintf(stderr, "Error: cuffdiff requires at least 2 SAM files\n");
1195         exit(1);
1196     }
1197 
1198 
1199     if (sam_hit_filenames.size() != sample_labels.size())
1200     {
1201         fprintf(stderr, "Error: number of labels must match number of conditions\n");
1202         exit(1);
1203     }
1204 
1205     if (random_seed == -1)
1206         random_seed = boost::mt19937::default_seed;
1207 
1208 	// seed the random number generator - we'll need it for the importance
1209 	// sampling during MAP estimation of the gammas
1210 	srand48(random_seed);
1211 
1212 	FILE* ref_gtf = NULL;
1213 	if (ref_gtf_filename != "")
1214 	{
1215 		ref_gtf = fopen(ref_gtf_filename.c_str(), "r");
1216 		if (!ref_gtf)
1217 		{
1218 			fprintf(stderr, "Error: cannot open reference GTF file %s for reading\n",
1219 					ref_gtf_filename.c_str());
1220 			exit(1);
1221 		}
1222 	}
1223 
1224 	FILE* mask_gtf = NULL;
1225 	if (mask_gtf_filename != "")
1226 	{
1227 		mask_gtf = fopen(mask_gtf_filename.c_str(), "r");
1228 		if (!mask_gtf)
1229 		{
1230 			fprintf(stderr, "Error: cannot open mask GTF file %s for reading\n",
1231 					mask_gtf_filename.c_str());
1232 			exit(1);
1233 		}
1234 	}
1235 
1236 
1237     FILE* contrast_file = NULL;
1238 	if (contrast_filename != "")
1239 	{
1240 		contrast_file = fopen(contrast_filename.c_str(), "r");
1241 		if (!contrast_file)
1242 		{
1243 			fprintf(stderr, "Error: cannot open contrast file %s for reading\n",
1244 					contrast_filename.c_str());
1245 			exit(1);
1246 		}
1247 	}
1248 
1249     FILE* norm_standards_file = NULL;
1250 	if (norm_standards_filename != "")
1251 	{
1252 		norm_standards_file = fopen(norm_standards_filename.c_str(), "r");
1253 		if (!norm_standards_file)
1254 		{
1255 			fprintf(stderr, "Error: cannot open contrast file %s for reading\n",
1256 					norm_standards_filename.c_str());
1257 			exit(1);
1258 		}
1259 	}
1260 
1261 
1262 	// Note: we don't want the assembly filters interfering with calculations
1263 	// here
1264 
1265 	pre_mrna_fraction = 0.0;
1266     olap_radius = 0;
1267 
1268 	Outfiles outfiles;
1269 
1270     if (output_dir != "")
1271     {
1272         int retcode = mkpath(output_dir.c_str(), 0777);
1273         if (retcode == -1)
1274         {
1275             if (errno != EEXIST)
1276             {
1277                 fprintf (stderr,
1278                          "Error: cannot create directory %s\n",
1279                          output_dir.c_str());
1280                 exit(1);
1281             }
1282         }
1283     }
1284 
1285     open_outfiles_for_writing(outfiles);
1286 
1287     driver(ref_gtf, mask_gtf, contrast_file, norm_standards_file, sam_hit_filenames, outfiles);
1288 
1289 #if 0
1290     if (emit_count_tables)
1291     {
1292         dump_locus_variance_info(output_dir + string("/locus_var.txt"));
1293     }
1294 #endif
1295 
1296 	return 0;
1297 }
1298