1 #include "meta-velvetg.hh"
2 
main(int argc,char * argv[])3 int main( int argc, char* argv[] ){
4   MetaVelvetG* mvg = new MetaVelvetG( argc, argv );
5 
6   cout << "[meta-velvetg] " << "Load meta-graph ..." << endl;
7   MetaGraph* graph = mvg->loadGraph();
8   cout << "[meta-velveth] " << "...done (load meta-graph)." << endl << endl;
9 
10   cout << "[meta-velvetg] " << "Estimate coverage parameters..." << endl;
11   mvg->estimateCoverage( graph );
12   cout << "[meta-velvetg] " << "...done (estimate coverage parameters)." << endl << endl;
13 
14   cout << "[meta-velvetg] " << "Remove low & high coverage nodes ..." << endl;
15   mvg->removeNodes( graph );
16   cout << "[meta-velvetg] " << "...done (remove low & high coverage nodes)." << endl << endl;
17 
18   cout << "[meta-velvetg] " << "Scaffolding based on paired-end information ..." << endl;
19   mvg->scaffolding( graph );
20   cout << "[meta-velvetg] " << "...done (scaffolding)." << endl << endl;
21 
22   cout << "[meta-velvetg] " << "Finalize assembly ... " << endl;
23   mvg->finalize( graph );
24   cout << "[meta-velvetg] " << "...done (finalize assembly)." << endl << endl;
25 
26   cout << "[meta-velvetg] " << "Output results ..." << endl;
27   mvg->output( graph );
28   cout << "[meta-velvetg] " << "...done (output results)." << endl << endl;
29 
30   cout << "[meta-velvetg] " << "All tasks were safely completed..." << endl << endl;
31   delete mvg;
32   return 0;
33 }
34 
loadGraph() const35 MetaGraph* MetaVelvetG::loadGraph() const {
36   MetaGraph* graph = new MetaGraph( FileNameUtils::getSeqFileName(prefix), FileNameUtils::getInitialGraphFileName(prefix) );
37   setInsertLengths( graph );
38   setSplitJudge( graph );
39   setSubgraphOptions( graph );
40   return graph;
41 }
42 
setInsertLengths(MetaGraph * metaGraph) const43 void MetaVelvetG::setInsertLengths( MetaGraph* metaGraph ) const {
44   for( int cat=0 ; cat<CATEGORIES ; ++cat ){
45     metaGraph->setInsertLength( cat, insertLength[cat], insertLengthSD[cat] );
46     cout << "[meta-velvetg] " << "Category = 'short" << cat+1 << "'\t"
47 	 << "Ave. = " << insertLength[cat] << ", SD = " << insertLengthSD[cat] << endl;
48   }
49   metaGraph->setInsertLength( CATEGORIES, longInsertLength, longInsertLengthSD );
50   cout << "[meta-velvetg] " << "Category = 'long'" << "\t"
51        << "Ave. = " << longInsertLength << ", SD = " << longInsertLengthSD << endl;
52 }
53 
setSplitJudge(MetaGraph * metaGraph) const54 void MetaVelvetG::setSplitJudge( MetaGraph* metaGraph ) const {
55   SplitJudge* judge = new SplitJudge( repeatCoverageSD, minSplitLength, flagUseConnections, numValidConnections, numNoiseConnections );
56   judge->setStatsWriter( FileNameUtils::getSplitStatsFileName(prefix), FileNameUtils::getSplitDetailFileName(prefix), flagReportSplitDetail );
57   metaGraph->setSplitJudge( judge );
58 }
59 
setSubgraphOptions(MetaGraph * metaGraph) const60 void MetaVelvetG::setSubgraphOptions( MetaGraph* metaGraph ) const {
61   metaGraph->setSubgraphOptions( FileNameUtils::getSubgraphPrefix(prefix), flagReportSubgraph );
62 }
63 
estimateCoverage(MetaGraph * metaGraph)64 void MetaVelvetG::estimateCoverage( MetaGraph* metaGraph ){
65   estimateExpectedCoverage( metaGraph );
66   estimateExpectedCoverages( metaGraph );
67   estimateCoverageCutoff( metaGraph );
68 }
69 
estimateExpectedCoverages(MetaGraph * metaGraph)70 void MetaVelvetG::estimateExpectedCoverages( MetaGraph* metaGraph ){
71   cout << "[mate-velvetg] " << "Estimate expected coverages ... ";
72   if( flagEstimateExpectedCoverages ){
73     cout << "yes." << endl;
74     metaGraph->saveStats( FileNameUtils::getHistoStatsFileName(prefix) );
75     PeakDetectorParameters* params = getPeakDetectorParameters();
76     PeakDetector* peakDetector = PeakDetectorFactory::instantiatePeakDetector( "simple", params );
77     numCoveragePeaks = peakDetector->detectCoveragePeaks( metaGraph, expectedCoverages, coverageBoundaries );
78     delete params; delete peakDetector;
79     if( numCoveragePeaks <= 1 ){
80       cout << endl
81 	   << "[meta-velvetg] " << "Warning: Can't find multiple coverage peaks." << endl
82 	   << "[meta-velvetg] " << "Trun on single coverage peak mode. " << endl << endl;
83       numCoveragePeaks = 1;
84       expectedCoverages[0] = expectedCoverage;
85     }
86   } else {
87     cout << "no." << endl;
88   }
89   metaGraph->setExpectedCoverages( numCoveragePeaks, expectedCoverages );
90   metaGraph->showExpectedCoverages();
91 }
92 
getPeakDetectorParameters() const93 PeakDetectorParameters* MetaVelvetG::getPeakDetectorParameters() const {
94   return new PeakDetectorParameters( minPeakCoverage, maxPeakCoverage, histoBinWidth, histoSnRatio );
95 }
96 
estimateExpectedCoverage(MetaGraph * metaGraph)97 void MetaVelvetG::estimateExpectedCoverage( MetaGraph* metaGraph ){
98   cout << "[mate-velvetg] " << "Estimate expected coverage ... ";
99   if( flagEstimateExpectedCoverage ){
100     cout << "yes.";
101     expectedCoverage = metaGraph->estimateExpectedCoverage( prefix );
102   } else {
103     cout << "no.";
104   }
105   cout << "\t" << "Expected coverage = " << expectedCoverage << endl;
106 }
107 
estimateCoverageCutoff(MetaGraph * metaGraph)108 void MetaVelvetG::estimateCoverageCutoff( MetaGraph* metaGraph ){
109   cout << "[meta-velvetg] " << "Estimate coverage cutoff ... ";
110   if( flagEstimateCoverageCutoff ){
111     cout << "yes.";
112     coverageCutoff = expectedCoverages[numCoveragePeaks-1] / 2;
113   } else {
114     cout << "no.";
115   }
116   cout << "\t" << "Coverage cutoff = " << coverageCutoff << endl;
117 }
118 
removeNodes(MetaGraph * metaGraph)119 void MetaVelvetG::removeNodes( MetaGraph* metaGraph ){
120   cout << "[meta-velvetg] " << "Min. coverage cutoff for short reads        = " << coverageCutoff << endl
121        << "[meta-velvetg] " << "Min. coverage cutoff for long reads         = " << longCoverageCutoff << endl
122        << "[meta-velvetg] " << "Max. coverage cutoff for short & long reads = " << maxCoverageCutoff << endl
123        << "[meta-velvetg] " << "Min. contig length                          = " << minContigLength << endl;
124   metaGraph->removeNodes( coverageCutoff, longCoverageCutoff, maxCoverageCutoff,
125 			  minContigLength, flagExportFilteredNodes,
126 			  FileNameUtils::getLowCoverageContigsFileName(prefix),
127 			  FileNameUtils::getHighCoverageContigsFileName(prefix) );
128 }
129 
scaffolding(MetaGraph * metaGraph)130 void MetaVelvetG::scaffolding( MetaGraph* metaGraph ){
131   metaGraph->scaffolding( flagMatePair, flagScaffolding, maxChimeraRate, flagDiscardChimera );
132 }
133 
finalize(MetaGraph * metaGraph)134 void MetaVelvetG::finalize( MetaGraph* metaGraph ){
135   metaGraph->finalize( coverageCutoff, longCoverageCutoff );
136 }
137 
output(MetaGraph * metaGraph)138 void MetaVelvetG::output( MetaGraph* metaGraph ){
139   metaGraph->save( FileNameUtils::getScaffoldFileName(prefix), minContigLength, coverageMask );
140   metaGraph->saveStats( FileNameUtils::getStatsFileName(prefix) );
141   metaGraph->save( FileNameUtils::getLastGraphFileName(prefix) );
142   if( flagExportAlignments ){ metaGraph->saveAlignments( FileNameUtils::getAlignmentFileName(prefix), minContigLength, FileNameUtils::getSeqFileName(prefix) ); }
143   if( flagExportAmos ){ metaGraph->saveAmos( FileNameUtils::getAmosFileName(prefix), minContigLength ); }
144   if( flagExportUnusedReads ){ metaGraph->saveUnusedReads( prefix, minContigLength ); }
145 }
146 
MetaVelvetG(int argc,char * argv[])147 MetaVelvetG::MetaVelvetG( int argc, char* argv[] ){
148   setDefaultParameters();
149   cout << endl<< endl
150        << "[meta-velvetg] " << "Check command line options..." << endl;
151   if( !checkParameters( argc, argv ) or !setParameters( argc, argv ) or !checkParameters() ){
152     cout << "\t" << "Your command line options are not appropriate." << endl
153 	 << "\t" << "Please read usage of meta-velvetg carefully." << endl
154 	 << endl
155 	 << "\t" << "Thanks, " << endl
156 	 << endl << endl;
157     printUsage();
158     exit(-1);
159   }
160   cout << "\t" << "OK. Your command line options seem to be good." << endl
161        << endl << endl;
162   guessNonMandatoryParameters();
163 }
164 
setDefaultParameters()165 void MetaVelvetG::setDefaultParameters(){
166   // velvet parameters
167   flagEstimateCoverageCutoff   = true;
168   flagEstimateExpectedCoverage = true;
169   flagReadTracking             = true;
170   flagScaffolding              = true;
171   flagExportUnusedReads        = false;
172   flagExportFilteredNodes      = false;
173   flagExportAmos               = false;
174   flagExportAlignments         = false;
175   accelerationBits   = 24;
176   minContigLength    = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
177   coverageMask       = 1;
178   coverageCutoff     = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
179   maxCoverageCutoff  = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
180   longCoverageCutoff = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
181   expectedCoverage   = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
182   longInsertLength   = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
183   longInsertLengthSD = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
184   for ( int cat = 0; cat < CATEGORIES; ++cat) {
185     insertLength[cat]   = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
186     insertLengthSD[cat] = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
187     flagMatePair[cat]   = false;
188   }
189 
190   // graph-splitting parameters
191   flagDiscardChimera  = false;
192   maxChimeraRate      = META_VELEVTG_DEFAULT_MAX_CHIMERA_RATE;
193   repeatCoverageSD    = META_VELVETG_DEFAULT_REPEAT_COVERAGE_SD;
194   minSplitLength      = META_VELVETG_DEFAULT_MIN_SPLIT_LENGTH;
195   flagUseConnections  = true;
196   numValidConnections = META_VELVETG_DEFAULT_NUM_VALID_CONNECTIONS;
197   numNoiseConnections = META_VELVETG_DEFAULT_NUM_NOISE_CONNECTIONS;
198   flagReportSplitDetail = false;
199   flagReportSubgraph  = false;
200 
201   // peak parameters
202   flagEstimateExpectedCoverages = true;
203   numCoveragePeaks   = 1;
204   for( int i=0 ; i<META_VELVET_G_MAX_NUM_COVERAGE_PEAKS ; ++i ){
205     expectedCoverages[i] = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
206   }
207   minPeakCoverage = META_VELVET_G_DEFAULT_MIN_PEAK_COVERAGE;
208   maxPeakCoverage = META_VELVET_G_DEFAULT_MAX_PEAK_COVERAGE;
209   histoBinWidth   = META_VELVET_G_DEFAULT_HISTO_BIN_WIDTH;
210   histoSnRatio    = META_VELVET_G_DEFAULT_HISTO_SN_RATIO;
211 }
212 
checkParameters() const213 bool MetaVelvetG::checkParameters() const {
214   return checkMetaHistoParameters();
215 }
216 
checkMetaHistoParameters() const217 bool MetaVelvetG::checkMetaHistoParameters() const {
218   PeakDetectorParameters* param = getPeakDetectorParameters();
219   bool checkResult = param->checkParameters();
220   delete param;
221   return checkResult;
222 }
223 
checkParameters(int argc,char * argv[]) const224 bool MetaVelvetG::checkParameters( int argc, char* argv[] ) const {
225   if( checkArgc(argc) and checkPrefix(argc, argv) ){
226     return true;
227   }
228   return false;
229 }
230 
checkArgc(int argc) const231 bool MetaVelvetG::checkArgc( int argc ) const {
232   if( argc < 2 ){
233     return false;
234   }
235   return true;
236 }
237 
checkPrefix(int argc,char * argv[]) const238 bool MetaVelvetG::checkPrefix( int argc, char* argv[] ) const {
239   if( strncmp( argv[1], "-", 1 ) == 0 ){
240     cerr << "[meta-velvetg] Error: the first argument should be a directory path (should not be start with '-')" << endl;
241     return false;
242   }
243   return checkMandatoryFiles( (string)argv[1] );
244 }
245 
checkMandatoryFiles(string prefix) const246 bool MetaVelvetG::checkMandatoryFiles( string prefix ) const {
247   if( !FileUtils::isValidFile(FileNameUtils::getSeqFileName(prefix)) ){
248     cerr << "[meta-velvetg] Error: There is no 'Sequences' file in the directory: " << prefix << endl
249 	 << "\t" << "You should run 'velveth' before running 'meta-velvetg'" << endl << endl;
250     return false;
251   }
252   if( !FileUtils::isValidFile(FileNameUtils::getRoadmapFileName(prefix)) ){
253     cerr << "[meta-velvetg] Error: There is no 'Roadmaps' file in the directory: " << prefix << endl
254 	 << "\t" << "You should run 'velveth' before running 'meta-velvetg'" << endl << endl;
255     return false;
256   }
257   if( !FileUtils::isValidFile(FileNameUtils::getInitialGraphFileName(prefix)) ){
258     cerr << "[meta-velvetg] Error: There is no 'Graph2' file in the directory: " << prefix << endl
259 	 << "\t" << "You should run 'velvetg' with '-read_trkg' opton before runing 'meta-velvetg'" << endl << endl;
260     return false;
261   }
262   return true;
263 }
264 
printUsage() const265 void MetaVelvetG::printUsage() const {
266   cerr << endl << endl
267        << "************************************************************" << endl
268        << "Program: ----------------------------------------------" << endl
269        << "  " << "meta-velvetg - contiging and scaffolding program for metagenomics NGS data" << endl
270        << "\t" << "Version = " << META_VELVET_VERSION << endl
271        << "\t" << "MAX_CATEGORIES = " << CATEGORIES << endl
272        << "\t" << "MAX_KMER_LENGTH = " << MAXKMERLENGTH << endl
273        << endl << endl
274        << "Usage: ------------------------------------------------" << endl
275        << "  " << "meta-velvetg directory [options]" << endl
276        << "\t" << "directory                   " << "\t: directory name for output files" << endl
277        << endl
278        << "  " << "Graph-splitting options (metagenome-specific):" << endl
279        << "\t" << "-discard_chimera <yes|no>    " << "\t: discard chimera sub-graph (default: no)" << endl
280        << "\t" << "-max_chimera_rate <double>   " << "\t: maximum allowable chimera rate (default: 0.0)" << endl
281        << "\t" << "-repeat_cov_sd               " << "\t: standard deviation of repeat node coverages (default: 0.1)" << endl
282        << "\t" << "-min_split_length <int>      " << "\t: minimum node length required for repeat resolution (default: 0)" << endl
283        << "\t" << "-valid_connections <int>     " << "\t: minimum allowable number of consistent paired-end connections (default: 1)" << endl
284        << "\t" << "-noise_connections <int>     " << "\t: maximum allowable number of inconsistent paired-end connections (default: 0)" << endl
285        << "\t" << "-use_connections <yes|no>    " << "\t: use paired-end connections for graph splitting (default: yes)" << endl
286        << "\t" << "-report_split_detail <yes|no>" << "\t: report sequences around repeat nodes (default: no)" << endl
287        << "\t" << "-report_subgraph <yes|no>    " << "\t: report node sequences for each subgraph (default: no)" << endl
288        << endl
289        << "  " << "Peak detection options (metagenome-specific):" << endl
290        << "\t" << "-exp_covs <string|auto>      " << "\t: expected coverages for each species in microbiome (default: auto)" << endl
291        << "\t" << "                             " << "\t    ex) -exp_covs 214_122_70_43_25_13.5" << endl
292        << "\t" << "                             " << "\t    coverage values should be sorted in a descending order" << endl
293        << "\t" << "-min_peak_cov <double>       " << "\t: minimum peak coverage (default: " << META_VELVET_G_DEFAULT_MIN_PEAK_COVERAGE << ")" << endl
294        << "\t" << "-max_peak_cov <double>       " << "\t: maximum peak coverage (default: " << META_VELVET_G_DEFAULT_MAX_PEAK_COVERAGE << ")" << endl
295        << "\t" << "-histo_bin_width <double>    " << "\t: bin width of peak coverage histogram (default: " << META_VELVET_G_DEFAULT_HISTO_BIN_WIDTH << ")" << endl
296        << "\t" << "-histo_sn_ratio <double>     " << "\t: signal-noise ratio to remove peak noises (default: " << META_VELVET_G_DEFAULT_HISTO_SN_RATIO << ")" << endl
297        << endl
298        << "  " << "Contiging options: (common to single-genome)" << endl
299        << "\t" << "-cov_cutoff <double|auto>    " << "\t: removal of low coverage nodes AFTER tour bus or allow the system to infer it (default: auto)" << endl
300        << "\t" << "-max_coverage <double>       " << "\t: removal of high coverage nodes AFTER tour bus (default: no removal)" << endl
301        << "\t" << "-long_cov_cutoff <double>    " << "\t: removal of nodes with low long-read coverage AFTER tour bus (default: no removal)" << endl
302     //<< "\t" << "-read_trkg <yes|no>          " << "\t: tracking of short read positions in assembly (default: yes)" << endl
303        << "\t" << "-max_branch_length <int>     " << "\t: maximum length in base pair of bubble (default: 100)" << endl
304        << "\t" << "-max_divergence <double>     " << "\t: maximum divergence rate between two branches in a bubble (default: 0.2)" << endl
305        << "\t" << "-max_gap_count <int>         " << "\t: maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)" << endl
306        << "\t" << "-min_contig_lgth <int>       " << "\t: minimum contig length exported to contigs.fa file (default: hash length * 2)" << endl
307        << endl
308        << "  " << "Scaffolding options: (common to single-genome)" << endl
309        << "\t" << "-scaffolding <yes|no>        " << "\t: scaffolding of contigs used paired end information (default: on)" << endl
310        << "\t" << "-exp_cov <double|auto>       " << "\t: expected coverage of unique regions or allow the system to infer it (default: auto)" << endl
311        << "\t" << "-ins_length <double>         " << "\t: expected distance between two paired end reads for the category (default: no read pairing)" << endl
312        << "\t" << "-ins_length_sd <double>      " << "\t: standard deviation of insert length for the category (default: insert length / 10)" << endl
313        << "\t" << "-ins_length2 <double>        " << "\t: expected distance between two paired end reads for the category (default: no read pairing)" << endl
314        << "\t" << "-ins_length2_sd <double>     " << "\t: standard deviation of insert length for the category (default: insert length / 10)" << endl
315        << "\t" << "-ins_length_long <double>    " << "\t: expected distance between two long paired-end reads (default: no read pairing)" << endl
316        << "\t" << "-ins_length_long_sd <double> " << "\t: standard deviation of insert length for the category" << endl
317        << "\t" << "-min_pair_count <int>        " << "\t: minimum number of paired end connections to justify the scaffolding of two long contigs (default: 5)" << endl
318        << "\t" << "-long_mult_cutoff <int>      " << "\t: minimum number of long reads required to merge contigs (default: 2)" << endl
319        << "\t" << "-shortMatePaired <yes|no>    " << "\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default no)" << endl
320        << endl
321        << "  " << "Output options: (common to single-genome)" << endl
322        << "\t" << "-amos_file <yes|no>          " << "\t: export assembly to AMOS file (default: no export)" << endl
323        << "\t" << "-coverage_mask <int>         " << "\t: minimum coverage required for confident regions of contigs (default: 1)" << endl
324        << "\t" << "-unused_reads <yes|no>       " << "\t: export unused reads in UnusedReads.fa file (default: no)" << endl
325        << "\t" << "-alignments <yes|no>         " << "\t: export a summary of contig alignment to the reference sequences (default: no)" << endl
326        << "\t" << "-exportFiltered <yes|no>     " << "\t: export the long nodes which were eliminated by the coverage filters (default: no)" << endl
327        << "\t" << "-paired_exp_fraction <double>" << "\t: remove all the paired end connections which less than the specified fraction of the expected count (default: 0.1)" << endl
328        << endl << endl
329        << "Output: -----------------------------------------------" << endl
330        << "\t" << "directory/meta-velvetg.contigs.fa       \t: fasta file of contigs longer than twice hash length" << endl
331        << "\t" << "directory/meta-velvetg.LastGraph        \t: special formatted file with all the information on the final graph" << endl
332        << "\t" << "directory/meta-velvetg.Graph2-stats.txt \t: stats file (tab-delimited) useful for optimizing coverage peak values" << endl
333        << "\t" << "directory/meta-velvetg.split-stats.txt  \t: stats file (tab-delimited) useful for optimizing graph-splitting parameters" << endl
334        << endl
335        << "************************************************************" << endl
336        << endl << endl;
337 }
338 
setParameters(int argc,char * argv[])339 bool MetaVelvetG::setParameters( int argc, char* argv[] ){
340   prefix = argv[1];
341   for ( int arg_index=2 ; arg_index<argc ; arg_index++ ) {
342     char* arg = argv[arg_index++];
343     if ( arg_index >= argc ) {
344       cerr << "[meta-velvetg] Error: Unusual number of arguments!" << endl;
345       return false;
346     }
347     if( !setParameter( arg,  argv[arg_index] ) ){
348       return false;
349     }
350   }
351   return true;
352 }
353 
setParameter(char * name,char * val)354 bool MetaVelvetG::setParameter( char* name, char* val ){
355   if( strcmp(name, "-cov_cutoff")           == 0 ){ return setCoverageCutoff( val ); }
356   if( strcmp(name, "-long_cov_cutoff")      == 0 ){ return setLongCoverageCutoff( val ); }
357   if( strcmp(name, "-exp_cov")              == 0 ){ return setExpectedCoverage( val ); }
358   if( strcmp(name, "-ins_length")           == 0 ){ return setInsertLength( 0, val ); }
359   if( strcmp(name, "-ins_length_sd")        == 0 ){ return setInsertLengthSD( 0, val ); }
360   if( strcmp(name, "-ins_length_long")      == 0 ){ return setLongInsertLength( val ); }
361   if( strcmp(name, "-ins_length_long_sd")   == 0 ){ return setLongInsertLengthSD( val ); }
362   if( strncmp(name, "-ins_length", 11)      == 0 && strchr(name, 'd') == NULL ){int cat = 0; sscanf(name, "-ins_length%d", &cat); return setInsertLength( cat-1, val );}
363   if( strncmp(name, "-ins_length", 11)      == 0 ){ int cat = 0; sscanf(name, "-ins_length%d_sd", &cat); return setInsertLengthSD( cat-1, val ); }
364   if( strcmp(name, "-read_trkg")            == 0 ){ return setFlagReadTracking( val ); }
365   if( strcmp(name, "-scaffolding")          == 0 ){ return setFlagScaffolding( val ); }
366   if( strcmp(name, "-exportFiltered")       == 0 ){ return setFlagExportFilteredNodes( val ); }
367   if( strcmp(name, "-amos_file")            == 0 ){ return setFlagExportAmos( val ); }
368   if( strcmp(name, "-alignments")           == 0 ){ return setFlagExportAlignments( val ); }
369   if( strcmp(name, "-min_contig_lgth")      == 0 ){ return setMinContigLength( val ); }
370   if( strcmp(name, "-coverage_mask")        == 0 ){ return setCoverageMask( val ); }
371   if( strcmp(name, "-accel_bits")           == 0 ){ return setAccelerationBits( val ); }
372   if( strcmp(name, "-max_branch_length")    == 0 ){ return setMaxBranchLength( val ); }
373   if( strcmp(name, "-max_divergence")       == 0 ){ return _setMaxDivergence( val ); }
374   if( strcmp(name, "-max_gap_count")        == 0 ){ return setMaxGapCount( val ); }
375   if( strcmp(name, "-min_pair_count")       == 0 ){ return setMinPairCount( val ); }
376   if( strcmp(name, "-max_coverage")         == 0 ){ return setMaxCoverage( val ); }
377   if( strcmp(name, "-long_mult_cutoff")     == 0 ){ return setLongMultiCutoff( val ); }
378   if( strcmp(name, "-paired_exp_fraction")  == 0 ){ return _setPairedExpFraction( val ); }
379   if( strcmp(name, "-unused_reads")         == 0 ){ return setFlagExportUnusedReads(val); }
380   if( strcmp(name, "-shortMatePaired")      == 0 ){ return setFlagMatePair( 0, val ); }
381   if( strncmp(name, "-shortMatePaired", 16) == 0 ){ int cat = 0; sscanf(name, "-shortMatePaired%i", &cat); return setFlagMatePair( cat-1, val ); }
382   if( strcmp(name, "-discard_chimera")      == 0 ){ return setFlagDiscardChimera( val ); }
383   if( strcmp(name, "-max_chimera_rate")     == 0 ){ return setMaxChimeraRate( val ); }
384   if( strcmp(name, "-exp_covs")             == 0 ){ return setExpectedCoverages( val ); }
385   if( strcmp(name, "-manual_exp_cov_multi") == 0 ){ return setExpectedCoverages( val ); }
386   if( strcmp(name, "-repeat_cov_sd")        == 0 ){ return setRepeatCoverageSD( val ); }
387   if( strcmp(name, "-min_split_length")     == 0 ){ return setMinSplitLength( val ); }
388   if( strcmp(name, "-min_peak_cov")         == 0 ){ return setMinPeakCoverage( val ); }
389   if( strcmp(name, "-max_peak_cov")         == 0 ){ return setMaxPeakCoverage( val ); }
390   if( strcmp(name, "-histo_bin_width")      == 0 ){ return setHistoBinWidth( val ); }
391   if( strcmp(name, "-histo_sn_ratio")       == 0 ){ return setHistoSnRatio( val ); }
392   if( strcmp(name, "-valid_connections")    == 0 ){ return setNumValidConnections( val ); }
393   if( strcmp(name, "-noise_connections")    == 0 ){ return setNumNoiseConnections( val ); }
394   if( strcmp(name, "-use_connections")      == 0 ){ return setFlagUseConnections( val ); }
395   if( strcmp(name, "-report_split_detail")  == 0 ){ return setFlagReportSplitDetail( val ); }
396   if( strcmp(name, "-report_subgraph")      == 0 ){ return setFlagReportSubgraph( val ); }
397   if( strcmp(name, "--help")                == 0 ){ return false; }
398   cerr << "[meta-velvetg] Unknown option: " << name << endl;
399   return false;
400 }
401 
setCoverageCutoff(char * val)402 bool MetaVelvetG::setCoverageCutoff( char* val ){
403   if ( strcmp(val, "auto") == 0 ){
404     flagEstimateCoverageCutoff = true;
405   } else {
406     flagEstimateCoverageCutoff = false;
407     coverageCutoff = atof( val );
408   }
409   return true;
410 }
411 
setLongCoverageCutoff(char * val)412 bool MetaVelvetG::setLongCoverageCutoff( char* val ){
413   longCoverageCutoff = atof( val );
414   return true;
415 }
416 
setExpectedCoverage(char * val)417 bool MetaVelvetG::setExpectedCoverage( char* val ){
418   if( strcmp(val, "auto") == 0 ){
419     flagEstimateExpectedCoverage = true;
420   } else {
421     flagEstimateExpectedCoverage = false;
422     expectedCoverage = atof( val );
423   }
424   return true;
425 }
426 
setInsertLength(int cat,char * val)427 bool MetaVelvetG::setInsertLength( int cat, char* val ){
428   if( !checkCategory( cat ) ){ return false; }
429   insertLength[cat] = atof( val );
430   if( insertLength[cat] <= 0 ){
431     cerr << "[meta-velvetg] Error: Invalid insert length for category " << cat << ": " << insertLength[cat] << endl;
432     return false;
433   }
434   return true;
435 }
436 
setInsertLengthSD(int cat,char * val)437 bool MetaVelvetG::setInsertLengthSD( int cat, char* val ){
438   if( !checkCategory( cat ) ){ return false; }
439   insertLengthSD[cat] = atof( val );
440   if( insertLengthSD[cat] <= 0 ){
441     cerr << "[meta-velvetg] Error: Invalid insert length SD for category " << cat << ": " << insertLengthSD[cat] << endl;
442     return false;
443   }
444   return true;
445 }
446 
setLongInsertLength(char * val)447 bool MetaVelvetG::setLongInsertLength( char* val ){
448   longInsertLength = atof( val );
449   if( longInsertLength <= 0 ){
450     cerr << "[meta-velvetg] Error: Invalid insert length for long reads: " << longInsertLength << endl;
451     return false;
452   }
453   return true;
454 }
455 
setLongInsertLengthSD(char * val)456 bool MetaVelvetG::setLongInsertLengthSD( char* val ){
457   longInsertLengthSD = atof( val );
458   if( longInsertLengthSD <= 0 ){
459     cerr << "[meta-velvetg] Error: Invalid insert length SD for long reads: " << longInsertLengthSD << endl;
460     return false;
461   }
462   return true;
463 }
464 
setFlagReadTracking(char * val)465 bool MetaVelvetG::setFlagReadTracking( char* val ){
466   flagReadTracking = (strcmp(val, "yes") == 0);
467   return true;
468 }
469 
setFlagScaffolding(char * val)470 bool MetaVelvetG::setFlagScaffolding( char* val ){
471   flagScaffolding = (strcmp(val, "yes") == 0);
472   return true;
473 }
474 
setFlagExportFilteredNodes(char * val)475 bool MetaVelvetG::setFlagExportFilteredNodes( char* val ){
476   flagExportFilteredNodes = (strcmp(val, "yes") == 0);
477   return true;
478 }
479 
setFlagExportAmos(char * val)480 bool MetaVelvetG::setFlagExportAmos( char* val ){
481   flagExportAmos = (strcmp(val, "yes") == 0);
482   return true;
483 }
484 
setFlagExportAlignments(char * val)485 bool MetaVelvetG::setFlagExportAlignments( char* val ){
486   flagExportAlignments = (strcmp(val, "yes") == 0);
487   return true;
488 }
489 
setMinContigLength(char * val)490 bool MetaVelvetG::setMinContigLength( char* val ){
491   minContigLength = atoi( val );
492   return true;
493 }
494 
setCoverageMask(char * val)495 bool MetaVelvetG::setCoverageMask( char* val ){
496   coverageMask = atof( val );
497   return true;
498 }
499 
setAccelerationBits(char * val)500 bool MetaVelvetG::setAccelerationBits( char* val ){
501   accelerationBits = atoi( val );
502   if( accelerationBits < 0 ){
503     cerr << "[meta-velvetg] Error: Illegal acceleration parameter: " << accelerationBits << endl;
504     return false;
505   }
506   return true;
507 }
508 
setMaxBranchLength(char * val)509 bool MetaVelvetG::setMaxBranchLength( char* val ){
510   setMaxReadLength( atoi(val) );
511   setLocalMaxReadLength( atoi(val) );
512   return true;
513 }
514 
_setMaxDivergence(char * val)515 bool MetaVelvetG::_setMaxDivergence( char* val ){
516   setMaxDivergence( atof(val) );
517   setLocalMaxDivergence( atof(val) );
518   return true;
519 }
520 
setMaxGapCount(char * val)521 bool MetaVelvetG::setMaxGapCount( char* val ){
522   setMaxGaps( atoi(val) );
523   setLocalMaxGaps( atoi(val) );
524   return true;
525 }
526 
setMinPairCount(char * val)527 bool MetaVelvetG::setMinPairCount( char* val ){
528   setUnreliableConnectionCutoff( atoi(val) );
529   return true;
530 }
531 
setMaxCoverage(char * val)532 bool MetaVelvetG::setMaxCoverage( char* val ){
533   maxCoverageCutoff = atof( val );
534   return true;
535 }
536 
setLongMultiCutoff(char * val)537 bool MetaVelvetG::setLongMultiCutoff( char * val ){
538   setMultiplicityCutoff( atoi(val) );
539   return true;
540 }
541 
_setPairedExpFraction(char * val)542 bool MetaVelvetG::_setPairedExpFraction( char * val ){
543   setPairedExpFraction( atof(val) );
544   return true;
545 }
546 
setFlagExportUnusedReads(char * val)547 bool MetaVelvetG::setFlagExportUnusedReads( char* val ){
548   if( strcmp(val, "yes") == 0 ){
549     flagExportUnusedReads = true;
550     flagReadTracking = true;
551   }
552   return true;
553 }
554 
setFlagMatePair(int cat,char * val)555 bool MetaVelvetG::setFlagMatePair( int cat, char* val ){
556   checkCategory( cat );
557   flagMatePair[cat] = ( strcmp(val, "yes") == 0 );
558   return true;
559 }
560 
setFlagDiscardChimera(char * val)561 bool MetaVelvetG::setFlagDiscardChimera( char* val ){
562   flagDiscardChimera = ( strcmp(val, "yes") == 0 );
563   return true;
564 }
565 
setMaxChimeraRate(char * val)566 bool MetaVelvetG::setMaxChimeraRate( char* val ){
567   maxChimeraRate = atof( val );
568   return true;
569 }
570 
setExpectedCoverages(char * val)571 bool MetaVelvetG::setExpectedCoverages( char* val ){
572   flagEstimateExpectedCoverages = false;
573   char* tokenPointer = val;
574   char* tmp[META_VELVET_G_MAX_NUM_COVERAGE_PEAKS];
575   for( numCoveragePeaks=0; numCoveragePeaks<META_VELVET_G_MAX_NUM_COVERAGE_PEAKS ; ++numCoveragePeaks ){
576     if( (tmp[numCoveragePeaks] = strtok(tokenPointer, META_VELVET_G_COVERAGE_DELIM_CHAR)) != NULL ){
577       expectedCoverages[numCoveragePeaks] = atof( tmp[numCoveragePeaks] );
578     } else {
579       break;
580     }
581     tokenPointer = NULL;
582   }
583   return checkCoverageOrder();
584 }
585 
setRepeatCoverageSD(char * val)586 bool MetaVelvetG::setRepeatCoverageSD( char* val ){
587   repeatCoverageSD = atof(val);
588   return true;
589 }
590 
setMinSplitLength(char * val)591 bool MetaVelvetG::setMinSplitLength( char* val ){
592   minSplitLength = atoi(val);
593   return true;
594 }
595 
setMinPeakCoverage(char * val)596 bool MetaVelvetG::setMinPeakCoverage( char* val ){
597   minPeakCoverage = atof(val);
598   return true;
599 }
600 
setMaxPeakCoverage(char * val)601 bool MetaVelvetG::setMaxPeakCoverage( char* val ){
602   maxPeakCoverage = atof(val);
603   return true;
604 }
605 
setHistoBinWidth(char * val)606 bool MetaVelvetG::setHistoBinWidth( char* val ){
607   histoBinWidth = atof(val);
608   return true;
609 }
610 
setHistoSnRatio(char * val)611 bool MetaVelvetG::setHistoSnRatio( char* val ){
612   histoSnRatio = atof(val);
613   return true;
614 }
615 
setNumValidConnections(char * val)616 bool MetaVelvetG::setNumValidConnections( char* val ){
617   numValidConnections = atoi(val);
618   return true;
619 }
620 
setNumNoiseConnections(char * val)621 bool MetaVelvetG::setNumNoiseConnections( char* val ){
622   numNoiseConnections = atoi(val);
623   return true;
624 }
625 
setFlagUseConnections(char * val)626 bool MetaVelvetG::setFlagUseConnections( char* val ){
627   flagUseConnections = ( strcmp(val, "yes") == 0 );
628   return true;
629 }
630 
setFlagReportSplitDetail(char * val)631 bool MetaVelvetG::setFlagReportSplitDetail( char* val ){
632   flagReportSplitDetail = ( strcmp(val, "yes") == 0 );
633   return true;
634 }
635 
setFlagReportSubgraph(char * val)636 bool MetaVelvetG::setFlagReportSubgraph( char* val ){
637   flagReportSubgraph = ( strcmp(val, "yes") == 0 );
638   return true;
639 }
640 
checkCategory(int cat) const641 bool MetaVelvetG::checkCategory( int cat ) const {
642   if( (cat<0) or (cat>CATEGORIES) ){
643     cerr << "[meta-velvetg] Error: Invalid category: " << cat << endl;
644     return false;
645   }
646   return true;
647 }
648 
checkCoverageOrder() const649 bool MetaVelvetG::checkCoverageOrder() const {
650   for( int i=1 ; i<numCoveragePeaks ; ++i ){
651     if( expectedCoverages[i-1] <= expectedCoverages[i] ){
652       cerr << "[meta-velvetg] Error: expected coverages should be sorted in a descending order: "
653 	   << expectedCoverages[i-1] << " <-> " << expectedCoverages[i] << endl;
654       return false;
655     }
656   }
657   return true;
658 }
659 
guessNonMandatoryParameters()660 void MetaVelvetG::guessNonMandatoryParameters(){
661   guessInsertLengthSD();
662   guessCoverageFlags();
663 }
664 
guessInsertLengthSD()665 void MetaVelvetG::guessInsertLengthSD(){
666   for( int cat=0 ; cat<CATEGORIES ; ++cat){
667     if( isGuessInsertLengthSD( insertLength[cat], insertLengthSD[cat] ) )
668       insertLengthSD[cat] = guessInsertLengthSD( insertLength[cat] );
669   }
670   if( isGuessInsertLengthSD( longInsertLength, longInsertLengthSD ) )
671     longInsertLengthSD = guessInsertLengthSD( longInsertLength );
672 }
673 
isGuessInsertLengthSD(double ave,double sd)674 bool MetaVelvetG::isGuessInsertLengthSD( double ave, double sd ){
675   return (ave != META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED) and (sd == META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED);
676 }
677 
guessInsertLengthSD(double ave)678 double MetaVelvetG::guessInsertLengthSD( double ave ){
679   return ave * META_VELVET_G_SD_RATIO_FOR_GUESS;
680 }
681 
guessCoverageFlags()682 void MetaVelvetG::guessCoverageFlags(){
683   if( expectedCoverage < 0 )
684     flagEstimateExpectedCoverage = true;
685   if( coverageCutoff < 0 )
686     flagEstimateCoverageCutoff = true;
687 }
688 
689 
690