1 #include "meta-velvetg.hh"
2
main(int argc,char * argv[])3 int main( int argc, char* argv[] ){
4 MetaVelvetG* mvg = new MetaVelvetG( argc, argv );
5
6 cout << "[meta-velvetg] " << "Load meta-graph ..." << endl;
7 MetaGraph* graph = mvg->loadGraph();
8 cout << "[meta-velveth] " << "...done (load meta-graph)." << endl << endl;
9
10 cout << "[meta-velvetg] " << "Estimate coverage parameters..." << endl;
11 mvg->estimateCoverage( graph );
12 cout << "[meta-velvetg] " << "...done (estimate coverage parameters)." << endl << endl;
13
14 cout << "[meta-velvetg] " << "Remove low & high coverage nodes ..." << endl;
15 mvg->removeNodes( graph );
16 cout << "[meta-velvetg] " << "...done (remove low & high coverage nodes)." << endl << endl;
17
18 cout << "[meta-velvetg] " << "Scaffolding based on paired-end information ..." << endl;
19 mvg->scaffolding( graph );
20 cout << "[meta-velvetg] " << "...done (scaffolding)." << endl << endl;
21
22 cout << "[meta-velvetg] " << "Finalize assembly ... " << endl;
23 mvg->finalize( graph );
24 cout << "[meta-velvetg] " << "...done (finalize assembly)." << endl << endl;
25
26 cout << "[meta-velvetg] " << "Output results ..." << endl;
27 mvg->output( graph );
28 cout << "[meta-velvetg] " << "...done (output results)." << endl << endl;
29
30 cout << "[meta-velvetg] " << "All tasks were safely completed..." << endl << endl;
31 delete mvg;
32 return 0;
33 }
34
loadGraph() const35 MetaGraph* MetaVelvetG::loadGraph() const {
36 MetaGraph* graph = new MetaGraph( FileNameUtils::getSeqFileName(prefix), FileNameUtils::getInitialGraphFileName(prefix) );
37 setInsertLengths( graph );
38 setSplitJudge( graph );
39 setSubgraphOptions( graph );
40 return graph;
41 }
42
setInsertLengths(MetaGraph * metaGraph) const43 void MetaVelvetG::setInsertLengths( MetaGraph* metaGraph ) const {
44 for( int cat=0 ; cat<CATEGORIES ; ++cat ){
45 metaGraph->setInsertLength( cat, insertLength[cat], insertLengthSD[cat] );
46 cout << "[meta-velvetg] " << "Category = 'short" << cat+1 << "'\t"
47 << "Ave. = " << insertLength[cat] << ", SD = " << insertLengthSD[cat] << endl;
48 }
49 metaGraph->setInsertLength( CATEGORIES, longInsertLength, longInsertLengthSD );
50 cout << "[meta-velvetg] " << "Category = 'long'" << "\t"
51 << "Ave. = " << longInsertLength << ", SD = " << longInsertLengthSD << endl;
52 }
53
setSplitJudge(MetaGraph * metaGraph) const54 void MetaVelvetG::setSplitJudge( MetaGraph* metaGraph ) const {
55 SplitJudge* judge = new SplitJudge( repeatCoverageSD, minSplitLength, flagUseConnections, numValidConnections, numNoiseConnections );
56 judge->setStatsWriter( FileNameUtils::getSplitStatsFileName(prefix), FileNameUtils::getSplitDetailFileName(prefix), flagReportSplitDetail );
57 metaGraph->setSplitJudge( judge );
58 }
59
setSubgraphOptions(MetaGraph * metaGraph) const60 void MetaVelvetG::setSubgraphOptions( MetaGraph* metaGraph ) const {
61 metaGraph->setSubgraphOptions( FileNameUtils::getSubgraphPrefix(prefix), flagReportSubgraph );
62 }
63
estimateCoverage(MetaGraph * metaGraph)64 void MetaVelvetG::estimateCoverage( MetaGraph* metaGraph ){
65 estimateExpectedCoverage( metaGraph );
66 estimateExpectedCoverages( metaGraph );
67 estimateCoverageCutoff( metaGraph );
68 }
69
estimateExpectedCoverages(MetaGraph * metaGraph)70 void MetaVelvetG::estimateExpectedCoverages( MetaGraph* metaGraph ){
71 cout << "[mate-velvetg] " << "Estimate expected coverages ... ";
72 if( flagEstimateExpectedCoverages ){
73 cout << "yes." << endl;
74 metaGraph->saveStats( FileNameUtils::getHistoStatsFileName(prefix) );
75 PeakDetectorParameters* params = getPeakDetectorParameters();
76 PeakDetector* peakDetector = PeakDetectorFactory::instantiatePeakDetector( "simple", params );
77 numCoveragePeaks = peakDetector->detectCoveragePeaks( metaGraph, expectedCoverages, coverageBoundaries );
78 delete params; delete peakDetector;
79 if( numCoveragePeaks <= 1 ){
80 cout << endl
81 << "[meta-velvetg] " << "Warning: Can't find multiple coverage peaks." << endl
82 << "[meta-velvetg] " << "Trun on single coverage peak mode. " << endl << endl;
83 numCoveragePeaks = 1;
84 expectedCoverages[0] = expectedCoverage;
85 }
86 } else {
87 cout << "no." << endl;
88 }
89 metaGraph->setExpectedCoverages( numCoveragePeaks, expectedCoverages );
90 metaGraph->showExpectedCoverages();
91 }
92
getPeakDetectorParameters() const93 PeakDetectorParameters* MetaVelvetG::getPeakDetectorParameters() const {
94 return new PeakDetectorParameters( minPeakCoverage, maxPeakCoverage, histoBinWidth, histoSnRatio );
95 }
96
estimateExpectedCoverage(MetaGraph * metaGraph)97 void MetaVelvetG::estimateExpectedCoverage( MetaGraph* metaGraph ){
98 cout << "[mate-velvetg] " << "Estimate expected coverage ... ";
99 if( flagEstimateExpectedCoverage ){
100 cout << "yes.";
101 expectedCoverage = metaGraph->estimateExpectedCoverage( prefix );
102 } else {
103 cout << "no.";
104 }
105 cout << "\t" << "Expected coverage = " << expectedCoverage << endl;
106 }
107
estimateCoverageCutoff(MetaGraph * metaGraph)108 void MetaVelvetG::estimateCoverageCutoff( MetaGraph* metaGraph ){
109 cout << "[meta-velvetg] " << "Estimate coverage cutoff ... ";
110 if( flagEstimateCoverageCutoff ){
111 cout << "yes.";
112 coverageCutoff = expectedCoverages[numCoveragePeaks-1] / 2;
113 } else {
114 cout << "no.";
115 }
116 cout << "\t" << "Coverage cutoff = " << coverageCutoff << endl;
117 }
118
removeNodes(MetaGraph * metaGraph)119 void MetaVelvetG::removeNodes( MetaGraph* metaGraph ){
120 cout << "[meta-velvetg] " << "Min. coverage cutoff for short reads = " << coverageCutoff << endl
121 << "[meta-velvetg] " << "Min. coverage cutoff for long reads = " << longCoverageCutoff << endl
122 << "[meta-velvetg] " << "Max. coverage cutoff for short & long reads = " << maxCoverageCutoff << endl
123 << "[meta-velvetg] " << "Min. contig length = " << minContigLength << endl;
124 metaGraph->removeNodes( coverageCutoff, longCoverageCutoff, maxCoverageCutoff,
125 minContigLength, flagExportFilteredNodes,
126 FileNameUtils::getLowCoverageContigsFileName(prefix),
127 FileNameUtils::getHighCoverageContigsFileName(prefix) );
128 }
129
scaffolding(MetaGraph * metaGraph)130 void MetaVelvetG::scaffolding( MetaGraph* metaGraph ){
131 metaGraph->scaffolding( flagMatePair, flagScaffolding, maxChimeraRate, flagDiscardChimera );
132 }
133
finalize(MetaGraph * metaGraph)134 void MetaVelvetG::finalize( MetaGraph* metaGraph ){
135 metaGraph->finalize( coverageCutoff, longCoverageCutoff );
136 }
137
output(MetaGraph * metaGraph)138 void MetaVelvetG::output( MetaGraph* metaGraph ){
139 metaGraph->save( FileNameUtils::getScaffoldFileName(prefix), minContigLength, coverageMask );
140 metaGraph->saveStats( FileNameUtils::getStatsFileName(prefix) );
141 metaGraph->save( FileNameUtils::getLastGraphFileName(prefix) );
142 if( flagExportAlignments ){ metaGraph->saveAlignments( FileNameUtils::getAlignmentFileName(prefix), minContigLength, FileNameUtils::getSeqFileName(prefix) ); }
143 if( flagExportAmos ){ metaGraph->saveAmos( FileNameUtils::getAmosFileName(prefix), minContigLength ); }
144 if( flagExportUnusedReads ){ metaGraph->saveUnusedReads( prefix, minContigLength ); }
145 }
146
MetaVelvetG(int argc,char * argv[])147 MetaVelvetG::MetaVelvetG( int argc, char* argv[] ){
148 setDefaultParameters();
149 cout << endl<< endl
150 << "[meta-velvetg] " << "Check command line options..." << endl;
151 if( !checkParameters( argc, argv ) or !setParameters( argc, argv ) or !checkParameters() ){
152 cout << "\t" << "Your command line options are not appropriate." << endl
153 << "\t" << "Please read usage of meta-velvetg carefully." << endl
154 << endl
155 << "\t" << "Thanks, " << endl
156 << endl << endl;
157 printUsage();
158 exit(-1);
159 }
160 cout << "\t" << "OK. Your command line options seem to be good." << endl
161 << endl << endl;
162 guessNonMandatoryParameters();
163 }
164
setDefaultParameters()165 void MetaVelvetG::setDefaultParameters(){
166 // velvet parameters
167 flagEstimateCoverageCutoff = true;
168 flagEstimateExpectedCoverage = true;
169 flagReadTracking = true;
170 flagScaffolding = true;
171 flagExportUnusedReads = false;
172 flagExportFilteredNodes = false;
173 flagExportAmos = false;
174 flagExportAlignments = false;
175 accelerationBits = 24;
176 minContigLength = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
177 coverageMask = 1;
178 coverageCutoff = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
179 maxCoverageCutoff = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
180 longCoverageCutoff = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
181 expectedCoverage = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
182 longInsertLength = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
183 longInsertLengthSD = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
184 for ( int cat = 0; cat < CATEGORIES; ++cat) {
185 insertLength[cat] = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
186 insertLengthSD[cat] = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
187 flagMatePair[cat] = false;
188 }
189
190 // graph-splitting parameters
191 flagDiscardChimera = false;
192 maxChimeraRate = META_VELEVTG_DEFAULT_MAX_CHIMERA_RATE;
193 repeatCoverageSD = META_VELVETG_DEFAULT_REPEAT_COVERAGE_SD;
194 minSplitLength = META_VELVETG_DEFAULT_MIN_SPLIT_LENGTH;
195 flagUseConnections = true;
196 numValidConnections = META_VELVETG_DEFAULT_NUM_VALID_CONNECTIONS;
197 numNoiseConnections = META_VELVETG_DEFAULT_NUM_NOISE_CONNECTIONS;
198 flagReportSplitDetail = false;
199 flagReportSubgraph = false;
200
201 // peak parameters
202 flagEstimateExpectedCoverages = true;
203 numCoveragePeaks = 1;
204 for( int i=0 ; i<META_VELVET_G_MAX_NUM_COVERAGE_PEAKS ; ++i ){
205 expectedCoverages[i] = META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED;
206 }
207 minPeakCoverage = META_VELVET_G_DEFAULT_MIN_PEAK_COVERAGE;
208 maxPeakCoverage = META_VELVET_G_DEFAULT_MAX_PEAK_COVERAGE;
209 histoBinWidth = META_VELVET_G_DEFAULT_HISTO_BIN_WIDTH;
210 histoSnRatio = META_VELVET_G_DEFAULT_HISTO_SN_RATIO;
211 }
212
checkParameters() const213 bool MetaVelvetG::checkParameters() const {
214 return checkMetaHistoParameters();
215 }
216
checkMetaHistoParameters() const217 bool MetaVelvetG::checkMetaHistoParameters() const {
218 PeakDetectorParameters* param = getPeakDetectorParameters();
219 bool checkResult = param->checkParameters();
220 delete param;
221 return checkResult;
222 }
223
checkParameters(int argc,char * argv[]) const224 bool MetaVelvetG::checkParameters( int argc, char* argv[] ) const {
225 if( checkArgc(argc) and checkPrefix(argc, argv) ){
226 return true;
227 }
228 return false;
229 }
230
checkArgc(int argc) const231 bool MetaVelvetG::checkArgc( int argc ) const {
232 if( argc < 2 ){
233 return false;
234 }
235 return true;
236 }
237
checkPrefix(int argc,char * argv[]) const238 bool MetaVelvetG::checkPrefix( int argc, char* argv[] ) const {
239 if( strncmp( argv[1], "-", 1 ) == 0 ){
240 cerr << "[meta-velvetg] Error: the first argument should be a directory path (should not be start with '-')" << endl;
241 return false;
242 }
243 return checkMandatoryFiles( (string)argv[1] );
244 }
245
checkMandatoryFiles(string prefix) const246 bool MetaVelvetG::checkMandatoryFiles( string prefix ) const {
247 if( !FileUtils::isValidFile(FileNameUtils::getSeqFileName(prefix)) ){
248 cerr << "[meta-velvetg] Error: There is no 'Sequences' file in the directory: " << prefix << endl
249 << "\t" << "You should run 'velveth' before running 'meta-velvetg'" << endl << endl;
250 return false;
251 }
252 if( !FileUtils::isValidFile(FileNameUtils::getRoadmapFileName(prefix)) ){
253 cerr << "[meta-velvetg] Error: There is no 'Roadmaps' file in the directory: " << prefix << endl
254 << "\t" << "You should run 'velveth' before running 'meta-velvetg'" << endl << endl;
255 return false;
256 }
257 if( !FileUtils::isValidFile(FileNameUtils::getInitialGraphFileName(prefix)) ){
258 cerr << "[meta-velvetg] Error: There is no 'Graph2' file in the directory: " << prefix << endl
259 << "\t" << "You should run 'velvetg' with '-read_trkg' opton before runing 'meta-velvetg'" << endl << endl;
260 return false;
261 }
262 return true;
263 }
264
printUsage() const265 void MetaVelvetG::printUsage() const {
266 cerr << endl << endl
267 << "************************************************************" << endl
268 << "Program: ----------------------------------------------" << endl
269 << " " << "meta-velvetg - contiging and scaffolding program for metagenomics NGS data" << endl
270 << "\t" << "Version = " << META_VELVET_VERSION << endl
271 << "\t" << "MAX_CATEGORIES = " << CATEGORIES << endl
272 << "\t" << "MAX_KMER_LENGTH = " << MAXKMERLENGTH << endl
273 << endl << endl
274 << "Usage: ------------------------------------------------" << endl
275 << " " << "meta-velvetg directory [options]" << endl
276 << "\t" << "directory " << "\t: directory name for output files" << endl
277 << endl
278 << " " << "Graph-splitting options (metagenome-specific):" << endl
279 << "\t" << "-discard_chimera <yes|no> " << "\t: discard chimera sub-graph (default: no)" << endl
280 << "\t" << "-max_chimera_rate <double> " << "\t: maximum allowable chimera rate (default: 0.0)" << endl
281 << "\t" << "-repeat_cov_sd " << "\t: standard deviation of repeat node coverages (default: 0.1)" << endl
282 << "\t" << "-min_split_length <int> " << "\t: minimum node length required for repeat resolution (default: 0)" << endl
283 << "\t" << "-valid_connections <int> " << "\t: minimum allowable number of consistent paired-end connections (default: 1)" << endl
284 << "\t" << "-noise_connections <int> " << "\t: maximum allowable number of inconsistent paired-end connections (default: 0)" << endl
285 << "\t" << "-use_connections <yes|no> " << "\t: use paired-end connections for graph splitting (default: yes)" << endl
286 << "\t" << "-report_split_detail <yes|no>" << "\t: report sequences around repeat nodes (default: no)" << endl
287 << "\t" << "-report_subgraph <yes|no> " << "\t: report node sequences for each subgraph (default: no)" << endl
288 << endl
289 << " " << "Peak detection options (metagenome-specific):" << endl
290 << "\t" << "-exp_covs <string|auto> " << "\t: expected coverages for each species in microbiome (default: auto)" << endl
291 << "\t" << " " << "\t ex) -exp_covs 214_122_70_43_25_13.5" << endl
292 << "\t" << " " << "\t coverage values should be sorted in a descending order" << endl
293 << "\t" << "-min_peak_cov <double> " << "\t: minimum peak coverage (default: " << META_VELVET_G_DEFAULT_MIN_PEAK_COVERAGE << ")" << endl
294 << "\t" << "-max_peak_cov <double> " << "\t: maximum peak coverage (default: " << META_VELVET_G_DEFAULT_MAX_PEAK_COVERAGE << ")" << endl
295 << "\t" << "-histo_bin_width <double> " << "\t: bin width of peak coverage histogram (default: " << META_VELVET_G_DEFAULT_HISTO_BIN_WIDTH << ")" << endl
296 << "\t" << "-histo_sn_ratio <double> " << "\t: signal-noise ratio to remove peak noises (default: " << META_VELVET_G_DEFAULT_HISTO_SN_RATIO << ")" << endl
297 << endl
298 << " " << "Contiging options: (common to single-genome)" << endl
299 << "\t" << "-cov_cutoff <double|auto> " << "\t: removal of low coverage nodes AFTER tour bus or allow the system to infer it (default: auto)" << endl
300 << "\t" << "-max_coverage <double> " << "\t: removal of high coverage nodes AFTER tour bus (default: no removal)" << endl
301 << "\t" << "-long_cov_cutoff <double> " << "\t: removal of nodes with low long-read coverage AFTER tour bus (default: no removal)" << endl
302 //<< "\t" << "-read_trkg <yes|no> " << "\t: tracking of short read positions in assembly (default: yes)" << endl
303 << "\t" << "-max_branch_length <int> " << "\t: maximum length in base pair of bubble (default: 100)" << endl
304 << "\t" << "-max_divergence <double> " << "\t: maximum divergence rate between two branches in a bubble (default: 0.2)" << endl
305 << "\t" << "-max_gap_count <int> " << "\t: maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)" << endl
306 << "\t" << "-min_contig_lgth <int> " << "\t: minimum contig length exported to contigs.fa file (default: hash length * 2)" << endl
307 << endl
308 << " " << "Scaffolding options: (common to single-genome)" << endl
309 << "\t" << "-scaffolding <yes|no> " << "\t: scaffolding of contigs used paired end information (default: on)" << endl
310 << "\t" << "-exp_cov <double|auto> " << "\t: expected coverage of unique regions or allow the system to infer it (default: auto)" << endl
311 << "\t" << "-ins_length <double> " << "\t: expected distance between two paired end reads for the category (default: no read pairing)" << endl
312 << "\t" << "-ins_length_sd <double> " << "\t: standard deviation of insert length for the category (default: insert length / 10)" << endl
313 << "\t" << "-ins_length2 <double> " << "\t: expected distance between two paired end reads for the category (default: no read pairing)" << endl
314 << "\t" << "-ins_length2_sd <double> " << "\t: standard deviation of insert length for the category (default: insert length / 10)" << endl
315 << "\t" << "-ins_length_long <double> " << "\t: expected distance between two long paired-end reads (default: no read pairing)" << endl
316 << "\t" << "-ins_length_long_sd <double> " << "\t: standard deviation of insert length for the category" << endl
317 << "\t" << "-min_pair_count <int> " << "\t: minimum number of paired end connections to justify the scaffolding of two long contigs (default: 5)" << endl
318 << "\t" << "-long_mult_cutoff <int> " << "\t: minimum number of long reads required to merge contigs (default: 2)" << endl
319 << "\t" << "-shortMatePaired <yes|no> " << "\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default no)" << endl
320 << endl
321 << " " << "Output options: (common to single-genome)" << endl
322 << "\t" << "-amos_file <yes|no> " << "\t: export assembly to AMOS file (default: no export)" << endl
323 << "\t" << "-coverage_mask <int> " << "\t: minimum coverage required for confident regions of contigs (default: 1)" << endl
324 << "\t" << "-unused_reads <yes|no> " << "\t: export unused reads in UnusedReads.fa file (default: no)" << endl
325 << "\t" << "-alignments <yes|no> " << "\t: export a summary of contig alignment to the reference sequences (default: no)" << endl
326 << "\t" << "-exportFiltered <yes|no> " << "\t: export the long nodes which were eliminated by the coverage filters (default: no)" << endl
327 << "\t" << "-paired_exp_fraction <double>" << "\t: remove all the paired end connections which less than the specified fraction of the expected count (default: 0.1)" << endl
328 << endl << endl
329 << "Output: -----------------------------------------------" << endl
330 << "\t" << "directory/meta-velvetg.contigs.fa \t: fasta file of contigs longer than twice hash length" << endl
331 << "\t" << "directory/meta-velvetg.LastGraph \t: special formatted file with all the information on the final graph" << endl
332 << "\t" << "directory/meta-velvetg.Graph2-stats.txt \t: stats file (tab-delimited) useful for optimizing coverage peak values" << endl
333 << "\t" << "directory/meta-velvetg.split-stats.txt \t: stats file (tab-delimited) useful for optimizing graph-splitting parameters" << endl
334 << endl
335 << "************************************************************" << endl
336 << endl << endl;
337 }
338
setParameters(int argc,char * argv[])339 bool MetaVelvetG::setParameters( int argc, char* argv[] ){
340 prefix = argv[1];
341 for ( int arg_index=2 ; arg_index<argc ; arg_index++ ) {
342 char* arg = argv[arg_index++];
343 if ( arg_index >= argc ) {
344 cerr << "[meta-velvetg] Error: Unusual number of arguments!" << endl;
345 return false;
346 }
347 if( !setParameter( arg, argv[arg_index] ) ){
348 return false;
349 }
350 }
351 return true;
352 }
353
setParameter(char * name,char * val)354 bool MetaVelvetG::setParameter( char* name, char* val ){
355 if( strcmp(name, "-cov_cutoff") == 0 ){ return setCoverageCutoff( val ); }
356 if( strcmp(name, "-long_cov_cutoff") == 0 ){ return setLongCoverageCutoff( val ); }
357 if( strcmp(name, "-exp_cov") == 0 ){ return setExpectedCoverage( val ); }
358 if( strcmp(name, "-ins_length") == 0 ){ return setInsertLength( 0, val ); }
359 if( strcmp(name, "-ins_length_sd") == 0 ){ return setInsertLengthSD( 0, val ); }
360 if( strcmp(name, "-ins_length_long") == 0 ){ return setLongInsertLength( val ); }
361 if( strcmp(name, "-ins_length_long_sd") == 0 ){ return setLongInsertLengthSD( val ); }
362 if( strncmp(name, "-ins_length", 11) == 0 && strchr(name, 'd') == NULL ){int cat = 0; sscanf(name, "-ins_length%d", &cat); return setInsertLength( cat-1, val );}
363 if( strncmp(name, "-ins_length", 11) == 0 ){ int cat = 0; sscanf(name, "-ins_length%d_sd", &cat); return setInsertLengthSD( cat-1, val ); }
364 if( strcmp(name, "-read_trkg") == 0 ){ return setFlagReadTracking( val ); }
365 if( strcmp(name, "-scaffolding") == 0 ){ return setFlagScaffolding( val ); }
366 if( strcmp(name, "-exportFiltered") == 0 ){ return setFlagExportFilteredNodes( val ); }
367 if( strcmp(name, "-amos_file") == 0 ){ return setFlagExportAmos( val ); }
368 if( strcmp(name, "-alignments") == 0 ){ return setFlagExportAlignments( val ); }
369 if( strcmp(name, "-min_contig_lgth") == 0 ){ return setMinContigLength( val ); }
370 if( strcmp(name, "-coverage_mask") == 0 ){ return setCoverageMask( val ); }
371 if( strcmp(name, "-accel_bits") == 0 ){ return setAccelerationBits( val ); }
372 if( strcmp(name, "-max_branch_length") == 0 ){ return setMaxBranchLength( val ); }
373 if( strcmp(name, "-max_divergence") == 0 ){ return _setMaxDivergence( val ); }
374 if( strcmp(name, "-max_gap_count") == 0 ){ return setMaxGapCount( val ); }
375 if( strcmp(name, "-min_pair_count") == 0 ){ return setMinPairCount( val ); }
376 if( strcmp(name, "-max_coverage") == 0 ){ return setMaxCoverage( val ); }
377 if( strcmp(name, "-long_mult_cutoff") == 0 ){ return setLongMultiCutoff( val ); }
378 if( strcmp(name, "-paired_exp_fraction") == 0 ){ return _setPairedExpFraction( val ); }
379 if( strcmp(name, "-unused_reads") == 0 ){ return setFlagExportUnusedReads(val); }
380 if( strcmp(name, "-shortMatePaired") == 0 ){ return setFlagMatePair( 0, val ); }
381 if( strncmp(name, "-shortMatePaired", 16) == 0 ){ int cat = 0; sscanf(name, "-shortMatePaired%i", &cat); return setFlagMatePair( cat-1, val ); }
382 if( strcmp(name, "-discard_chimera") == 0 ){ return setFlagDiscardChimera( val ); }
383 if( strcmp(name, "-max_chimera_rate") == 0 ){ return setMaxChimeraRate( val ); }
384 if( strcmp(name, "-exp_covs") == 0 ){ return setExpectedCoverages( val ); }
385 if( strcmp(name, "-manual_exp_cov_multi") == 0 ){ return setExpectedCoverages( val ); }
386 if( strcmp(name, "-repeat_cov_sd") == 0 ){ return setRepeatCoverageSD( val ); }
387 if( strcmp(name, "-min_split_length") == 0 ){ return setMinSplitLength( val ); }
388 if( strcmp(name, "-min_peak_cov") == 0 ){ return setMinPeakCoverage( val ); }
389 if( strcmp(name, "-max_peak_cov") == 0 ){ return setMaxPeakCoverage( val ); }
390 if( strcmp(name, "-histo_bin_width") == 0 ){ return setHistoBinWidth( val ); }
391 if( strcmp(name, "-histo_sn_ratio") == 0 ){ return setHistoSnRatio( val ); }
392 if( strcmp(name, "-valid_connections") == 0 ){ return setNumValidConnections( val ); }
393 if( strcmp(name, "-noise_connections") == 0 ){ return setNumNoiseConnections( val ); }
394 if( strcmp(name, "-use_connections") == 0 ){ return setFlagUseConnections( val ); }
395 if( strcmp(name, "-report_split_detail") == 0 ){ return setFlagReportSplitDetail( val ); }
396 if( strcmp(name, "-report_subgraph") == 0 ){ return setFlagReportSubgraph( val ); }
397 if( strcmp(name, "--help") == 0 ){ return false; }
398 cerr << "[meta-velvetg] Unknown option: " << name << endl;
399 return false;
400 }
401
setCoverageCutoff(char * val)402 bool MetaVelvetG::setCoverageCutoff( char* val ){
403 if ( strcmp(val, "auto") == 0 ){
404 flagEstimateCoverageCutoff = true;
405 } else {
406 flagEstimateCoverageCutoff = false;
407 coverageCutoff = atof( val );
408 }
409 return true;
410 }
411
setLongCoverageCutoff(char * val)412 bool MetaVelvetG::setLongCoverageCutoff( char* val ){
413 longCoverageCutoff = atof( val );
414 return true;
415 }
416
setExpectedCoverage(char * val)417 bool MetaVelvetG::setExpectedCoverage( char* val ){
418 if( strcmp(val, "auto") == 0 ){
419 flagEstimateExpectedCoverage = true;
420 } else {
421 flagEstimateExpectedCoverage = false;
422 expectedCoverage = atof( val );
423 }
424 return true;
425 }
426
setInsertLength(int cat,char * val)427 bool MetaVelvetG::setInsertLength( int cat, char* val ){
428 if( !checkCategory( cat ) ){ return false; }
429 insertLength[cat] = atof( val );
430 if( insertLength[cat] <= 0 ){
431 cerr << "[meta-velvetg] Error: Invalid insert length for category " << cat << ": " << insertLength[cat] << endl;
432 return false;
433 }
434 return true;
435 }
436
setInsertLengthSD(int cat,char * val)437 bool MetaVelvetG::setInsertLengthSD( int cat, char* val ){
438 if( !checkCategory( cat ) ){ return false; }
439 insertLengthSD[cat] = atof( val );
440 if( insertLengthSD[cat] <= 0 ){
441 cerr << "[meta-velvetg] Error: Invalid insert length SD for category " << cat << ": " << insertLengthSD[cat] << endl;
442 return false;
443 }
444 return true;
445 }
446
setLongInsertLength(char * val)447 bool MetaVelvetG::setLongInsertLength( char* val ){
448 longInsertLength = atof( val );
449 if( longInsertLength <= 0 ){
450 cerr << "[meta-velvetg] Error: Invalid insert length for long reads: " << longInsertLength << endl;
451 return false;
452 }
453 return true;
454 }
455
setLongInsertLengthSD(char * val)456 bool MetaVelvetG::setLongInsertLengthSD( char* val ){
457 longInsertLengthSD = atof( val );
458 if( longInsertLengthSD <= 0 ){
459 cerr << "[meta-velvetg] Error: Invalid insert length SD for long reads: " << longInsertLengthSD << endl;
460 return false;
461 }
462 return true;
463 }
464
setFlagReadTracking(char * val)465 bool MetaVelvetG::setFlagReadTracking( char* val ){
466 flagReadTracking = (strcmp(val, "yes") == 0);
467 return true;
468 }
469
setFlagScaffolding(char * val)470 bool MetaVelvetG::setFlagScaffolding( char* val ){
471 flagScaffolding = (strcmp(val, "yes") == 0);
472 return true;
473 }
474
setFlagExportFilteredNodes(char * val)475 bool MetaVelvetG::setFlagExportFilteredNodes( char* val ){
476 flagExportFilteredNodes = (strcmp(val, "yes") == 0);
477 return true;
478 }
479
setFlagExportAmos(char * val)480 bool MetaVelvetG::setFlagExportAmos( char* val ){
481 flagExportAmos = (strcmp(val, "yes") == 0);
482 return true;
483 }
484
setFlagExportAlignments(char * val)485 bool MetaVelvetG::setFlagExportAlignments( char* val ){
486 flagExportAlignments = (strcmp(val, "yes") == 0);
487 return true;
488 }
489
setMinContigLength(char * val)490 bool MetaVelvetG::setMinContigLength( char* val ){
491 minContigLength = atoi( val );
492 return true;
493 }
494
setCoverageMask(char * val)495 bool MetaVelvetG::setCoverageMask( char* val ){
496 coverageMask = atof( val );
497 return true;
498 }
499
setAccelerationBits(char * val)500 bool MetaVelvetG::setAccelerationBits( char* val ){
501 accelerationBits = atoi( val );
502 if( accelerationBits < 0 ){
503 cerr << "[meta-velvetg] Error: Illegal acceleration parameter: " << accelerationBits << endl;
504 return false;
505 }
506 return true;
507 }
508
setMaxBranchLength(char * val)509 bool MetaVelvetG::setMaxBranchLength( char* val ){
510 setMaxReadLength( atoi(val) );
511 setLocalMaxReadLength( atoi(val) );
512 return true;
513 }
514
_setMaxDivergence(char * val)515 bool MetaVelvetG::_setMaxDivergence( char* val ){
516 setMaxDivergence( atof(val) );
517 setLocalMaxDivergence( atof(val) );
518 return true;
519 }
520
setMaxGapCount(char * val)521 bool MetaVelvetG::setMaxGapCount( char* val ){
522 setMaxGaps( atoi(val) );
523 setLocalMaxGaps( atoi(val) );
524 return true;
525 }
526
setMinPairCount(char * val)527 bool MetaVelvetG::setMinPairCount( char* val ){
528 setUnreliableConnectionCutoff( atoi(val) );
529 return true;
530 }
531
setMaxCoverage(char * val)532 bool MetaVelvetG::setMaxCoverage( char* val ){
533 maxCoverageCutoff = atof( val );
534 return true;
535 }
536
setLongMultiCutoff(char * val)537 bool MetaVelvetG::setLongMultiCutoff( char * val ){
538 setMultiplicityCutoff( atoi(val) );
539 return true;
540 }
541
_setPairedExpFraction(char * val)542 bool MetaVelvetG::_setPairedExpFraction( char * val ){
543 setPairedExpFraction( atof(val) );
544 return true;
545 }
546
setFlagExportUnusedReads(char * val)547 bool MetaVelvetG::setFlagExportUnusedReads( char* val ){
548 if( strcmp(val, "yes") == 0 ){
549 flagExportUnusedReads = true;
550 flagReadTracking = true;
551 }
552 return true;
553 }
554
setFlagMatePair(int cat,char * val)555 bool MetaVelvetG::setFlagMatePair( int cat, char* val ){
556 checkCategory( cat );
557 flagMatePair[cat] = ( strcmp(val, "yes") == 0 );
558 return true;
559 }
560
setFlagDiscardChimera(char * val)561 bool MetaVelvetG::setFlagDiscardChimera( char* val ){
562 flagDiscardChimera = ( strcmp(val, "yes") == 0 );
563 return true;
564 }
565
setMaxChimeraRate(char * val)566 bool MetaVelvetG::setMaxChimeraRate( char* val ){
567 maxChimeraRate = atof( val );
568 return true;
569 }
570
setExpectedCoverages(char * val)571 bool MetaVelvetG::setExpectedCoverages( char* val ){
572 flagEstimateExpectedCoverages = false;
573 char* tokenPointer = val;
574 char* tmp[META_VELVET_G_MAX_NUM_COVERAGE_PEAKS];
575 for( numCoveragePeaks=0; numCoveragePeaks<META_VELVET_G_MAX_NUM_COVERAGE_PEAKS ; ++numCoveragePeaks ){
576 if( (tmp[numCoveragePeaks] = strtok(tokenPointer, META_VELVET_G_COVERAGE_DELIM_CHAR)) != NULL ){
577 expectedCoverages[numCoveragePeaks] = atof( tmp[numCoveragePeaks] );
578 } else {
579 break;
580 }
581 tokenPointer = NULL;
582 }
583 return checkCoverageOrder();
584 }
585
setRepeatCoverageSD(char * val)586 bool MetaVelvetG::setRepeatCoverageSD( char* val ){
587 repeatCoverageSD = atof(val);
588 return true;
589 }
590
setMinSplitLength(char * val)591 bool MetaVelvetG::setMinSplitLength( char* val ){
592 minSplitLength = atoi(val);
593 return true;
594 }
595
setMinPeakCoverage(char * val)596 bool MetaVelvetG::setMinPeakCoverage( char* val ){
597 minPeakCoverage = atof(val);
598 return true;
599 }
600
setMaxPeakCoverage(char * val)601 bool MetaVelvetG::setMaxPeakCoverage( char* val ){
602 maxPeakCoverage = atof(val);
603 return true;
604 }
605
setHistoBinWidth(char * val)606 bool MetaVelvetG::setHistoBinWidth( char* val ){
607 histoBinWidth = atof(val);
608 return true;
609 }
610
setHistoSnRatio(char * val)611 bool MetaVelvetG::setHistoSnRatio( char* val ){
612 histoSnRatio = atof(val);
613 return true;
614 }
615
setNumValidConnections(char * val)616 bool MetaVelvetG::setNumValidConnections( char* val ){
617 numValidConnections = atoi(val);
618 return true;
619 }
620
setNumNoiseConnections(char * val)621 bool MetaVelvetG::setNumNoiseConnections( char* val ){
622 numNoiseConnections = atoi(val);
623 return true;
624 }
625
setFlagUseConnections(char * val)626 bool MetaVelvetG::setFlagUseConnections( char* val ){
627 flagUseConnections = ( strcmp(val, "yes") == 0 );
628 return true;
629 }
630
setFlagReportSplitDetail(char * val)631 bool MetaVelvetG::setFlagReportSplitDetail( char* val ){
632 flagReportSplitDetail = ( strcmp(val, "yes") == 0 );
633 return true;
634 }
635
setFlagReportSubgraph(char * val)636 bool MetaVelvetG::setFlagReportSubgraph( char* val ){
637 flagReportSubgraph = ( strcmp(val, "yes") == 0 );
638 return true;
639 }
640
checkCategory(int cat) const641 bool MetaVelvetG::checkCategory( int cat ) const {
642 if( (cat<0) or (cat>CATEGORIES) ){
643 cerr << "[meta-velvetg] Error: Invalid category: " << cat << endl;
644 return false;
645 }
646 return true;
647 }
648
checkCoverageOrder() const649 bool MetaVelvetG::checkCoverageOrder() const {
650 for( int i=1 ; i<numCoveragePeaks ; ++i ){
651 if( expectedCoverages[i-1] <= expectedCoverages[i] ){
652 cerr << "[meta-velvetg] Error: expected coverages should be sorted in a descending order: "
653 << expectedCoverages[i-1] << " <-> " << expectedCoverages[i] << endl;
654 return false;
655 }
656 }
657 return true;
658 }
659
guessNonMandatoryParameters()660 void MetaVelvetG::guessNonMandatoryParameters(){
661 guessInsertLengthSD();
662 guessCoverageFlags();
663 }
664
guessInsertLengthSD()665 void MetaVelvetG::guessInsertLengthSD(){
666 for( int cat=0 ; cat<CATEGORIES ; ++cat){
667 if( isGuessInsertLengthSD( insertLength[cat], insertLengthSD[cat] ) )
668 insertLengthSD[cat] = guessInsertLengthSD( insertLength[cat] );
669 }
670 if( isGuessInsertLengthSD( longInsertLength, longInsertLengthSD ) )
671 longInsertLengthSD = guessInsertLengthSD( longInsertLength );
672 }
673
isGuessInsertLengthSD(double ave,double sd)674 bool MetaVelvetG::isGuessInsertLengthSD( double ave, double sd ){
675 return (ave != META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED) and (sd == META_VELVET_G_PARAMETER_VALUE_NOT_SPECIFIED);
676 }
677
guessInsertLengthSD(double ave)678 double MetaVelvetG::guessInsertLengthSD( double ave ){
679 return ave * META_VELVET_G_SD_RATIO_FOR_GUESS;
680 }
681
guessCoverageFlags()682 void MetaVelvetG::guessCoverageFlags(){
683 if( expectedCoverage < 0 )
684 flagEstimateExpectedCoverage = true;
685 if( coverageCutoff < 0 )
686 flagEstimateCoverageCutoff = true;
687 }
688
689
690