1 /*==========================================================================
2 RazerS - Fast Read Mapping with Controlled Loss Rate
3 http://www.seqan.de/projects/razers.html
4
5 ============================================================================
6 Copyright (C) 2008 by David Weese
7
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 3 of the License, or (at your options) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ==========================================================================*/
21
22 #define SEQAN_PROFILE // enable time measuring
23 //#define SEQAN_DEBUG_SWIFT // test SWIFT correctness and print bucket parameters
24 //#define RAZERS_DEBUG // print verification regions
25 #define RAZERS_PRUNE_QGRAM_INDEX // ignore highly abundant q-grams
26 #define RAZERS_CONCATREADS // use <ConcatDirect> StringSet to store reads
27 #define RAZERS_MEMOPT // optimize memory consumption
28 #define RAZERS_MASK_READS // remove matches with max-hits optimal hits on-the-fly
29 //#define NO_PARAM_CHOOSER // disable loss-rate parameter choosing
30 //#define RAZERS_PARALLEL // parallelize using Intel's Threading Building Blocks
31 #define RAZERS_MATEPAIRS // enable paired-end matching
32 //#define RAZERS_DIRECT_MAQ_MAPPING
33 //#define SEQAN_USE_SSE2_WORDS // use SSE2 128-bit integers for MyersBitVector
34 //#define RAZERS_DONTMASKDUPLICATES
35 #define RAZERS_NOOUTERREADGAPS // enforce the alignment of the first and last base (determines the lakes)
36 #define RAZERS_OPENADDRESSING
37
38 #include <seqan/platform.h>
39 #ifdef PLATFORM_WINDOWS
40 #define SEQAN_DEFAULT_TMPDIR "C:\\TEMP\\"
41 #else
42 #define SEQAN_DEFAULT_TMPDIR "./"
43 #endif
44
45 #include <seqan/misc/misc_cmdparser.h>
46 #include "razers.h"
47 #include "outputFormat.h"
48 #include "paramChooser.h"
49 #include <seqan/file.h>
50 #include <seqan/store.h>
51
52 #ifdef RAZERS_PARALLEL
53 #include "razers_parallel.h"
54 #endif
55
56 #ifdef RAZERS_MATEPAIRS
57 #include "razers_matepairs.h"
58 #endif
59
60
61 #include <iostream>
62 #include <sstream>
63
64 using namespace std;
65 using namespace seqan;
66
67
68 //////////////////////////////////////////////////////////////////////////////
69 // Read a list of genome file names
70 template<typename TSpec>
getGenomeFileNameList(CharString filename,StringSet<CharString> & genomeFileNames,RazerSOptions<TSpec> & options)71 int getGenomeFileNameList(CharString filename, StringSet<CharString> & genomeFileNames, RazerSOptions<TSpec> &options)
72 {
73 ifstream file;
74 file.open(toCString(filename),ios_base::in | ios_base::binary);
75 if(!file.is_open())
76 return RAZERS_GENOME_FAILED;
77
78 clear(genomeFileNames);
79 char c = _streamGet(file);
80 if (c != '>' && c != '@') //if file does not start with a fasta header --> list of multiple reference genome files
81 {
82 if(options._debugLevel >=1)
83 cout << endl << "Reading multiple genome files:" <<endl;
84
85 /* //locations of genome files are relative to list file's location
86 string tempGenomeFile(filename);
87 size_t lastPos = tempGenomeFile.find_last_of('/') + 1;
88 if (lastPos == tempGenomeFile.npos) lastPos = tempGenomeFile.find_last_of('\\') + 1;
89 if (lastPos == tempGenomeFile.npos) lastPos = 0;
90 string filePrefix = tempGenomeFile.substr(0,lastPos);*/
91
92 unsigned i = 1;
93 while(!_streamEOF(file))
94 {
95 _parseSkipWhitespace(file, c);
96 appendValue(genomeFileNames,_parseReadFilepath(file,c));
97 //CharString currentGenomeFile(filePrefix);
98 //append(currentGenomeFile,_parseReadFilepath(file,c));
99 //appendValue(genomeFileNames,currentGenomeFile);
100 if(options._debugLevel >=2)
101 cout <<"Genome file #"<< i <<": " << genomeFileNames[length(genomeFileNames)-1] << endl;
102 ++i;
103 _parseSkipWhitespace(file, c);
104 }
105 if(options._debugLevel >=1)
106 cout << i-1 << " genome files total." <<endl;
107 }
108 else //if file starts with a fasta header --> regular one-genome-file input
109 appendValue(genomeFileNames, filename, Exact());
110 file.close();
111 return 0;
112
113 }
114
115 //////////////////////////////////////////////////////////////////////////////
116 // Main read mapper function
117 template <typename TSpec>
mapReads(StringSet<CharString> & genomeFileNames,StringSet<CharString> & readFileNames,CharString & errorPrbFileName,RazerSOptions<TSpec> & options)118 int mapReads(
119 StringSet<CharString> & genomeFileNames,
120 StringSet<CharString> & readFileNames, // NULL terminated list of one/two read files (single/mate-pairs)
121 CharString & errorPrbFileName,
122 RazerSOptions<TSpec> &options)
123 {
124 FragmentStore<> store; // stores all of the tables
125
126 MultiFasta genomeSet;
127 String<String<unsigned short> > stats; // needed for mapping quality calculation
128
129 // dump configuration in verbose mode
130 if (options._debugLevel >= 1)
131 {
132 CharString bitmap;
133 Shape<Dna, GenericShape> shape;
134 stringToShape(shape, options.shape);
135 shapeToString(bitmap, shape);
136
137 cerr << "___SETTINGS____________" << endl;
138 cerr << "Genome file: \t" << genomeFileNames[0] << endl;
139 if (empty(readFileNames[1]))
140 cerr << "Read file: \t" << readFileNames[0] << endl;
141 else
142 {
143 cerr << "Read files: \t" << readFileNames[0] << endl;
144 for (unsigned i = 1; i < length(readFileNames); ++i)
145 cerr << " \t" << readFileNames[i] << endl;
146 }
147 cerr << "Compute forward matches: \t";
148 if (options.forward) cerr << "YES" << endl;
149 else cerr << "NO" << endl;
150 cerr << "Compute reverse matches: \t";
151 if (options.reverse) cerr << "YES" << endl;
152 else cerr << "NO" << endl;
153 cerr << "Error rate: \t" << options.errorRate << endl;
154 cerr << "Minimal threshold: \t" << options.threshold << endl;
155 cerr << "Shape: \t" << bitmap << endl;
156 cerr << "Repeat threshold: \t" << options.repeatLength << endl;
157 cerr << "Overabundance threshold: \t" << options.abundanceCut << endl;
158 cerr << "Taboo length: \t" << options.tabooLength << endl;
159 cerr << endl;
160 }
161
162 // circumvent numerical obstacles
163 options.errorRate += 0.0000001;
164
165 //////////////////////////////////////////////////////////////////////////////
166 // Step 1: Load fasta files and determine genome file type
167 SEQAN_PROTIMESTART(load_time);
168
169 #ifdef RAZERS_MATEPAIRS
170 if (length(readFileNames) == 2)
171 {
172 if (!loadReads(store, toCString(readFileNames[0]), toCString(readFileNames[1]), options)) {
173 //if (!loadReads(readSet, readQualities, readNames, readFileNames[0], readFileNames[1], options)) {
174 cerr << "Failed to load reads" << endl;
175 return RAZERS_READS_FAILED;
176 }
177 }
178 else
179 #endif
180 {
181 if (!loadReads(store, toCString(readFileNames[0]), options)) {
182 cerr << "Failed to load reads" << endl;
183 return RAZERS_READS_FAILED;
184 }
185 }
186
187 if (options._debugLevel >= 1) cerr << lengthSum(store.readSeqStore) << " bps of " << length(store.readSeqStore) << " reads loaded." << endl;
188 options.timeLoadFiles = SEQAN_PROTIMEDIFF(load_time);
189
190 if (options._debugLevel >= 1)
191 cerr << "Loading reads took \t" << options.timeLoadFiles << " seconds" << endl;
192
193 if (length(genomeFileNames) == 1)
194 {
195 int result = getGenomeFileNameList(genomeFileNames[0], genomeFileNames, options);
196 if (result == RAZERS_GENOME_FAILED)
197 {
198 cerr << "Failed to open genome file " << genomeFileNames[0] << endl;
199 return result;
200 }
201 }
202
203 //////////////////////////////////////////////////////////////////////////////
204 // Step 2: Find matches using SWIFT
205 #ifdef RAZERS_PARALLEL
206 typedef typename RazerSOptions<TSpec>::TMutex TMutex;
207 options.patternMutex = new TMutex[length(readSet)];
208 #endif
209
210 String< Pair<CharString, unsigned> > gnoToFileMap; //map to retrieve genome filename and sequence number within that file
211 int error = mapReads(store, genomeFileNames, gnoToFileMap, stats, options);
212 if (error != 0)
213 {
214 switch (error)
215 {
216 case RAZERS_GENOME_FAILED:
217 cerr << "Failed to load genomes" << endl;
218 break;
219
220 case RAZERS_INVALID_SHAPE:
221 cerr << "Invalid Shape" << endl;
222 break;
223 }
224 return error;
225 }
226
227 #ifdef RAZERS_PARALLEL
228 delete[] options.patternMutex;
229 #endif
230
231 //////////////////////////////////////////////////////////////////////////////
232 // Step 3: Remove duplicates and output matches
233 if (!options.spec.DONT_DUMP_RESULTS)
234 dumpMatches(store, genomeFileNames, gnoToFileMap, stats, readFileNames[0], errorPrbFileName, options);
235
236 return 0;
237 }
238
239 //////////////////////////////////////////////////////////////////////////////
240 // Command line parsing and parameter choosing
main(int argc,const char * argv[])241 int main(int argc, const char *argv[])
242 {
243 RazerSOptions<> options;
244 ParamChooserOptions pm_options;
245
246 #ifdef RAZERS_MATEPAIRS
247 const unsigned maxFiles = 3;
248 #else
249 const unsigned maxFiles = 2;
250 #endif
251 StringSet<CharString> genomeFileNames;
252 StringSet<CharString> readFileNames;
253 CharString errorPrbFileName;
254
255
256 // Change defaults
257 options.forward = false;
258 options.reverse = false;
259
260 CommandLineParser parser;
261 string rev = "$Revision: 4739 $";
262 addVersionLine(parser, "RazerS version 1.1 20090909 [" + rev.substr(11, 4) + "]");
263
264 //////////////////////////////////////////////////////////////////////////////
265 // Define options
266 addTitleLine(parser, "***********************************************************");
267 addTitleLine(parser, "*** RazerS - Fast Read Mapping with Sensitivity Control ***");
268 addTitleLine(parser, "*** (c) Copyright 2009 by David Weese ***");
269 addTitleLine(parser, "***********************************************************");
270 addUsageLine(parser, "[OPTION]... <GENOME FILE> <READS FILE>");
271 #ifdef RAZERS_MATEPAIRS
272 addUsageLine(parser, "[OPTION]... <GENOME FILE> <MP-READS FILE1> <MP-READS FILE2>");
273 #endif
274 addSection(parser, "Main Options:");
275 addOption(parser, CommandLineOption("f", "forward", "only compute forward matches", OptionType::Boolean));
276 addOption(parser, CommandLineOption("r", "reverse", "only compute reverse complement matches", OptionType::Boolean));
277 addOption(parser, CommandLineOption("i", "percent-identity", "set the percent identity threshold", OptionType::Double | OptionType::Label, 100 - (100.0 * options.errorRate)));
278 #ifndef NO_PARAM_CHOOSER
279 addOption(parser, CommandLineOption("rr", "recognition-rate", "set the percent recognition rate", OptionType::Double | OptionType::Label, 100 - (100.0 * pm_options.optionLossRate)));
280 addOption(parser, addArgumentText(CommandLineOption("pd", "param-dir", "folder containing user-computed parameter files (optional)", OptionType::String | OptionType::Label), "DIR"));
281 #endif
282 addOption(parser, CommandLineOption("id", "indels", "allow indels (default: mismatches only)", OptionType::Boolean));
283 #ifdef RAZERS_MATEPAIRS
284 addOption(parser, CommandLineOption("ll", "library-length", "mate-pair library length", OptionType::Int | OptionType::Label, options.libraryLength));
285 addOption(parser, CommandLineOption("le", "library-error", "mate-pair library length tolerance", OptionType::Int | OptionType::Label, options.libraryError));
286 #endif
287 addOption(parser, CommandLineOption("m", "max-hits", "output only NUM of the best hits", OptionType::Int | OptionType::Label, options.maxHits));
288 addOption(parser, CommandLineOption("", "unique", "output only unique best matches (-m 1 -dr 0 -pa)", OptionType::Boolean));
289 addOption(parser, CommandLineOption("tr", "trim-reads", "trim reads to given length (default off)", OptionType::Int | OptionType::Label));
290 addOption(parser, addArgumentText(CommandLineOption("o", "output", "change output filename (default <READS FILE>.result)", OptionType::String), "FILE"));
291 addOption(parser, CommandLineOption("v", "verbose", "verbose mode", OptionType::Boolean));
292 addOption(parser, CommandLineOption("vv", "vverbose", "very verbose mode", OptionType::Boolean));
293 addSection(parser, "Output Format Options:");
294 addOption(parser, CommandLineOption("a", "alignment", "dump the alignment for each match", OptionType::Boolean));
295 addOption(parser, CommandLineOption("pa", "purge-ambiguous", "purge reads with more than max-hits best matches", OptionType::Boolean));
296 addOption(parser, CommandLineOption("dr", "distance-range", "only consider matches with at most NUM more errors compared to the best (default output all)", OptionType::Int | OptionType::Label));
297 addOption(parser, CommandLineOption("of", "output-format", "set output format", OptionType::Int | OptionType::Label, options.outputFormat));
298 addHelpLine(parser, "0 = Razer format");
299 addHelpLine(parser, "1 = enhanced Fasta format");
300 addHelpLine(parser, "2 = Eland format");
301 addHelpLine(parser, "3 = Gff format");
302 addHelpLine(parser, "4 = Sam format");
303 addHelpLine(parser, "5 = Amos AFG format");
304 addOption(parser, CommandLineOption("gn", "genome-naming", "select how genomes are named", OptionType::Int | OptionType::Label, options.genomeNaming));
305 addHelpLine(parser, "0 = use Fasta id");
306 addHelpLine(parser, "1 = enumerate beginning with 1");
307 addOption(parser, CommandLineOption("rn", "read-naming", "select how reads are named", OptionType::Int | OptionType::Label, options.readNaming));
308 addHelpLine(parser, "0 = use Fasta id");
309 addHelpLine(parser, "1 = enumerate beginning with 1");
310 addHelpLine(parser, "2 = use the read sequence (only for short reads!)");
311 addHelpLine(parser, "3 = use the Fasta id, do NOT append '/L' or '/R' for mate pairs");
312 addOption(parser, CommandLineOption("so", "sort-order", "select how matches are sorted", OptionType::Int | OptionType::Label, options.sortOrder));
313 addHelpLine(parser, "0 = 1. read number, 2. genome position");
314 addHelpLine(parser, "1 = 1. genome position, 2. read number");
315 addOption(parser, CommandLineOption("pf", "position-format", "select begin/end position numbering", OptionType::Int | OptionType::Label, options.sortOrder));
316 addHelpLine(parser, "0 = gap space");
317 addHelpLine(parser, "1 = position space");
318 addSection(parser, "Filtration Options:");
319 addOption(parser, addArgumentText(CommandLineOption("s", "shape", "set k-mer shape", OptionType::String | OptionType::Label, options.shape), "BITSTRING"));
320 addOption(parser, CommandLineOption("t", "threshold", "set minimum k-mer threshold", OptionType::Int | OptionType::Label, options.threshold));
321 addOption(parser, CommandLineOption("oc", "overabundance-cut", "set k-mer overabundance cut ratio", OptionType::Int | OptionType::Label, options.abundanceCut));
322 addOption(parser, CommandLineOption("rl", "repeat-length", "set simple-repeat length threshold", OptionType::Int | OptionType::Label, options.repeatLength));
323 addOption(parser, CommandLineOption("tl", "taboo-length", "set taboo length", OptionType::Int | OptionType::Label, options.tabooLength));
324 #ifdef RAZERS_DIRECT_MAQ_MAPPING
325 addOption(parser, CommandLineOption("lm", "low-memory", "decrease memory usage at the expense of runtime", OptionType::Boolean));
326 addSection(parser, "Mapping Quality Options:");
327 addOption(parser, CommandLineOption("mq", "mapping-quality", "switch on mapping quality mode", OptionType::Boolean));
328 addOption(parser, CommandLineOption("nbi","no-below-id", "do not report matches with seed identity < percent id", OptionType::Boolean));
329 addOption(parser, CommandLineOption("qsl","mq-seed-length", "seed length used for mapping quality assignment", OptionType::Int | OptionType::Label, options.artSeedLength));
330 #endif
331 addSection(parser, "Verification Options:");
332 addOption(parser, CommandLineOption("mN", "match-N", "\'N\' matches with all other characters", OptionType::Boolean));
333 addOption(parser, addArgumentText(CommandLineOption("ed", "error-distr", "write error distribution to FILE", OptionType::String), "FILE"));
334
335 bool stop = !parse(parser, argc, argv, cerr);
336
337 //////////////////////////////////////////////////////////////////////////////
338 // Extract options
339 getOptionValueLong(parser, "forward", options.forward);
340 getOptionValueLong(parser, "reverse", options.reverse);
341 getOptionValueLong(parser, "percent-identity", options.errorRate);
342 #ifndef NO_PARAM_CHOOSER
343 getOptionValueLong(parser, "recognition-rate", pm_options.optionLossRate);
344 getOptionValueLong(parser, "param-dir", pm_options.paramFolder);
345 #endif
346 getOptionValueLong(parser, "indels", options.hammingOnly);
347 options.hammingOnly = !options.hammingOnly;
348 #ifdef RAZERS_MATEPAIRS
349 getOptionValueLong(parser, "library-length", options.libraryLength);
350 getOptionValueLong(parser, "library-error", options.libraryError);
351 #endif
352 getOptionValueLong(parser, "max-hits", options.maxHits);
353 getOptionValueLong(parser, "purge-ambiguous", options.purgeAmbiguous);
354 getOptionValueLong(parser, "distance-range", options.distanceRange);
355 if (isSetLong(parser, "distance-range")) options.distanceRange++;
356 getOptionValueLong(parser, "alignment", options.dumpAlignment);
357 getOptionValueLong(parser, "output", options.output);
358 getOptionValueLong(parser, "output-format", options.outputFormat);
359 getOptionValueLong(parser, "sort-order", options.sortOrder);
360 getOptionValueLong(parser, "genome-naming", options.genomeNaming);
361 getOptionValueLong(parser, "read-naming", options.readNaming);
362 getOptionValueLong(parser, "position-format", options.positionFormat);
363 getOptionValueLong(parser, "shape", options.shape);
364 getOptionValueLong(parser, "threshold", options.threshold);
365 getOptionValueLong(parser, "overabundance-cut", options.abundanceCut);
366 getOptionValueLong(parser, "repeat-length", options.repeatLength);
367 #ifdef RAZERS_DIRECT_MAQ_MAPPING
368 getOptionValueLong(parser, "quality-in-header", options.fastaIdQual);
369 getOptionValueLong(parser, "mapping-quality", options.maqMapping);
370 getOptionValueLong(parser, "no-below-id", options.noBelowIdentity);
371 getOptionValueLong(parser, "mq-seed-length", options.artSeedLength);
372 getOptionValueLong(parser, "seed-mism-quality", options.maxMismatchQualSum);
373 getOptionValueLong(parser, "total-mism-quality", options.absMaxQualSumErrors);
374 getOptionValueLong(parser, "low-memory", options.lowMemory);
375 #endif
376 getOptionValueLong(parser, "trim-reads", options.trimLength);
377 getOptionValueLong(parser, "taboo-length", options.tabooLength);
378 getOptionValueLong(parser, "match-N", options.matchN);
379 getOptionValueLong(parser, "error-distr", errorPrbFileName);
380 if (isSetLong(parser, "help") || isSetLong(parser, "version")) return 0; // print help or version and exit
381 if (isSetLong(parser, "verbose")) options._debugLevel = max(options._debugLevel, 1);
382 if (isSetLong(parser, "vverbose")) options._debugLevel = max(options._debugLevel, 3);
383 if (isSetLong(parser, "unique"))
384 {
385 options.maxHits = 1;
386 options.distanceRange = 1;
387 options.purgeAmbiguous = true;
388 }
389 if (!options.forward && !options.reverse) // enable both per default
390 {
391 options.forward = true;
392 options.reverse = true;
393 }
394 appendValue(genomeFileNames, getArgumentValue(parser, 0));
395 for (unsigned i = 1; i < argumentCount(parser) && i < maxFiles; ++i)
396 appendValue(readFileNames, getArgumentValue(parser, i), Generous());
397
398 //////////////////////////////////////////////////////////////////////////////
399 // Check options
400 if ((options.errorRate < 50 || options.errorRate > 100) && (stop = true))
401 cerr << "Percent identity threshold must be a value between 50 and 100" << endl;
402 if ((pm_options.optionLossRate < 80 || pm_options.optionLossRate > 100) && (stop = true))
403 cerr << "Recognition rate must be a value between 80 and 100" << endl;
404 #ifdef RAZERS_MATEPAIRS
405 if ((options.libraryLength <= 0) && (stop = true))
406 cerr << "Library length must be a value greater 0" << endl;
407 if ((options.libraryError <= 0) && (stop = true))
408 cerr << "Library error must be a value greater or equal 0" << endl;
409 #endif
410 if ((options.maxHits < 1) && (stop = true))
411 cerr << "Maximum hits threshold must be greater than 0" << endl;
412 if ((options.outputFormat > 5) && (stop = true))
413 cerr << "Invalid output format option." << endl;
414 if ((options.sortOrder > 1) && (stop = true))
415 cerr << "Invalid sort order options." << endl;
416 if ((options.genomeNaming > 1) && (stop = true))
417 cerr << "Invalid genome naming options." << endl;
418 if ((options.readNaming > 3) && (stop = true))
419 cerr << "Invalid read naming options." << endl;
420 if ((options.positionFormat > 1) && (stop = true))
421 cerr << "Invalid position format options." << endl;
422 if ((options.threshold < 1) && (stop = true))
423 cerr << "Threshold must be a value greater than 0" << endl;
424 if (isSetLong(parser, "shape"))
425 {
426 unsigned ones = 0;
427 unsigned zeros = 0;
428 for(unsigned i = 0; i < length(options.shape); ++i)
429 switch (options.shape[i])
430 {
431 case '0':
432 ++zeros;
433 break;
434 case '1':
435 ++ones;
436 break;
437 default:
438 cerr << "Shape must be a binary string" << endl;
439 stop = true;
440 i = length(options.shape);
441 }
442 if ((ones == 0 || ones > 31) && !stop)
443 {
444 cerr << "Invalid Shape" << endl;
445 stop = true;
446 }
447 unsigned maxOnes = 14;
448 #ifdef RAZERS_OPENADDRESSING
449 maxOnes = 31;
450 #endif
451 if ((ones < 7 || ones > maxOnes) && !stop)
452 cerr << "Warning: Shape should contain at least 7 and at most " << maxOnes << " '1's" << endl;
453 }
454 if ((options.abundanceCut <= 0 || options.abundanceCut > 1) && (stop = true))
455 cerr << "Overabundance cut ratio must be a value >0 and <=1. Set to 1 to disable." << endl;
456 if ((options.repeatLength <= 1) && (stop = true))
457 cerr << "Repeat length must be a value greater than 1" << endl;
458 #ifdef RAZERS_DIRECT_MAQ_MAPPING
459 if ((options.artSeedLength < 24) && (stop = true))
460 cerr << "Minimum seed length is 24" << endl;
461 if ((options.maxMismatchQualSum < 0) && (stop = true))
462 cerr << "Max seed mismatch quality needs to be 0 or a positive value" << endl;
463 if ((options.absMaxQualSumErrors < 0) && (stop = true))
464 cerr << "Max total mismatch quality needs to be 0 or a positive value" << endl;
465 #endif
466 if ((options.trimLength != 0 && options.trimLength < 14) && (stop = true))
467 cerr << "Minimum read length is 14" << endl;
468 if ((options.tabooLength < 1) && (stop = true))
469 cerr << "Taboo length must be a value greater than 0" << endl;
470 if (argumentCount(parser) == 2)
471 options.libraryLength = -1; // only 1 readset -> disable mate-pair mapping
472 if ((argumentCount(parser) > maxFiles) && (stop = true))
473 cerr << "More than " << maxFiles << " input files specified." << endl;
474 if ((argumentCount(parser) < 2) && (stop = true))
475 {
476 if (argc > 1)
477 cerr << "Less than 2 input files specified." << endl;
478 else
479 {
480 shortHelp(parser, cerr); // print short help and exit
481 return 0;
482 }
483 }
484
485 options.errorRate = (100.0 - options.errorRate) / 100.0;
486 pm_options.optionLossRate = (ParamChooserOptions::TFloat)(100.0 - pm_options.optionLossRate) / 100.0;
487 if (stop)
488 {
489 cerr << "Exiting ..." << endl;
490 return RAZERS_INVALID_OPTIONS;
491 }
492
493 //////////////////////////////////////////////////////////////////////////////
494 // get read length
495 int readLength = estimateReadLength(toCString(readFileNames[0]));
496 if (readLength == RAZERS_READS_FAILED)
497 {
498 cerr << "Failed to open reads file " << readFileNames[0] << endl;
499 cerr << "Exiting ..." << endl;
500 return RAZERS_READS_FAILED;
501 }
502 if (readLength == 0) {
503 cerr << "Failed to read the first read sequence." << endl;
504 cerr << "Exiting ..." << endl;
505 return RAZERS_READS_FAILED;
506 }
507
508 if (options.trimLength > readLength)
509 options.trimLength = readLength;
510
511 #ifndef NO_PARAM_CHOOSER
512 if (!(isSetLong(parser, "shape") || isSetLong(parser, "threshold")))
513 {
514 if (options.lowMemory) pm_options.maxWeight = 13;
515 pm_options.verbose = (options._debugLevel >= 1);
516 pm_options.optionErrorRate = (ParamChooserOptions::TFloat)options.errorRate;
517 if (options.hammingOnly)
518 {
519 pm_options.optionProbINSERT = (ParamChooserOptions::TFloat)0.0;
520 pm_options.optionProbDELETE = (ParamChooserOptions::TFloat)0.0;
521 }
522 else
523 {
524 pm_options.optionProbINSERT = (ParamChooserOptions::TFloat)0.01; //this number is basically without meaning, any value > 0 will lead to
525 pm_options.optionProbDELETE = (ParamChooserOptions::TFloat)0.01; //edit distance parameter choosing
526 }
527
528 if (empty(pm_options.paramFolder))
529 {
530 string razersFolder = argv[0];
531 size_t lastPos = razersFolder.find_last_of('/') + 1;
532 if (lastPos == razersFolder.npos + 1) lastPos = razersFolder.find_last_of('\\') + 1;
533 if (lastPos == razersFolder.npos + 1) lastPos = 0;
534 razersFolder.erase(lastPos);
535 pm_options.paramFolderPath = razersFolder;
536 }
537 if (options.trimLength > 0) readLength = options.trimLength;
538 if (readLength > 0)
539 {
540 /* if(options.maqMapping && readLength != options.artSeedLength)
541 pm_options.totalN = options.artSeedLength;
542 else*/
543 pm_options.totalN = readLength;
544 if (options._debugLevel >= 1)
545 cerr << "___PARAMETER_CHOOSING__" << endl;
546 if (!chooseParams(options,pm_options))
547 {
548 if (pm_options.verbose)
549 cerr << "Couldn't find preprocessed parameter files. Please configure manually (options --shape and --threshold)." << endl;
550 cerr << "Using default configurations (shape = " << options.shape << " and q-gram lemma)." << endl;
551 }
552 if (options._debugLevel >= 1) cerr << endl;
553 } else
554 {
555 cerr << "Failed to load reads" << endl;
556 cerr << "Exiting ..." << endl;
557 return RAZERS_READS_FAILED;
558 }
559 }
560 #endif
561
562 #ifdef RAZERS_PARALLEL
563 tbb::task_scheduler_init scheduler;
564 #endif
565
566 int result = mapReads(genomeFileNames, readFileNames, errorPrbFileName, options);
567 if (result != 0)
568 cerr << "Exiting ..." << endl;
569 return result;
570 }
571